diff --git a/README.md b/README.md index c4991fb..67b184b 100644 --- a/README.md +++ b/README.md @@ -1,12 +1,33 @@ profanity-filter ================ -Python module that replaces inappropriate words with something more PG rated. - -Usage ------ -```python -f = Filter('badword and bad words', clean_word='unicorn') -safe_string = f.clean() -print safe_string -``` \ No newline at end of file +Python module that replaces inappropriate words with something more PG rated. Used in a project that allows individuals to send text messages to a sign used in Xmas decorations. + +Uses a line separated file listing bad words as its source +to check if a user submitted something inappropriate. + +Code modified from original by Jared Mess + +Modified by: jjb +Date 1/2/2016 + +Example of code use in test_profanity_filter.py: +Run +$ python test_profanity_filter.py + +Example Use: +text = "Cassandra is a fuCking piece of shit_on_a_long_stick" +f=Filter(text, "HAPPY") +f.clean_anywhere() +f.clean_start() +f.clean_whole_word() + +Example Output--- +Original: +Cassandra is a fuCking piece of shit_on_a_long_stick +Output from clean_anywhere: +CHAPPYandra is a HAPPYing piece of HAPPY_on_a_long_stick +Output from clean_start: +Cassandra is a HAPPYing piece of HAPPY_on_a_long_stick +Output from clean whole word: +Cassandra is a HAPPY piece of shit_on_a_long_stick diff --git a/profanity_filter.py b/profanity_filter.py index a2c7887..0a7db64 100644 --- a/profanity_filter.py +++ b/profanity_filter.py @@ -1,30 +1,109 @@ """ Uses a line separated file listing bad words as it's source -to check if a user submitted something inappropriate +to check if a user submitted something inappropriate. 
+Code modified from: https://github.com/jared-mess/profanity-filter -f = Filter('slut', clean_word='unicorn') -word = f.clean() -print word ->>slut +Modified by: Jeremy Becnel +Date 1/2/2016 + +Example of Code in test_profanity_filter.py + +Example Output--- +Original: +Cassandra is a fuCking piece of shit_on_a_long_stick +Output from clean_anywhere: +CHAPPYandra is a HAPPYing piece of HAPPY_on_a_long_stick +Output from clean_start: +Cassandra is a HAPPYing piece of HAPPY_on_a_long_stick +Output from clean whole word: +Cassandra is a HAPPY piece of shit_on_a_long_stick """ import re +# bad word file location and name +badwordfile = 'bad_words.txt' + + class Filter(object): """ - Replaces a bad word in a string with something more PG friendly - - Filter('you annoying prick', 'unicorn') + Class is designed to take a string and clean it up by replacing + instances of "bad" words with a more acceptable word. """ - def __init__(self, original_string, clean_word='****'): - - bad_words_file = open('bad_words.txt', 'r') + + # class variable containing all the bad words we are looking for + bad_words = set(line.strip('\n') for line in open(badwordfile)) + + def __init__(self, original_string, replacement_string='****'): - self.bad_words = set(line.strip('\n') for line in open('bad_words.txt')) + #cls.bad_words = ['ass','fuck', 'shit' ] # used for testing self.original_string = original_string - self.clean_word = clean_word - - def clean(self): - exp = '(%s)' %'|'.join(self.bad_words) + self.replacement_string = replacement_string + self.profanity_found = None + self.__has_been_cleaned = False + self.clean_string = None + +#===================================INSTANCE METHODS + +#---------------------------standard get set methods with some error checking + + def get_original_string(self): + return self.original_string + + def get_replacement_string(self): + return self.replacement_string + + def is_profanity_found(self): + # check to see if a cleaning has been performed. 
+ assert (self.__has_been_cleaned), "Word must be cleaned before this can be determined." + # after a clean is performed this method can be used to determine if + # profanity was found + return self.profanity_found + + def get_clean_string(self): + # check to see if a cleaning has been performed. + assert (self.__has_been_cleaned), "Word must be cleaned before a clean string can be returned." + # after a clean is performed this method can be used to retrieve + # the cleaned string + return self.clean_string + +#------------------------------cleaners + +# The methods below are instance methods that clean the given string according +# to different rules. + + def __clean(self,exp): r = re.compile(exp, re.IGNORECASE) - return r.sub(self.clean_word, self.original_string) + # check for any profanity in the string + self.profanity_found = (r.search(self.original_string) != None) + # return the original string where the replacement string has been substituted for the profanity + self.clean_string = r.sub(self.replacement_string, self.original_string) + self.__has_been_cleaned = True + return self.clean_string + + # cleans profanity found anywhere in the word + # example with #cls.bad_words = ['ass','fuck', 'shit' ] # used for testing: + # cleans "Cassandra Fuck Off you shithead" + # as 'C****andra **** Off you ****head' + def clean_anywhere(self): + exp = '(%s)' %'|'.join(Filter.bad_words) + return self.__clean(exp) + + + # requires a word boundary at the beginning of the word, i.e. word must start with profanity + # example with #cls.bad_words = ['ass','fuck', 'shit' ] # used for testing: + # cleans "Cassandra Fuck Off you shithead" + # as 'Cassandra **** Off you ****head' + def clean_start(self): + exp = '(\\b%s)' %'|\\b'.join(Filter.bad_words) + return self.__clean(exp) + + + # requires blank at beginning and end of word, i.e. 
will match whole word only + # example with #cls.bad_words = ['ass','fuck', 'shit' ] # used for testing: + # cleans "Cassandra Fuck Off you shithead" + # as 'Cassandra **** Off you shithead' + def clean_whole_word(self): + exp = '(\\b%s\\b)' %'\\b|\\b'.join(Filter.bad_words) + return self.__clean(exp) + diff --git a/test_profanity_filter.py b/test_profanity_filter.py new file mode 100644 index 0000000..ae00a3c --- /dev/null +++ b/test_profanity_filter.py @@ -0,0 +1,60 @@ +from profanity_filter import Filter + +text = "Cassandra is a fuCking piece of shit_on_a_long_stick" + +print "We first test the three cleaning methods on the message:" +print text +print + +f=Filter(text, "HAPPY") +print "Output from clean_anywhere:" +print f.clean_anywhere() +print "Output from clean_start:" +print f.clean_start() +print "Output from clean whole word:" +print f.clean_whole_word() + + +print + +text = "Cassy is an asset to our company." +print "We now test the profanity check with the following:" +print text +print "Output from clean_anywhere:" + +f=Filter(text,'HAPPY') +f.clean_anywhere() +if not f.is_profanity_found(): + print "No profanity." +else: + print "Here is the clean anywhere:" + print f.get_clean_string() + + +print +print "Output from clean_start:" +print f.clean_start() +if not f.is_profanity_found(): + print "No profanity." +else: + print "Here is the clean string:" + print f.get_clean_string() + +print +print "Output from clean whole word:" +print f.clean_whole_word() +if not f.is_profanity_found(): + print "No profanity." +else: + print "Here is the clean string:" + print f.get_clean_string() + + + + + +#print "We now make sure the error handling is working. An error should occur" +#f = Filter(text, "SAD") +#print f.is_profanity_found() +#print f.get_clean_string() +