-
Notifications
You must be signed in to change notification settings - Fork 2
Text To Speech #30
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Text To Speech #30
Changes from all commits
0a5b1d6
b27dca5
4f4fe66
122b141
18c708b
8d85bae
613d146
5ce3617
91db718
f0019a3
75b9b5d
b8f392c
772aafc
dc3cc4a
985c05e
c2c4b75
2271a65
07da894
74b07d4
f6d7af9
29b43e1
b63c9b2
0fef2c7
28d0e2e
cd1fcc2
63cfc73
3f62955
065c8c1
a8701d9
84248da
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,165 @@ | ||
| """ | ||
| Helpers for splitting text into sentences and grading a typed sentence against the correct one | ||
| """ | ||
| import string | ||
| import nltk | ||
| import Levenshtein as Lev | ||
|
|
||
|
|
||
| # Modified Code from Bing library | ||
def get_sentences(text):
    """
    Split a block of text into its individual sentences

    The text is first cut at every line break, empty lines are dropped, and each
    remaining line is handed to nltk's sentence tokenizer.

    :param text: str, any text
    :return: list of sentence fragments, in the order they appear in the text
    """
    sentences = []

    for line in text.split('\n'):
        # consecutive line breaks produce empty strings; nothing to tokenize there
        if not line:
            continue
        sentences += nltk.tokenize.sent_tokenize(line)

    return sentences
|
|
||
|
|
||
def _normalize_and_tokenize(sentence):
    """
    Lower-case a sentence, strip its ASCII punctuation, and split it into words

    :param sentence: str, any sentence
    :return: list of lower-case word tokens
    """
    # translation table that deletes every character in string.punctuation
    punctuation_remover = str.maketrans('', '', string.punctuation)
    normalized = sentence.lower().translate(punctuation_remover)
    return nltk.tokenize.word_tokenize(normalized)


def correct_sentence(given_sent, correct_sent):
    """
    Compare the user's sentence with the correct sentence and grade it word by word

    Both sentences are lower-cased and stripped of punctuation before comparing.
    Words of the user's sentence are matched left to right against the correct
    sentence; a word only counts as correct if it appears after the previously
    matched word.

    :param given_sent: str, sentence the user types into the text box
    :param correct_sent: str, the correct sentence that the instructor inputs
    :return: dictionary with the keys
        "missing": correct words left over after crossing off one occurrence for
            each word the user typed,
        "incorrect_word_index": indices (into the user's words) graded incorrect,
        "isCorrect": True if the user's words equal the correct words exactly,
        "words": one {"word", "grade"} dictionary per user word; incorrect words
            also get a "word_grade" (result of correct_words) when a similar
            missing word exists
    """
    given_tok = _normalize_and_tokenize(given_sent)
    correct_tok = _normalize_and_tokenize(correct_sent)

    # start with every correct word and cross off one occurrence for each word
    # the user typed, leaving only the missing words
    missing = correct_tok.copy()
    for word in given_tok:
        if word in missing:
            missing.remove(word)

    incorrect_word_index = []
    words = []

    # only look for a match after the previous match, so word order matters
    search_from = 0
    for position, word in enumerate(given_tok):
        try:
            match_index = correct_tok.index(word, search_from)
        except ValueError:
            incorrect_word_index.append(position)
            words.append({"word": word,
                          "grade": "incorrect"})
        else:
            words.append({"word": word,
                          "grade": "correct"})
            search_from = match_index + 1

    # grade each incorrect word letter by letter against its closest missing word
    for position in incorrect_word_index:
        sim_word = most_similar_word(given_tok[position], missing)

        if sim_word is not None:
            word_grade = correct_words(given_tok[position], sim_word)
            words[position]["word_grade"] = word_grade

    return {
        "missing": missing,
        "incorrect_word_index": incorrect_word_index,
        # True if the user inputs the same sentence as the correct answer
        "isCorrect": given_tok == correct_tok,
        "words": words,
    }
|
|
||
|
|
||
def most_similar_word(word, comparisons):
    """
    Pick the entry of a word list that is closest to a given word

    Closeness is the Levenshtein distance; when several entries share the smallest
    distance the first one in the list wins.

    :param word: string with one word
    :param comparisons: list of words
    :return: string, most similar word in comparisons to word, or None if
        comparisons is empty
    """
    return min(
        comparisons,
        key=lambda candidate: Lev.distance(word, candidate),
        default=None,
    )
|
|
||
|
|
||
def correct_words(given_word, correct_word):
    """
    Determine which letters in an input word are correct and incorrect

    Letters of given_word are matched left to right against correct_word; a letter
    only counts as correct if it appears after the previously matched letter.

    :param given_word: string with one word, the incorrect word from the user's input
    :param correct_word: string with one word, the similar word that is correct
    :return: dictionary with the keys
        "missing": list of characters of correct_word left over after crossing
            off one occurrence for each character of given_word,
        "letters": one {"char", "grade"} dictionary per character of given_word,
            where grade is "correct" or "incorrect"
    """
    # start with every correct character and cross off one occurrence for each
    # character the user typed, leaving only the missing characters
    char_missing = list(correct_word)
    for char in given_word:
        if char in char_missing:
            char_missing.remove(char)

    letters = []

    # only look for a match after the previous match, so letter order matters
    search_from = 0
    for char in given_word:
        match_index = correct_word.find(char, search_from)
        if match_index == -1:
            letters.append({"char": char,
                            "grade": "incorrect"})
        else:
            letters.append({"char": char,
                            "grade": "correct"})
            search_from = match_index + 1

    return {"missing": char_missing, "letters": letters}
Uh oh!
There was an error while loading. Please reload this page.