
Commit fc3b77e

remove redundant imports + commented out sections
1 parent 38d0cc4 commit fc3b77e

3 files changed: 1 addition & 33 deletions


rpunct/number_recoverer.py

Lines changed: 1 addition & 1 deletion
@@ -4,9 +4,9 @@
 Module supporting punctuation recovery and post-processing of raw STT output.
 """
 import re
+import decimal
 from num2words import num2words
 from number_parser import parse as number_parser, parse_number as individual_number_parser
-import decimal
 
 try:
     from rpunct.utils import *
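For context on the import block this hunk reorders: `num2words` converts digits to written-out numbers, while `number_parser` does the reverse. A minimal usage sketch, separate from the diff itself (the aliases mirror the import line above; how the module applies these calls is inferred from its docstring, not shown in this commit):

    from num2words import num2words
    from number_parser import parse as number_parser, parse_number as individual_number_parser

    # Digits -> words, e.g. to match the spoken forms found in raw STT output.
    print(num2words(42))                            # forty-two

    # Words -> digits: over running text, and for a single standalone number.
    print(number_parser("pay twenty five pounds"))  # pay 25 pounds
    print(individual_number_parser("twenty five"))  # 25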

rpunct/punctuate.py

Lines changed: 0 additions & 5 deletions
@@ -5,15 +5,10 @@
 __email__ = "daulet.nurmanbetov@gmail.com"
 
 import re
-import os
 import json
 import warnings
-from time import time
-from tqdm import tqdm
 from simpletransformers.ner import NERModel
 
-from langdetect import detect
-
 # VALID_LABELS = ["OU", "OO", ".O", "!O", ",O", ".U", "!U", ",U", ":O", ";O", ":U", "'O", "-O", "?O", "?U"]
 # PUNCT_LABELS = ['O', '.', ',', ':', ';', "'", '-', '?', '!', '%']
 PUNCT_LABELS = ['O', '.', ',', ':', ';', "'", '-', '?', '!']
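The four deleted imports (`os`, `time`, `tqdm`, `langdetect`) plus a trailing blank line account for the five removed lines; none of those names is referenced elsewhere in the module. A hypothetical sketch of how such dead imports can be flagged with the standard-library `ast` module (illustrative only, not part of this repository):

    import ast
    import sys

    def unused_imports(path):
        """List imported names never referenced elsewhere in a Python file."""
        tree = ast.parse(open(path).read())
        imported, used = {}, set()
        for node in ast.walk(tree):
            if isinstance(node, (ast.Import, ast.ImportFrom)):
                for alias in node.names:
                    name = (alias.asname or alias.name).split(".")[0]
                    imported[name] = node.lineno
            elif isinstance(node, ast.Name):
                used.add(node.id)  # catches os.path too: the base 'os' is a Name
        return [f"{name} (line {line})"
                for name, line in imported.items() if name not in used]

    if __name__ == "__main__":
        print("\n".join(unused_imports(sys.argv[1])))  # e.g. rpunct/punctuate.py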

rpunct/rpunct_recoverer.py

Lines changed: 0 additions & 27 deletions
@@ -327,24 +327,6 @@ def itemise_segment(self, original_segment:list, recovered_segment:list) -> list:
             index_orig += 1
             index_rec += 1
 
-        # # Verify all recovered words have been itemised
-        # try:
-        #     assert index_rec == len(recovered_segment), \
-        #         f"While reconstructing segment structure, one or more recovered words have been missed. \
-        #             \n Original text: {[item.content for item in original_segment]} \
-        #             \n Recovered text: {[item.content for item in recovered_segment]}"
-        # except AttributeError:
-        #     assert index_rec == len(recovered_segment), \
-        #         f"While reconstructing segment structure, one or more recovered words have been missed. \
-        #             \n Original text: {[item.content for item in original_segment]} \
-        #             \n Recovered text: {[item for item in recovered_segment]}"
-
-        # # Verify that the reconstructed segment is the same length as original (excluding words removed by hyphenation)
-        # assert len(recovered_segment) == (len(original_segment) - total_fewer_words), \
-        #     f"While reconstructing segment structure, a mistake has occured. \
-        #         \n Original text: {[item.content for item in original_segment]} \
-        #         \n Recovered text: {[item.content for item in recovered_segment]}"
-
         # Return new itemised segment to the list of segments
         return output_segment
 
@@ -378,15 +360,6 @@ def calc_end_item_index(self, plaintext_items_lst, recovered_words_lst, position):
             orig_text_removals = original_segment_words.index('pence')
             punct_text_removals = 0
 
-        # elif recovered_word.startswith('£') and not original_segment_words[0].startswith('£'):
-        #     numerical_removals = self.find_subword_index(['pound', 'pounds'], original_segment_words, recovered_words_lst, position)
-        # elif recovered_word.startswith('$') and not original_segment_words[0].startswith('$'):
-        #     numerical_removals = self.find_subword_index(['dollar', 'dollars'], original_segment_words, recovered_words_lst, position)
-        # elif recovered_word.startswith('€') and not original_segment_words[0].startswith('€'):
-        #     numerical_removals = self.find_subword_index(['euro', 'euros'], original_segment_words, recovered_words_lst, position)
-        # elif recovered_word.startswith('¥') and not original_segment_words[0].startswith('¥') and original_segment_words.count('yen') > 0:
-        #     numerical_removals = original_segment_words.index('yen')
-
         else:
             # Align original natural language numbers to recovered digits
             mapping = align_texts(original_segment_words, recovered_words_lst, position)
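The commented-out assertions deleted from `itemise_segment` encoded two invariants: every recovered word must be consumed during itemisation, and the rebuilt segment must match the original length once words merged by hyphenation are discounted. A standalone reconstruction of that check, with a hypothetical `Item` class standing in for rpunct's real word objects:

    from dataclasses import dataclass

    @dataclass
    class Item:
        """Hypothetical stand-in for rpunct's itemised word objects."""
        content: str

    def check_itemisation(original_segment, recovered_segment,
                          index_rec, total_fewer_words):
        # Invariant 1: all recovered words have been itemised.
        assert index_rec == len(recovered_segment), (
            "One or more recovered words have been missed.\n"
            f" Original:  {[item.content for item in original_segment]}\n"
            f" Recovered: {[item.content for item in recovered_segment]}"
        )
        # Invariant 2: the reconstructed segment matches the original length,
        # excluding words removed by hyphenation.
        assert len(recovered_segment) == len(original_segment) - total_fewer_words

    # Hyphenation merges "twenty" + "five" into one word, so one fewer item:
    orig = [Item(w) for w in ["twenty", "five", "pounds"]]
    rec = [Item(w) for w in ["twenty-five", "pounds"]]
    check_itemisation(orig, rec, index_rec=len(rec), total_fewer_words=1)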
