From 7fbe0e04cb2915c8b89a36bfbe06dc38eba6bca5 Mon Sep 17 00:00:00 2001 From: "Takacs, David" Date: Fri, 12 Sep 2014 12:28:12 +0200 Subject: [PATCH] Leave a space in the text in place of every other white space. This fixes a bug with multi-line text: when a line ended with a non-whitespace character and the next line started with a word, removing the line break joined the last word of the first line to the first word of the next. --- ner/client.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/ner/client.py b/ner/client.py index a532b8d..c4c81fb 100644 --- a/ner/client.py +++ b/ner/client.py @@ -116,8 +116,7 @@ def tag_text(self, text): :param text: raw text string to tag :returns: tagged text in given output format """ - for s in ('\f', '\n', '\r', '\t', '\v'): #strip whitespaces - text = text.replace(s, '') + text = re.sub(r'\s+', ' ', text) #collapse whitespaces text += '\n' #ensure end-of-line with tcpip4_socket(self.host, self.port) as s: if not isinstance(text, bytes): @@ -147,8 +146,7 @@ def tag_text(self, text): :param text: raw text strig to tag :returns: tagged text in given output format """ - for s in ('\f', '\n', '\r', '\t', '\v'): #strip whitespaces - text = text.replace(s, '') + text = re.sub(r'\s+', ' ', text) #collapse whitespaces text += '\n' #ensure end-of-line with http_connection(self.host, self.port) as c: headers = {'Content-type': 'application/x-www-form-urlencoded', 'Accept' : 'text/plain'}