From 1a3c9ef9b7fb1ccbce796d5c82ec77edb495570e Mon Sep 17 00:00:00 2001
From: findlaysmith
Date: Tue, 10 Apr 2018 21:58:09 +0100
Subject: [PATCH 1/3] change translate feature to translate any messages not in the channel language

---
 config.ini.dist                |  1 +
 ircbot/plugins/translate.py    | 26 ++++++++++++++++++++++++++
 ircbot/plugins/translate_ru.py | 31 -------------------------------
 3 files changed, 27 insertions(+), 31 deletions(-)
 create mode 100644 ircbot/plugins/translate.py
 delete mode 100644 ircbot/plugins/translate_ru.py

diff --git a/config.ini.dist b/config.ini.dist
index c761d2c..4c57655 100644
--- a/config.ini.dist
+++ b/config.ini.dist
@@ -26,6 +26,7 @@ redis_url = redis://localhost:6379/0
 redis_prefix = botbot-
 plugin_dirs = /home/skull/dev/botbot-plugins
 plugin_blacklist =
+lang = en
 
 [Links]
 mode = whitelist
diff --git a/ircbot/plugins/translate.py b/ircbot/plugins/translate.py
new file mode 100644
index 0000000..f9ba748
--- /dev/null
+++ b/ircbot/plugins/translate.py
@@ -0,0 +1,26 @@
+import requests
+import json
+
+from ircbot import bot
+
+def translate(bot,text):
+    api_key = bot.config['Yandex']['translate_key']
+    lang = check_lang(bot, text)
+    translate_url = "https://translate.yandex.net/api/v1.5/tr.json/translate?key={}&text={}&lang={}-{}".format(api_key, text, lang, bot.config["System"]["lang"])
+    api_response = requests.get(translate_url).text
+    api_json = json.loads(api_response)
+    print(api_json)
+    return api_json['text'][0]
+
+def check_lang(bot,text):
+    api_key = bot.config['Yandex']['translate_key']
+    detect_url = "https://translate.yandex.net/api/v1.5/tr.json/detect?key={}&text={}".format(api_key, text)
+    detect_response = requests.get(detect_url).text
+    detect_json = json.loads(detect_response)
+    return detect_json["text"]
+
+
+@bot.hook()
+def message_hook(bot, channel, sender, message):
+    if check_lang(bot, message) != bot.config["System"]["lang"]:
+        bot.message(channel, "translation: {}").format(translate(bot, message))
diff --git a/ircbot/plugins/translate_ru.py b/ircbot/plugins/translate_ru.py
deleted file mode 100644
index de20f80..0000000
--- a/ircbot/plugins/translate_ru.py
+++ /dev/null
@@ -1,31 +0,0 @@
-import requests
-import json
-
-from ircbot import bot
-
-cyrillic_latin = {u'а': 'a', u'б': 'b', u'в': 'v', u'г': 'g',
-                  u'д': 'd', u'е': 'ye', u'ё': 'yo', u'ж': 'zh',
-                  u'з': 'z', u'и': 'i', u'й': 'j', u'к': 'k',
-                  u'л': 'l', u'м': 'm', u'н': 'n', u'о': 'o',
-                  u'п': 'p', u'р': 'r', u'с': 's', u'т': 't',
-                  u'у': 'u', u'ф': 'f', u'х': 'h', u'ц': 'ts',
-                  u'ч': 'ch', u'ш': 'sh', u'щ': 'sch', u'ы': 'i',
-                  u'э': 'e', u'ю': 'yu', u'я': 'ya', u' ': ' ',
-                  u'ъ': '', u'ь': ''}
-
-
-def translate_text(bot, text):
-    api_key = bot.config['Yandex']['translate_key']
-    api_url = "https://translate.yandex.net/api/v1.5/tr.json/translate?key={}&text={}&lang=ru-en".format(api_key, text)
-    api_response = requests.get(api_url).text
-    api_json = json.loads(api_response)
-    print(api_json)
-    return api_json['text'][0]
-
-
-@bot.hook()
-def message_hook(bot, channel, sender, message):
-    if set(message) <= set(cyrillic_latin.keys()) and message.strip():
-        translit = ''.join(str(c) for c in map(lambda x: cyrillic_latin[x], message))
-        translate = translate_text(bot, message)
-        bot.message(channel, '{} <{}> ~ {}'.format(message, translit, translate))
From 9e7222d5f19283566a8d3a7b9c40a9f1876089a5 Mon Sep 17 00:00:00 2001
From: findlaysmith
Date: Tue, 10 Apr 2018 22:01:36 +0100
Subject: [PATCH 2/3] I may have forgot to check with the linter...

---
 ircbot/plugins/translate.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/ircbot/plugins/translate.py b/ircbot/plugins/translate.py
index f9ba748..8fc4281 100644
--- a/ircbot/plugins/translate.py
+++ b/ircbot/plugins/translate.py
@@ -3,7 +3,8 @@
 
 from ircbot import bot
 
-def translate(bot,text):
+
+def translate(bot, text):
     api_key = bot.config['Yandex']['translate_key']
     lang = check_lang(bot, text)
     translate_url = "https://translate.yandex.net/api/v1.5/tr.json/translate?key={}&text={}&lang={}-{}".format(api_key, text, lang, bot.config["System"]["lang"])
@@ -12,7 +13,8 @@ def translate(bot,text):
     print(api_json)
     return api_json['text'][0]
 
-def check_lang(bot,text):
+
+def check_lang(bot, text):
     api_key = bot.config['Yandex']['translate_key']
     detect_url = "https://translate.yandex.net/api/v1.5/tr.json/detect?key={}&text={}".format(api_key, text)
     detect_response = requests.get(detect_url).text
From ab12fbeae17e27e412d639d0da026e41cb92fbb0 Mon Sep 17 00:00:00 2001
From: findlaysmith
Date: Tue, 10 Apr 2018 23:53:43 +0100
Subject: [PATCH 3/3] changing language detection to use offline module, for IO load/privacy

Review fixes folded into this patch:

- call .format() on the "translation: {}" string instead of on the
  return value of bot.message(), so the translation is actually sent
- translate only when the detection confidence is at least 0.9; the
  check was inverted and let through nothing but gibberish
- read the language and confidence from the Language object returned by
  detect_langs() instead of slicing its string form, which broke on
  codes longer than two letters such as zh-cn
- run detection once per message; langdetect is not deterministic, so
  separate detect()/detect_langs() calls could disagree
- ignore messages langdetect cannot analyse (LangDetectException)
- pass the message through requests' params so it is URL-encoded
---
Note: this patch was amended during review, so the commit id in the
"From" line and the post-image blob id in the translate.py "index" line
no longer match the content below.

 ircbot/plugins/translate.py | 53 +++++++++++++++++++++++++++++++++++++++--------------
 requirements.txt            |  1 +
 2 files changed, 40 insertions(+), 14 deletions(-)

diff --git a/ircbot/plugins/translate.py b/ircbot/plugins/translate.py
index 8fc4281..ffcc3f5 100644
--- a/ircbot/plugins/translate.py
+++ b/ircbot/plugins/translate.py
@@ -1,28 +1,53 @@
 import requests
 import json
+from langdetect import detect_langs
+from langdetect.lang_detect_exception import LangDetectException
 
 from ircbot import bot
 
+# gibberish messages are still assigned a language, however the level of
+# confidence is always less than 0.9, so anything below that is ignored
+MIN_CONFIDENCE = 0.9
+
 
-def translate(bot, text):
+def detect_language(text):
+    """Return (language code, confidence) of langdetect's best guess.
+
+    Returns (None, 0.0) when langdetect finds no usable features, e.g. for
+    empty messages or messages made only of digits and punctuation.
+    """
+    try:
+        best = detect_langs(text)[0]
+    except LangDetectException:
+        return None, 0.0
+    return best.lang, best.prob
+
+
+def translate(bot, text, lang=None):
+    """Translate text into the configured channel language via Yandex.
+
+    lang is the source language code; Yandex auto-detects it when omitted.
+    """
     api_key = bot.config['Yandex']['translate_key']
-    lang = check_lang(bot, text)
-    translate_url = "https://translate.yandex.net/api/v1.5/tr.json/translate?key={}&text={}&lang={}-{}".format(api_key, text, lang, bot.config["System"]["lang"])
-    api_response = requests.get(translate_url).text
+    target = bot.config["System"]["lang"]
+    translate_url = "https://translate.yandex.net/api/v1.5/tr.json/translate"
+    params = {
+        'key': api_key,
+        'text': text,
+        'lang': target if lang is None else "{}-{}".format(lang, target),
+    }
+    # pass the message via params so requests URL-encodes '&', '#', etc.
+    api_response = requests.get(translate_url, params=params).text
     api_json = json.loads(api_response)
     print(api_json)
     return api_json['text'][0]
 
 
-def check_lang(bot, text):
-    api_key = bot.config['Yandex']['translate_key']
-    detect_url = "https://translate.yandex.net/api/v1.5/tr.json/detect?key={}&text={}".format(api_key, text)
-    detect_response = requests.get(detect_url).text
-    detect_json = json.loads(detect_response)
-    return detect_json["text"]
-
-
 @bot.hook()
 def message_hook(bot, channel, sender, message):
-    if check_lang(bot, message) != bot.config["System"]["lang"]:
-        bot.message(channel, "translation: {}").format(translate(bot, message))
+    lang, confidence = detect_language(message)
+    # only act on confident detections of a language other than the channel's
+    if lang is None or confidence < MIN_CONFIDENCE:
+        return
+    if lang != bot.config["System"]["lang"]:
+        bot.message(channel, "translation: {}".format(translate(bot, message, lang)))
diff --git a/requirements.txt b/requirements.txt
index 631d832..b87a334 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -25,3 +25,4 @@ traitlets==4.0.0
 urbandictionary==1.1
 dnspython==1.15.0
 raven==6.4.0
+langdetect==1.0.7