From 83c60471260f7043062439ce81efa9071e023e23 Mon Sep 17 00:00:00 2001 From: DRTAG Date: Fri, 12 Apr 2024 17:31:24 +0900 Subject: [PATCH 1/3] Update spell_checker.py error fix --- hanspell/spell_checker.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/hanspell/spell_checker.py b/hanspell/spell_checker.py index 1f0968e..b965de4 100644 --- a/hanspell/spell_checker.py +++ b/hanspell/spell_checker.py @@ -46,7 +46,9 @@ def check(text): payload = { 'color_blindness': '0', - 'q': text + 'q': text, + 'passportKey': '95a99f5509c89fb471fd3a5ad4c3477f8240dc11', + '_callback': 'jQuery112403254791378319444_1712907594516' } headers = { @@ -57,7 +59,10 @@ def check(text): start_time = time.time() r = _agent.get(base_url, params=payload, headers=headers) passed_time = time.time() - start_time - + r = r.text + start = r.index('({')+1 + end = r.index('})')+1 + r=r[start:end] data = json.loads(r.text) html = data['message']['result']['html'] result = { From 71179984415e53a5ed592e122ad5d1473b6af18a Mon Sep 17 00:00:00 2001 From: DRTAG Date: Fri, 12 Apr 2024 17:39:49 +0900 Subject: [PATCH 2/3] Update spell_checker.py Update code --- hanspell/spell_checker.py | 60 ++++++++++++++++++++++++++++++--------- 1 file changed, 46 insertions(+), 14 deletions(-) diff --git a/hanspell/spell_checker.py b/hanspell/spell_checker.py index b965de4..ae54a93 100644 --- a/hanspell/spell_checker.py +++ b/hanspell/spell_checker.py @@ -7,6 +7,9 @@ import json import time import sys +import re +from cachetools import TTLCache +from urllib import parse from collections import OrderedDict import xml.etree.ElementTree as ET @@ -17,7 +20,24 @@ _agent = requests.Session() PY3 = sys.version_info[0] == 3 +cache = TTLCache(maxsize = 10, ttl = 3600) +def read_token(): + try: + TOKEN = cache.get('PASSPORT_TOKEN') + return TOKEN + except KeyError: + return None + +def update_token(agent): + + html = agent.get(url='https://search.naver.com/search.naver?where=nexearch&sm=top_hty&fbm=1&ie=utf8&query=맞춤법검사기') + + match = re.search('passportKey=([a-zA-Z0-9]+)', html.text) + if match is not None: + TOKEN = parse.unquote(match.group(1)) + cache['PASSPORT_TOKEN'] = TOKEN + return TOKEN def _remove_tags(text): text = u'{}'.format(text).replace('
','') @@ -28,6 +48,30 @@ def _remove_tags(text): return result +def get_response(TOKEN, text): + + if TOKEN is None: + TOKEN = update_token(_agent) + + payload = { + 'passportKey' : TOKEN, + 'q': text, + 'color_blindness': 0 + } + + headers = { + 'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.133 Safari/537.36', + 'referer': 'https://search.naver.com/', + } + + r = _agent.get(base_url, params=payload, headers=headers) + data = json.loads(r.text) + + if 'error' in data['message'] : + r = get_response(update_token(_agent), text) + + return r + def check(text): """ @@ -43,21 +87,9 @@ def check(text): # 최대 500자까지 가능. if len(text) > 500: return Checked(result=False) - - payload = { - 'color_blindness': '0', - 'q': text, - 'passportKey': '95a99f5509c89fb471fd3a5ad4c3477f8240dc11', - '_callback': 'jQuery112403254791378319444_1712907594516' - } - - headers = { - 'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.133 Safari/537.36', - 'referer': 'https://search.naver.com/', - } - + start_time = time.time() - r = _agent.get(base_url, params=payload, headers=headers) + r = get_response(read_token(), text) passed_time = time.time() - start_time r = r.text start = r.index('({')+1 From 2ba2639975d55f98bea4878fd31ec8f03385f4fd Mon Sep 17 00:00:00 2001 From: DRTAG Date: Fri, 12 Apr 2024 17:45:28 +0900 Subject: [PATCH 3/3] Update spell_checker.py --- hanspell/spell_checker.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/hanspell/spell_checker.py b/hanspell/spell_checker.py index ae54a93..17e372e 100644 --- a/hanspell/spell_checker.py +++ b/hanspell/spell_checker.py @@ -91,10 +91,6 @@ def check(text): start_time = time.time() r = get_response(read_token(), text) passed_time = time.time() - start_time - r = r.text - start = r.index('({')+1 - end = r.index('})')+1 - r=r[start:end] data = json.loads(r.text) html = data['message']['result']['html'] result = {