From ea41da4102f1a09174b421bfc0be6e25c5ab3a2e Mon Sep 17 00:00:00 2001 From: vasyoid Date: Wed, 13 Mar 2019 00:29:26 +0300 Subject: [PATCH 01/27] Create project structure and implement tcp connection --- http-proxy/acceptor.py | 40 +++++++++++++++++++++++++++++++++++++ http-proxy/cache.py | 10 ++++++++++ http-proxy/connection.py | 43 ++++++++++++++++++++++++++++++++++++++++ http-proxy/main.py | 11 ++++++++++ http-proxy/message.py | 23 +++++++++++++++++++++ http-proxy/worker.py | 23 +++++++++++++++++++++ 6 files changed, 150 insertions(+) create mode 100644 http-proxy/acceptor.py create mode 100644 http-proxy/cache.py create mode 100644 http-proxy/connection.py create mode 100644 http-proxy/main.py create mode 100644 http-proxy/message.py create mode 100644 http-proxy/worker.py diff --git a/http-proxy/acceptor.py b/http-proxy/acceptor.py new file mode 100644 index 0000000..7a80444 --- /dev/null +++ b/http-proxy/acceptor.py @@ -0,0 +1,40 @@ +from socket import socket, AF_INET, SOCK_STREAM, SOL_SOCKET, SO_REUSEADDR, SHUT_WR +from threading import Thread, RLock + +from cache import Cache +from worker import Worker + +BACKLOG_SIZE = 10 + + +class Acceptor(Thread): + def __init__(self, address, port): + super().__init__() + self.__address = address + self.__port = port + self.__server_socket = socket(AF_INET, SOCK_STREAM) + self.__server_socket.setsockopt(SOL_SOCKET, SO_REUSEADDR, 1) + self.__server_socket.bind((address, port)) + self.__server_socket.listen(BACKLOG_SIZE) + self.__interrupted_mutex = RLock() + self.__interrupted = False + + def interrupt(self): + with self.__interrupted_mutex: + socket(AF_INET, SOCK_STREAM).connect((self.__address, self.__port)) + self.__interrupted = True + + def run(self): + workers = [] + cache = Cache() + while True: + (client_socket, _) = self.__server_socket.accept() + with self.__interrupted_mutex: + if self.__interrupted: + break + worker = Worker(client_socket, cache) + workers.append(worker) + worker.start() + self.__server_socket.shutdown(SHUT_WR) + for worker in workers: + worker.join() diff --git a/http-proxy/cache.py b/http-proxy/cache.py new file mode 100644 index 0000000..78d3a21 --- /dev/null +++ b/http-proxy/cache.py @@ -0,0 +1,10 @@ +class Cache: + + def __init__(self): + pass + + def get(self, message): + pass + + def put(self, message): + pass diff --git a/http-proxy/connection.py b/http-proxy/connection.py new file mode 100644 index 0000000..9ea0f7a --- /dev/null +++ b/http-proxy/connection.py @@ -0,0 +1,43 @@ +from socket import socket, gethostbyname, AF_INET, SOCK_STREAM, SHUT_WR + +MAX_CHUNK_LEN = 1024 +HTTP_PORT = 80 + + +class Connection: + + def __init__(self, sock=None): + self.__socket = sock + + def establish(self, url): + self.__socket = socket(AF_INET, SOCK_STREAM) + host = gethostbyname(url) + self.__socket.connect((host, HTTP_PORT)) + + def close(self): + self.__socket.shutdown(SHUT_WR) + + def __receive(self): + chunks = [] + total_received = 0 + while True: + chunk = self.__socket.recv(MAX_CHUNK_LEN) + pass + chunks.append(chunk) + print(chunk) + total_received += len(chunk) + return b''.join(chunks) + + def __send(self, msg): + total_sent = 0 + while total_sent < msg.len: + sent = self.__socket.send(msg[total_sent:]) + if sent == 0: + break + total_sent += sent + + def receive_message(self): + pass + + def send_message(self, message): + pass diff --git a/http-proxy/main.py b/http-proxy/main.py new file mode 100644 index 0000000..bf48bbb --- /dev/null +++ b/http-proxy/main.py @@ -0,0 +1,11 @@ +from sys import argv +from acceptor import Acceptor + + +if __name__ == '__main__': + port = int(argv[1]) + my_server = Acceptor('', port) + my_server.start() + x = input("input anything to exit") + my_server.interrupt() + my_server.join() diff --git a/http-proxy/message.py b/http-proxy/message.py new file mode 100644 index 0000000..233ac17 --- /dev/null +++ b/http-proxy/message.py @@ -0,0 +1,23 @@ +class Message: + + def __init__(self, start_line): + self.__body = "" + self.__start_line = start_line + + def add_header(self, header): + pass + + def set_body(self, body): + self.__body = body + + def get_body(self): + return self.__body + + def get_status(self): + pass + + def to_str(self): + pass + + def get_host(self): + pass diff --git a/http-proxy/worker.py b/http-proxy/worker.py new file mode 100644 index 0000000..a41c446 --- /dev/null +++ b/http-proxy/worker.py @@ -0,0 +1,23 @@ +from threading import Thread +from connection import Connection + + +class Worker(Thread): + + def __init__(self, sock, cache): + super().__init__() + self.__socket = sock + self.__cache = cache + + def run(self): + client_connection = Connection(self.__socket) + request = client_connection.receive_message() + response = self.__cache.get(request) + if response is None: + server_connection = Connection() + server_connection.establish(request.get_host()) + server_connection.send_message(request) + response = server_connection.receive_message() + server_connection.close() + client_connection.send_message(response) + client_connection.close() From 5ab4004b66b2913e0347f2e23a44ae121946fc33 Mon Sep 17 00:00:00 2001 From: vasyoid Date: Wed, 13 Mar 2019 11:33:01 +0300 Subject: [PATCH 02/27] Add workers interruption on exit --- http-proxy/acceptor.py | 1 + http-proxy/worker.py | 11 +++++++---- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/http-proxy/acceptor.py b/http-proxy/acceptor.py index 7a80444..2989833 100644 --- a/http-proxy/acceptor.py +++ b/http-proxy/acceptor.py @@ -37,4 +37,5 @@ def run(self): worker.start() self.__server_socket.shutdown(SHUT_WR) for worker in workers: + worker.interrupt() worker.join() diff --git a/http-proxy/worker.py b/http-proxy/worker.py index a41c446..a17af7d 100644 --- a/http-proxy/worker.py +++ b/http-proxy/worker.py @@ -8,10 +8,13 @@ def __init__(self, sock, cache): super().__init__() self.__socket = sock self.__cache = cache + self.__client_connection = Connection(self.__socket) + + def interrupt(self): + self.__client_connection.close() def run(self): - client_connection = Connection(self.__socket) - request = client_connection.receive_message() + request = self.__client_connection.receive_message() response = self.__cache.get(request) if response is None: server_connection = Connection() @@ -19,5 +22,5 @@ def run(self): server_connection.send_message(request) response = server_connection.receive_message() server_connection.close() - client_connection.send_message(response) - client_connection.close() + self.__client_connection.send_message(response) + self.__client_connection.close() From a72cb1e25ffc0841f14046063cba02c6ad52abfe Mon Sep 17 00:00:00 2001 From: vasyoid Date: Wed, 13 Mar 2019 14:07:20 +0300 Subject: [PATCH 03/27] Start implementing httparser --- http-proxy/connection.py | 24 +++++++++--------------- http-proxy/httparser.py | 34 ++++++++++++++++++++++++++++++++++ http-proxy/main.py | 4 ++-- http-proxy/message.py | 6 +++++- 4 files changed, 50 insertions(+), 18 deletions(-) create mode 100644 http-proxy/httparser.py diff --git a/http-proxy/connection.py b/http-proxy/connection.py index 9ea0f7a..1f0a0c2 100644 --- a/http-proxy/connection.py +++ b/http-proxy/connection.py @@ -1,5 +1,7 @@ from socket import socket, gethostbyname, AF_INET, SOCK_STREAM, SHUT_WR +from httparser import HTTParser + MAX_CHUNK_LEN = 1024 HTTP_PORT = 80 @@ -17,27 +19,19 @@ def establish(self, url): def close(self): self.__socket.shutdown(SHUT_WR) - def __receive(self): - chunks = [] - total_received = 0 + def receive_message(self): + parser = HTTParser() while True: chunk = self.__socket.recv(MAX_CHUNK_LEN) - pass - chunks.append(chunk) - print(chunk) - total_received += len(chunk) - return b''.join(chunks) + message = parser.append(chunk) + if message is not None: + return message - def __send(self, msg): + def send_message(self, message): + msg = message.to_str() total_sent = 0 while total_sent < msg.len: sent = self.__socket.send(msg[total_sent:]) if sent == 0: break total_sent += sent - - def receive_message(self): - pass - - def send_message(self, message): - pass diff --git a/http-proxy/httparser.py b/http-proxy/httparser.py new file mode 100644 index 0000000..ea3e6c3 --- /dev/null +++ b/http-proxy/httparser.py @@ -0,0 +1,34 @@ +from message import Message +from enum import Enum +from collections import deque + + +class HTTParser: + + def __init__(self): + self.__message = Message() + self.__state = self.State.READING_START_LINE + self.__prefix = b"" + + def append(self, chunk): + tokens = self.__tokenize(chunk) + if self.__parse(tokens): + return self.__message + return None + + def __tokenize(self, chunk): + tokens = deque() + tokens.extend((self.__prefix + chunk).splitlines()) + if not chunk.endswith(b"\r\n"): + self.__prefix = tokens[-1] + tokens.pop() + else: + self.__prefix = b"" + return tokens + + + + class State(Enum): + READING_START_LINE = 1 + READING_HEADERS = 2 + READING_BODY = 3 diff --git a/http-proxy/main.py b/http-proxy/main.py index bf48bbb..7337549 100644 --- a/http-proxy/main.py +++ b/http-proxy/main.py @@ -2,9 +2,9 @@ from acceptor import Acceptor -if __name__ == '__main__': +if __name__ == "__main__": port = int(argv[1]) - my_server = Acceptor('', port) + my_server = Acceptor("", port) my_server.start() x = input("input anything to exit") my_server.interrupt() diff --git a/http-proxy/message.py b/http-proxy/message.py index 233ac17..449e8cd 100644 --- a/http-proxy/message.py +++ b/http-proxy/message.py @@ -1,7 +1,11 @@ class Message: - def __init__(self, start_line): + def __init__(self): self.__body = "" + self.__start_line = "" + self.__headers = [] + + def set_start_line(self, start_line): self.__start_line = start_line def add_header(self, header): From c046335c6fe7993e2306b092981269a808851339 Mon Sep 17 00:00:00 2001 From: vasyoid Date: Wed, 13 Mar 2019 14:54:41 +0300 Subject: [PATCH 04/27] Implement HTTParser --- http-proxy/httparser.py | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/http-proxy/httparser.py b/http-proxy/httparser.py index ea3e6c3..a25a8b3 100644 --- a/http-proxy/httparser.py +++ b/http-proxy/httparser.py @@ -1,13 +1,12 @@ from message import Message from enum import Enum -from collections import deque class HTTParser: def __init__(self): self.__message = Message() - self.__state = self.State.READING_START_LINE + self.__state = self.State.START_LINE self.__prefix = b"" def append(self, chunk): @@ -17,8 +16,7 @@ def append(self, chunk): return None def __tokenize(self, chunk): - tokens = deque() - tokens.extend((self.__prefix + chunk).splitlines()) + tokens = (self.__prefix + chunk).splitlines() if not chunk.endswith(b"\r\n"): self.__prefix = tokens[-1] tokens.pop() @@ -26,9 +24,22 @@ def __tokenize(self, chunk): self.__prefix = b"" return tokens - + def __parse(self, tokens): + for token in tokens: + if self.__state == self.State.START_LINE: + self.__message.set_start_line(token) + self.__state = self.State.HEADERS + elif self.__state == self.State.HEADERS: + if not token: + self.__state = self.State.BODY + continue + header = token.split(": ", maxsplit=1) + self.__message.add_header(header[0], header[1]) + elif self.__message.append_to_body(token): + return True + return False class State(Enum): - READING_START_LINE = 1 - READING_HEADERS = 2 - READING_BODY = 3 + START_LINE = 1 + HEADERS = 2 + BODY = 3 From b94f9a45590e151fd6fd8d38cc052449aaf73513 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=90=D0=BB=D0=B5=D0=BA=D1=81=D0=B0=D0=BD=D0=B4=D1=80?= =?UTF-8?q?=D0=B0=20=D0=9E=D1=80=D0=BB=D0=BE=D0=B2=D0=B0?= Date: Wed, 13 Mar 2019 17:48:47 +0300 Subject: [PATCH 05/27] add message and simple cache --- http-proxy/cache.py | 14 +++++++++----- http-proxy/message.py | 36 ++++++++++++++++++++++++++++-------- 2 files changed, 37 insertions(+), 13 deletions(-) diff --git a/http-proxy/cache.py b/http-proxy/cache.py index 78d3a21..4935d34 100644 --- a/http-proxy/cache.py +++ b/http-proxy/cache.py @@ -1,10 +1,14 @@ class Cache: def __init__(self): - pass + self.__cache = {} - def get(self, message): - pass + def get(self, request): + if request.can_cache(): + return self.__cache.update(request) + else: + return None - def put(self, message): - pass + def put(self, request, response): + if request.can_cache() and response.can_cache(): + self.__cache[request] = response diff --git a/http-proxy/message.py b/http-proxy/message.py index 449e8cd..6edf98a 100644 --- a/http-proxy/message.py +++ b/http-proxy/message.py @@ -1,27 +1,47 @@ +import re + class Message: def __init__(self): self.__body = "" self.__start_line = "" - self.__headers = [] + self.__headers = {} def set_start_line(self, start_line): self.__start_line = start_line - def add_header(self, header): - pass + def add_header(self, header_title, header_value): + self.__headers[header_title] = header_value - def set_body(self, body): - self.__body = body + def append_to_body(self, body): + self.__body += body + if len(self.__body) == self.__get_body_len(): + return True + else: + return False def get_body(self): return self.__body def get_status(self): - pass + return self.__headers.get('Status:') def to_str(self): - pass + message = self.__start_line + for key, value in self.__headers.items(): + message += key + ' ' + value + message += '\r\n' + message += self.__body + return message def get_host(self): - pass + if self.__headers.get('Host:') is not None: + return re.split(r":", self.__headers.get('Host:'))[0] + else: + return None + + def can_cache(self): + return re.search(r"no-cache|no-store", self.__headers.get('Host:')) is None + + def __get_body_len(self): + return int(self.__headers.get('Content-Length:')) From 2237168c27fed0642ec97ec58e99e800c22fd88f Mon Sep 17 00:00:00 2001 From: vasyoid Date: Wed, 13 Mar 2019 18:36:36 +0300 Subject: [PATCH 06/27] Refactor message class --- http-proxy/message.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/http-proxy/message.py b/http-proxy/message.py index 6edf98a..b93b221 100644 --- a/http-proxy/message.py +++ b/http-proxy/message.py @@ -1,4 +1,5 @@ -import re +from re import split, search + class Message: @@ -24,24 +25,24 @@ def get_body(self): return self.__body def get_status(self): - return self.__headers.get('Status:') + return self.__headers.get("Status:") def to_str(self): message = self.__start_line for key, value in self.__headers.items(): - message += key + ' ' + value - message += '\r\n' + message += key + " " + value + message += "\r\n" message += self.__body return message def get_host(self): - if self.__headers.get('Host:') is not None: - return re.split(r":", self.__headers.get('Host:'))[0] + if self.__headers.get("Host:") is not None: + return split(r":", self.__headers.get("Host:"))[0] else: return None def can_cache(self): - return re.search(r"no-cache|no-store", self.__headers.get('Host:')) is None + return search(r"no-cache|no-store", self.__headers.get("Host:")) is None def __get_body_len(self): - return int(self.__headers.get('Content-Length:')) + return int(self.__headers.get("Content-Length:")) From 1bfee651c701d6d9fafaaf83001e6e087a861fb7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=90=D0=BB=D0=B5=D0=BA=D1=81=D0=B0=D0=BD=D0=B4=D1=80?= =?UTF-8?q?=D0=B0=20=D0=9E=D1=80=D0=BB=D0=BE=D0=B2=D0=B0?= Date: Wed, 13 Mar 2019 20:17:20 +0300 Subject: [PATCH 07/27] improve cache --- http-proxy/cache.py | 33 ++++++++++++++++++++++++++++----- 1 file changed, 28 insertions(+), 5 deletions(-) diff --git a/http-proxy/cache.py b/http-proxy/cache.py index 4935d34..30ae124 100644 --- a/http-proxy/cache.py +++ b/http-proxy/cache.py @@ -1,14 +1,37 @@ -class Cache: +import time +import sys + - def __init__(self): +class Cache: + def __init__(self, expire, max_size): self.__cache = {} + self.__expire = expire + self.__max_size = max_size def get(self, request): if request.can_cache(): - return self.__cache.update(request) + return self.__check_expire(request) else: return None def put(self, request, response): - if request.can_cache() and response.can_cache(): - self.__cache[request] = response + if request.can_cache() and response.can_cache() and self.__clear_old_cache: + self.__cache[request] = (time.time(), response) + + def __check_expire(self, request): + set_time, value = self.__cache.get(request) + if set_time + self.__expire > time.time(): + self.__cache.pop(request) + return None + else: + return value + + def __check_cache_size(self): + return sys.getsizeof(self.__cache) < self.__max_size + + def __clear_old_cache(self): + if not self.__check_cache_size(): + for key, set_time, value in dict.items(): + if set_time + self.__expire > time.time(): + self.__cache.pop(key) + return self.__check_cache_size() From d92d419b4548635a2238b2f4b2377f1fbb8dc3a4 Mon Sep 17 00:00:00 2001 From: vasyoid Date: Wed, 13 Mar 2019 23:55:10 +0300 Subject: [PATCH 08/27] Refactor message class --- http-proxy/message.py | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/http-proxy/message.py b/http-proxy/message.py index b93b221..e4104ae 100644 --- a/http-proxy/message.py +++ b/http-proxy/message.py @@ -1,11 +1,13 @@ from re import split, search +NEW_LINE = b"\r\n" + class Message: def __init__(self): - self.__body = "" - self.__start_line = "" + self.__body = b"" + self.__start_line = b"" self.__headers = {} def set_start_line(self, start_line): @@ -14,35 +16,33 @@ def set_start_line(self, start_line): def add_header(self, header_title, header_value): self.__headers[header_title] = header_value - def append_to_body(self, body): - self.__body += body - if len(self.__body) == self.__get_body_len(): - return True - else: - return False + def append_to_body(self, chunk): + self.__body += chunk + return len(self.__body) == self.__get_body_len() def get_body(self): return self.__body def get_status(self): - return self.__headers.get("Status:") + return self.__headers.get(b"Status:") def to_str(self): - message = self.__start_line - for key, value in self.__headers.items(): - message += key + " " + value - message += "\r\n" - message += self.__body + message = self.__start_line + NEW_LINE + message += NEW_LINE.join([b"%s %s" % header for header in self.__headers.items()]) + message += NEW_LINE * 2 + if self.__get_body_len() > 0: + message += self.__body + NEW_LINE return message def get_host(self): - if self.__headers.get("Host:") is not None: - return split(r":", self.__headers.get("Host:"))[0] + host = self.__headers.get(b"Host:") + if host: + return split(r":", host)[0] else: return None def can_cache(self): - return search(r"no-cache|no-store", self.__headers.get("Host:")) is None + return search(r"no-cache|no-store", self.__headers.get(b"Cache-Control:")) is None def __get_body_len(self): - return int(self.__headers.get("Content-Length:")) + return int(self.__headers.get(b"Content-Length:", b"0")) From 17f4fcf314e93e5cb1af1f687d0b5e88ebe27c9d Mon Sep 17 00:00:00 2001 From: vasyoid Date: Wed, 13 Mar 2019 23:56:05 +0300 Subject: [PATCH 09/27] Fix parsing HTTP body --- http-proxy/httparser.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/http-proxy/httparser.py b/http-proxy/httparser.py index a25a8b3..178be67 100644 --- a/http-proxy/httparser.py +++ b/http-proxy/httparser.py @@ -32,8 +32,10 @@ def __parse(self, tokens): elif self.__state == self.State.HEADERS: if not token: self.__state = self.State.BODY + if self.__message.append_to_body(b""): + return True continue - header = token.split(": ", maxsplit=1) + header = token.split(b": ", maxsplit=1) self.__message.add_header(header[0], header[1]) elif self.__message.append_to_body(token): return True From f62f6cac1e5167ae6a6a78951aaa2e763565e06f Mon Sep 17 00:00:00 2001 From: vasyoid Date: Wed, 13 Mar 2019 23:58:49 +0300 Subject: [PATCH 10/27] Add newline to console start message --- http-proxy/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/http-proxy/main.py b/http-proxy/main.py index 7337549..91eaa3c 100644 --- a/http-proxy/main.py +++ b/http-proxy/main.py @@ -6,6 +6,6 @@ port = int(argv[1]) my_server = Acceptor("", port) my_server.start() - x = input("input anything to exit") + x = input("input anything to exit\n") my_server.interrupt() my_server.join() From 585853bc5ff8470e2b115be9ff85f5c7b11883d1 Mon Sep 17 00:00:00 2001 From: vasyoid Date: Thu, 14 Mar 2019 00:13:10 +0300 Subject: [PATCH 11/27] Convert all binary strings to bin --- http-proxy/connection.py | 6 +++--- http-proxy/httparser.py | 10 +++++----- http-proxy/message.py | 16 ++++++++-------- 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/http-proxy/connection.py b/http-proxy/connection.py index 1f0a0c2..760bdfc 100644 --- a/http-proxy/connection.py +++ b/http-proxy/connection.py @@ -23,14 +23,14 @@ def receive_message(self): parser = HTTParser() while True: chunk = self.__socket.recv(MAX_CHUNK_LEN) - message = parser.append(chunk) + message = parser.append(chunk.decode()) if message is not None: return message def send_message(self, message): - msg = message.to_str() + msg = message.to_str().encode() total_sent = 0 - while total_sent < msg.len: + while total_sent < len(msg): sent = self.__socket.send(msg[total_sent:]) if sent == 0: break diff --git a/http-proxy/httparser.py b/http-proxy/httparser.py index 178be67..e302ef9 100644 --- a/http-proxy/httparser.py +++ b/http-proxy/httparser.py @@ -7,7 +7,7 @@ class HTTParser: def __init__(self): self.__message = Message() self.__state = self.State.START_LINE - self.__prefix = b"" + self.__prefix = "" def append(self, chunk): tokens = self.__tokenize(chunk) @@ -17,11 +17,11 @@ def append(self, chunk): def __tokenize(self, chunk): tokens = (self.__prefix + chunk).splitlines() - if not chunk.endswith(b"\r\n"): + if not chunk.endswith("\r\n"): self.__prefix = tokens[-1] tokens.pop() else: - self.__prefix = b"" + self.__prefix = "" return tokens def __parse(self, tokens): @@ -32,10 +32,10 @@ def __parse(self, tokens): elif self.__state == self.State.HEADERS: if not token: self.__state = self.State.BODY - if self.__message.append_to_body(b""): + if self.__message.append_to_body(""): return True continue - header = token.split(b": ", maxsplit=1) + header = token.split(": ", maxsplit=1) self.__message.add_header(header[0], header[1]) elif self.__message.append_to_body(token): return True diff --git a/http-proxy/message.py b/http-proxy/message.py index e4104ae..668aa4a 100644 --- a/http-proxy/message.py +++ b/http-proxy/message.py @@ -1,13 +1,13 @@ from re import split, search -NEW_LINE = b"\r\n" +NEW_LINE = "\r\n" class Message: def __init__(self): - self.__body = b"" - self.__start_line = b"" + self.__body = "" + self.__start_line = "" self.__headers = {} def set_start_line(self, start_line): @@ -24,25 +24,25 @@ def get_body(self): return self.__body def get_status(self): - return self.__headers.get(b"Status:") + return self.__headers.get("Status") def to_str(self): message = self.__start_line + NEW_LINE - message += NEW_LINE.join([b"%s %s" % header for header in self.__headers.items()]) + message += NEW_LINE.join(["%s %s" % header for header in self.__headers.items()]) message += NEW_LINE * 2 if self.__get_body_len() > 0: message += self.__body + NEW_LINE return message def get_host(self): - host = self.__headers.get(b"Host:") + host = self.__headers.get("Host") if host: return split(r":", host)[0] else: return None def can_cache(self): - return search(r"no-cache|no-store", self.__headers.get(b"Cache-Control:")) is None + return search(r"no-cache|no-store", self.__headers.get("Cache-Control")) is None def __get_body_len(self): - return int(self.__headers.get(b"Content-Length:", b"0")) + return int(self.__headers.get("Content-Length", "0")) From a49a8dc4ab3698cf84d4b216bd03276d33cf7ed0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=90=D0=BB=D0=B5=D0=BA=D1=81=D0=B0=D0=BD=D0=B4=D1=80?= =?UTF-8?q?=D0=B0=20=D0=9E=D1=80=D0=BB=D0=BE=D0=B2=D0=B0?= Date: Thu, 14 Mar 2019 00:51:14 +0300 Subject: [PATCH 12/27] fix message, add tests --- http-proxy/cache.py | 22 ++++++++++++++-------- http-proxy/message.py | 2 +- http-proxy/test.py | 34 ++++++++++++++++++++++++++++++++++ 3 files changed, 49 insertions(+), 9 deletions(-) create mode 100644 http-proxy/test.py diff --git a/http-proxy/cache.py b/http-proxy/cache.py index 30ae124..62c42da 100644 --- a/http-proxy/cache.py +++ b/http-proxy/cache.py @@ -1,5 +1,6 @@ import time import sys +from operator import itemgetter class Cache: @@ -9,29 +10,34 @@ def __init__(self, expire, max_size): self.__max_size = max_size def get(self, request): - if request.can_cache(): - return self.__check_expire(request) + if request.can_cache() and not self.__check_expire(request): + set_time, value = self.__cache.get(request) + return value else: return None def put(self, request, response): - if request.can_cache() and response.can_cache() and self.__clear_old_cache: + if request.can_cache() and response.can_cache(): self.__cache[request] = (time.time(), response) def __check_expire(self, request): set_time, value = self.__cache.get(request) - if set_time + self.__expire > time.time(): - self.__cache.pop(request) - return None + if set_time is not None: + if set_time + self.__expire > time.time(): + self.__cache.pop(request) + return True + else: + return False else: - return value + return True def __check_cache_size(self): return sys.getsizeof(self.__cache) < self.__max_size def __clear_old_cache(self): if not self.__check_cache_size(): - for key, set_time, value in dict.items(): + for key, set_time, value in self.__cache.items(): if set_time + self.__expire > time.time(): self.__cache.pop(key) + sorted(self.__cache.items(), key=itemgetter(1)) return self.__check_cache_size() diff --git a/http-proxy/message.py b/http-proxy/message.py index 668aa4a..0c768e4 100644 --- a/http-proxy/message.py +++ b/http-proxy/message.py @@ -28,7 +28,7 @@ def get_status(self): def to_str(self): message = self.__start_line + NEW_LINE - message += NEW_LINE.join(["%s %s" % header for header in self.__headers.items()]) + message += NEW_LINE.join(["%s: %s" % header for header in self.__headers.items()]) message += NEW_LINE * 2 if self.__get_body_len() > 0: message += self.__body + NEW_LINE diff --git a/http-proxy/test.py b/http-proxy/test.py new file mode 100644 index 0000000..1113a6e --- /dev/null +++ b/http-proxy/test.py @@ -0,0 +1,34 @@ +import unittest +from cache import Cache +from message import Message + +class Test(unittest.TestCase): + + def test_create_message(self): + message = Message() + message.set_start_line("GET /background.png HTTP/1.0") + message.add_header('Host', 'example.org') + message.add_header('Content-Length', '110') + message.append_to_body('Click here') + self.assertEqual('GET /background.png HTTP/1.0\r\nHost: example.org\r\nContent-Length: 110\r\n\r\nClick here\r\n', message.to_str()) + + def test_get_host_message(self): + message = Message() + message.set_start_line("GET /background.png HTTP/1.0") + message.add_header('Host', 'example.org') + message.add_header('Content-Length', '110') + message.append_to_body('Click here') + host = message.get_host() + self.assertEqual('example.org', host) + + def test_need_cash_message(self): + message = Message() + message.set_start_line("GET /background.png HTTP/1.0") + message.add_header('Host', 'example.org') + message.add_header('Content-Length', '110') + message.append_to_body('Click here') + can = message.can_cache() + message.add_header('Cache-Control', 'no-store') + cant = message.can_cache() + self.assertEqual(True, can) + self.assertEqual(False, cant) \ No newline at end of file From 77afbdb0fdfc211c666ad89dda6aff2866244bef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=90=D0=BB=D0=B5=D0=BA=D1=81=D0=B0=D0=BD=D0=B4=D1=80?= =?UTF-8?q?=D0=B0=20=D0=9E=D1=80=D0=BB=D0=BE=D0=B2=D0=B0?= Date: Thu, 14 Mar 2019 01:06:27 +0300 Subject: [PATCH 13/27] finish cache, not tested --- http-proxy/cache.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/http-proxy/cache.py b/http-proxy/cache.py index 62c42da..6462169 100644 --- a/http-proxy/cache.py +++ b/http-proxy/cache.py @@ -11,8 +11,7 @@ def __init__(self, expire, max_size): def get(self, request): if request.can_cache() and not self.__check_expire(request): - set_time, value = self.__cache.get(request) - return value + return self.__cache.get(request) else: return None @@ -39,5 +38,9 @@ def __clear_old_cache(self): for key, set_time, value in self.__cache.items(): if set_time + self.__expire > time.time(): self.__cache.pop(key) - sorted(self.__cache.items(), key=itemgetter(1)) - return self.__check_cache_size() + if not self.__check_cache_size(): + sorted_by_time_cache = sorted(self.__cache.items(), key=itemgetter(1)) + for key, value in sorted_by_time_cache: + self.__cache.pop(key) + if self.__check_cache_size(): + return From 06350a1cb12fb2c07467c6e00c5dde84598d03b7 Mon Sep 17 00:00:00 2001 From: vasyoid Date: Thu, 14 Mar 2019 01:58:04 +0300 Subject: [PATCH 14/27] Fix httparser --- http-proxy/connection.py | 6 ++--- http-proxy/httparser.py | 57 ++++++++++++++-------------------------- http-proxy/message.py | 8 +++--- http-proxy/worker.py | 2 ++ 4 files changed, 30 insertions(+), 43 deletions(-) diff --git a/http-proxy/connection.py b/http-proxy/connection.py index 760bdfc..e9dd5f6 100644 --- a/http-proxy/connection.py +++ b/http-proxy/connection.py @@ -23,12 +23,12 @@ def receive_message(self): parser = HTTParser() while True: chunk = self.__socket.recv(MAX_CHUNK_LEN) - message = parser.append(chunk.decode()) - if message is not None: + message = parser.append(chunk) + if message: return message def send_message(self, message): - msg = message.to_str().encode() + msg = message.to_bytes() total_sent = 0 while total_sent < len(msg): sent = self.__socket.send(msg[total_sent:]) diff --git a/http-proxy/httparser.py b/http-proxy/httparser.py index e302ef9..528dc80 100644 --- a/http-proxy/httparser.py +++ b/http-proxy/httparser.py @@ -1,47 +1,30 @@ -from message import Message -from enum import Enum +from message import Message, NEW_LINE_B class HTTParser: def __init__(self): self.__message = Message() - self.__state = self.State.START_LINE - self.__prefix = "" + self.__head = "" def append(self, chunk): - tokens = self.__tokenize(chunk) - if self.__parse(tokens): - return self.__message - return None + if self.__head is None: + return self.__parse_body_part(chunk) + if NEW_LINE_B * 2 not in chunk: + self.__head += chunk.decode() + return None + parts = chunk.split(NEW_LINE_B * 2, 1) + self.__head += parts[0].decode() + self.__parse_head() + return self.__parse_body_part(parts[1] if len(parts) > 1 else b"") - def __tokenize(self, chunk): - tokens = (self.__prefix + chunk).splitlines() - if not chunk.endswith("\r\n"): - self.__prefix = tokens[-1] - tokens.pop() - else: - self.__prefix = "" - return tokens + def __parse_head(self): + tokens = self.__head.splitlines() + self.__message.set_start_line(tokens[0]) + for token in tokens[1:]: + header = token.split(": ", maxsplit=1) + self.__message.add_header(header[0], header[1]) + self.__head = None - def __parse(self, tokens): - for token in tokens: - if self.__state == self.State.START_LINE: - self.__message.set_start_line(token) - self.__state = self.State.HEADERS - elif self.__state == self.State.HEADERS: - if not token: - self.__state = self.State.BODY - if self.__message.append_to_body(""): - return True - continue - header = token.split(": ", maxsplit=1) - self.__message.add_header(header[0], header[1]) - elif self.__message.append_to_body(token): - return True - return False - - class State(Enum): - START_LINE = 1 - HEADERS = 2 - BODY = 3 + def __parse_body_part(self, chunk): + return self.__message if self.__message.append_to_body(chunk) else None diff --git a/http-proxy/message.py b/http-proxy/message.py index 0c768e4..093d965 100644 --- a/http-proxy/message.py +++ b/http-proxy/message.py @@ -1,12 +1,13 @@ from re import split, search NEW_LINE = "\r\n" +NEW_LINE_B = b"\r\n" class Message: def __init__(self): - self.__body = "" + self.__body = b"" self.__start_line = "" self.__headers = {} @@ -26,12 +27,13 @@ def get_body(self): def get_status(self): return self.__headers.get("Status") - def to_str(self): + def to_bytes(self): message = self.__start_line + NEW_LINE message += NEW_LINE.join(["%s: %s" % header for header in self.__headers.items()]) message += NEW_LINE * 2 + message = message.encode() if self.__get_body_len() > 0: - message += self.__body + NEW_LINE + message += self.__body + NEW_LINE_B return message def get_host(self): diff --git a/http-proxy/worker.py b/http-proxy/worker.py index a17af7d..498073a 100644 --- a/http-proxy/worker.py +++ b/http-proxy/worker.py @@ -15,12 +15,14 @@ def interrupt(self): def run(self): request = self.__client_connection.receive_message() + print(request.to_bytes()) response = self.__cache.get(request) if response is None: server_connection = Connection() server_connection.establish(request.get_host()) server_connection.send_message(request) response = server_connection.receive_message() + print(response.to_bytes()) server_connection.close() self.__client_connection.send_message(response) self.__client_connection.close() From dc166b2ff47a7b2f344cc65fb38a6ff0fff0d54b Mon Sep 17 00:00:00 2001 From: vasyoid Date: Thu, 14 Mar 2019 02:30:01 +0300 Subject: [PATCH 15/27] Merge cache changes --- http-proxy/acceptor.py | 6 ++++-- http-proxy/main.py | 4 +++- http-proxy/message.py | 2 +- http-proxy/test.py | 30 +++++++++++++++--------------- http-proxy/worker.py | 2 -- 5 files changed, 23 insertions(+), 21 deletions(-) diff --git a/http-proxy/acceptor.py b/http-proxy/acceptor.py index 2989833..75765b1 100644 --- a/http-proxy/acceptor.py +++ b/http-proxy/acceptor.py @@ -8,10 +8,12 @@ class Acceptor(Thread): - def __init__(self, address, port): + def __init__(self, address, port, cache_expire, cache_max_size): super().__init__() self.__address = address self.__port = port + self.__cache_expire = cache_expire + self.__cache_max_size = cache_max_size self.__server_socket = socket(AF_INET, SOCK_STREAM) self.__server_socket.setsockopt(SOL_SOCKET, SO_REUSEADDR, 1) self.__server_socket.bind((address, port)) @@ -26,7 +28,7 @@ def interrupt(self): def run(self): workers = [] - cache = Cache() + cache = Cache(self.__cache_expire, self.__cache_max_size) while True: (client_socket, _) = self.__server_socket.accept() with self.__interrupted_mutex: diff --git a/http-proxy/main.py b/http-proxy/main.py index 91eaa3c..b9ed9ee 100644 --- a/http-proxy/main.py +++ b/http-proxy/main.py @@ -4,7 +4,9 @@ if __name__ == "__main__": port = int(argv[1]) - my_server = Acceptor("", port) + cache_expire = int(argv[2]) + cache_max_size = int(argv[3]) + my_server = Acceptor("", port, cache_expire, cache_max_size) my_server.start() x = input("input anything to exit\n") my_server.interrupt() diff --git a/http-proxy/message.py b/http-proxy/message.py index 093d965..0cedf22 100644 --- a/http-proxy/message.py +++ b/http-proxy/message.py @@ -44,7 +44,7 @@ def get_host(self): return None def can_cache(self): - return search(r"no-cache|no-store", self.__headers.get("Cache-Control")) is None + return search(r"no-cache|no-store", self.__headers.get("Cache-Control", "")) is None def __get_body_len(self): return int(self.__headers.get("Content-Length", "0")) diff --git a/http-proxy/test.py b/http-proxy/test.py index 1113a6e..5816fd4 100644 --- a/http-proxy/test.py +++ b/http-proxy/test.py @@ -1,34 +1,34 @@ import unittest -from cache import Cache from message import Message + class Test(unittest.TestCase): def test_create_message(self): message = Message() message.set_start_line("GET /background.png HTTP/1.0") - message.add_header('Host', 'example.org') - message.add_header('Content-Length', '110') - message.append_to_body('Click here') - self.assertEqual('GET /background.png HTTP/1.0\r\nHost: example.org\r\nContent-Length: 110\r\n\r\nClick here\r\n', message.to_str()) + message.add_header("Host", "example.org") + message.add_header("Content-Length", "110") + message.append_to_body(b'Click here') + self.assertEqual(b'GET /background.png HTTP/1.0\r\nHost: example.org\r\nContent-Length: 110\r\n\r\nClick here\r\n', message.to_bytes()) def test_get_host_message(self): message = Message() message.set_start_line("GET /background.png HTTP/1.0") - message.add_header('Host', 'example.org') - message.add_header('Content-Length', '110') - message.append_to_body('Click here') + message.add_header("Host", "example.org") + message.add_header("Content-Length", "110") + message.append_to_body(b'Click here') host = message.get_host() - self.assertEqual('example.org', host) + self.assertEqual("example.org", host) def test_need_cash_message(self): message = Message() message.set_start_line("GET /background.png HTTP/1.0") - message.add_header('Host', 'example.org') - message.add_header('Content-Length', '110') - message.append_to_body('Click here') + message.add_header("Host", "example.org") + message.add_header("Content-Length", "110") + message.append_to_body(b'Click here') can = message.can_cache() - message.add_header('Cache-Control', 'no-store') + message.add_header("Cache-Control", "no-store") cant = message.can_cache() - self.assertEqual(True, can) - self.assertEqual(False, cant) \ No newline at end of file + self.assertTrue(can) + self.assertFalse(cant) diff --git a/http-proxy/worker.py b/http-proxy/worker.py index 498073a..a17af7d 100644 --- a/http-proxy/worker.py +++ b/http-proxy/worker.py @@ -15,14 +15,12 @@ def interrupt(self): def run(self): request = self.__client_connection.receive_message() - print(request.to_bytes()) response = self.__cache.get(request) if response is None: server_connection = Connection() server_connection.establish(request.get_host()) server_connection.send_message(request) response = server_connection.receive_message() - print(response.to_bytes()) server_connection.close() self.__client_connection.send_message(response) self.__client_connection.close() From 359269a7744467fea6cf7f1d259a6c9125946eec Mon Sep 17 00:00:00 2001 From: vasyoid Date: Thu, 14 Mar 2019 09:48:00 +0300 Subject: [PATCH 16/27] Add chunked transfer support and custom request port --- http-proxy/connection.py | 13 ++++++++----- http-proxy/message.py | 16 +++++++++++++--- http-proxy/test.py | 2 +- 3 files changed, 22 insertions(+), 9 deletions(-) diff --git a/http-proxy/connection.py b/http-proxy/connection.py index e9dd5f6..337c1f5 100644 --- a/http-proxy/connection.py +++ b/http-proxy/connection.py @@ -1,20 +1,21 @@ -from socket import socket, gethostbyname, AF_INET, SOCK_STREAM, SHUT_WR +from socket import socket, AF_INET, SOCK_STREAM, SHUT_WR + from httparser import HTTParser MAX_CHUNK_LEN = 1024 -HTTP_PORT = 80 class Connection: def __init__(self, sock=None): self.__socket = sock + self.__host = "client" - def establish(self, url): + def establish(self, host): + self.__host = host self.__socket = socket(AF_INET, SOCK_STREAM) - host = gethostbyname(url) - self.__socket.connect((host, HTTP_PORT)) + self.__socket.connect(self.__host) def close(self): self.__socket.shutdown(SHUT_WR) @@ -23,6 +24,8 @@ def receive_message(self): parser = HTTParser() while True: chunk = self.__socket.recv(MAX_CHUNK_LEN) + if not chunk: + raise ConnectionAbortedError(self.__host) message = parser.append(chunk) if message: return message diff --git a/http-proxy/message.py b/http-proxy/message.py index 0cedf22..4e56206 100644 --- a/http-proxy/message.py +++ b/http-proxy/message.py @@ -2,6 +2,7 @@ NEW_LINE = "\r\n" NEW_LINE_B = b"\r\n" +HTTP_PORT = 80 class Message: @@ -19,7 +20,12 @@ def add_header(self, header_title, header_value): def append_to_body(self, chunk): self.__body += chunk - return len(self.__body) == self.__get_body_len() + if self.__is_chunked(): + complete = self.__body.endswith(b"0" + NEW_LINE_B * 2) + if complete: + self.__body = self.__body[:-2] + return complete + return len(self.__body) >= self.__get_body_len() def get_body(self): return self.__body @@ -32,14 +38,15 @@ def to_bytes(self): message += NEW_LINE.join(["%s: %s" % header for header in self.__headers.items()]) message += NEW_LINE * 2 message = message.encode() - if self.__get_body_len() > 0: + if self.__get_body_len() > 0 or self.__is_chunked(): message += self.__body + NEW_LINE_B return message def get_host(self): host = self.__headers.get("Host") if host: - return split(r":", host)[0] + host = split(r":", host) + return host[0], int(host[1]) if len(host) > 1 else HTTP_PORT else: return None @@ -48,3 +55,6 @@ def can_cache(self): def __get_body_len(self): return int(self.__headers.get("Content-Length", "0")) + + def __is_chunked(self): + return self.__headers.get("Transfer-Encoding", "") == "chunked" diff --git a/http-proxy/test.py b/http-proxy/test.py index 5816fd4..2b9e5fe 100644 --- a/http-proxy/test.py +++ b/http-proxy/test.py @@ -18,7 +18,7 @@ def test_get_host_message(self): message.add_header("Host", "example.org") message.add_header("Content-Length", "110") message.append_to_body(b'Click here') - host = message.get_host() + (host, _) = message.get_host() self.assertEqual("example.org", host) def test_need_cash_message(self): From d263f3cfd17bc5494329fd4970ed5b55130f2da7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=90=D0=BB=D0=B5=D0=BA=D1=81=D0=B0=D0=BD=D0=B4=D1=80?= =?UTF-8?q?=D0=B0=20=D0=9E=D1=80=D0=BB=D0=BE=D0=B2=D0=B0?= Date: Thu, 14 Mar 2019 22:46:17 +0300 Subject: [PATCH 17/27] add tests, fix cash --- http-proxy/cache.py | 16 ++++++++------ http-proxy/message.py | 2 ++ http-proxy/test.py | 49 ++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 60 insertions(+), 7 deletions(-) diff --git a/http-proxy/cache.py b/http-proxy/cache.py index 6462169..5f91657 100644 --- a/http-proxy/cache.py +++ b/http-proxy/cache.py @@ -11,18 +11,19 @@ def __init__(self, expire, max_size): def get(self, request): if request.can_cache() and not self.__check_expire(request): - return self.__cache.get(request) + return self.__cache.get(request, (None, None)) else: return None def put(self, request, response): + self.__clear_old_cache() if request.can_cache() and response.can_cache(): self.__cache[request] = (time.time(), response) def __check_expire(self, request): - set_time, value = self.__cache.get(request) + set_time, value = self.__cache.get(request, (None, None)) if set_time is not None: - if set_time + self.__expire > time.time(): + if set_time + self.__expire < time.time(): self.__cache.pop(request) return True else: @@ -34,10 +35,13 @@ def __check_cache_size(self): return sys.getsizeof(self.__cache) < self.__max_size def __clear_old_cache(self): + key_for_deleted = [] if not self.__check_cache_size(): - for key, set_time, value in self.__cache.items(): - if set_time + self.__expire > time.time(): - self.__cache.pop(key) + for key, value in self.__cache.items(): + if value[0] + self.__expire > time.time(): + key_for_deleted.append(key) + for key in key_for_deleted: + self.__cache.pop(key) if not self.__check_cache_size(): sorted_by_time_cache = sorted(self.__cache.items(), key=itemgetter(1)) for key, value in sorted_by_time_cache: diff --git a/http-proxy/message.py b/http-proxy/message.py index 0c768e4..2acecd2 100644 --- a/http-proxy/message.py +++ b/http-proxy/message.py @@ -42,6 +42,8 @@ def get_host(self): return None def can_cache(self): + if self.__headers.get("Cache-Control") is None: + return True return search(r"no-cache|no-store", self.__headers.get("Cache-Control")) is None def __get_body_len(self): diff --git a/http-proxy/test.py b/http-proxy/test.py index 1113a6e..0015780 100644 --- a/http-proxy/test.py +++ b/http-proxy/test.py @@ -1,3 +1,4 @@ +import time import unittest from cache import Cache from message import Message @@ -31,4 +32,50 @@ def test_need_cash_message(self): message.add_header('Cache-Control', 'no-store') cant = message.can_cache() self.assertEqual(True, can) - self.assertEqual(False, cant) \ No newline at end of file + self.assertEqual(False, cant) + + def test_put_in_cash(self): + message = Message() + message.set_start_line("GET /background.png HTTP/1.0") + message.add_header('Host', 'example.org') + message.add_header('Content-Length', '110') + message.append_to_body('Click here') + cache = Cache(1000, 100000) + cache.put(message, message) + _, getting_message = cache.get(message) + self.assertEqual(message, getting_message) + + def test_expire_cash(self): + message = Message() + message.set_start_line("GET /background.png HTTP/1.0") + message.add_header('Host', 'example.org') + message.add_header('Content-Length', '110') + message.append_to_body( + 'Click here') + cache = Cache(1, 100000) + cache.put(message, message) + time.sleep(5) + getting_message = cache.get(message) + self.assertEqual(None, getting_message) + + def test_size_cash(self): + message = Message() + message1 = Message() + message.set_start_line("GET /background.png HTTP/1.0") + message1.set_start_line("GET /background.png HTTP/1.0") + message.add_header('Host', 'example.org') + message.add_header('Content-Length', '110') + message1.add_header('Content-Length', '110') + message.append_to_body( + 'Click here') + message1.append_to_body( + 'Click here') + cache = Cache(1, 1) + cache.put(message, message) + _, getting_message = cache.get(message) + cache.put(message1, message1) + _, getting_message1 = cache.get(message1) + empty_message = cache.get(message) + self.assertEqual(message, getting_message) + self.assertEqual(message1, getting_message1) + self.assertEqual(None, empty_message) From 6f7e8d28f16ccf53c756738a1295b34806892a28 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=90=D0=BB=D0=B5=D0=BA=D1=81=D0=B0=D0=BD=D0=B4=D1=80?= =?UTF-8?q?=D0=B0=20=D0=9E=D1=80=D0=BB=D0=BE=D0=B2=D0=B0?= Date: Thu, 14 Mar 2019 23:36:03 +0300 Subject: [PATCH 18/27] fix tests --- http-proxy/test.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/http-proxy/test.py b/http-proxy/test.py index 5fa2f51..977f073 100644 --- a/http-proxy/test.py +++ b/http-proxy/test.py @@ -10,24 +10,24 @@ def test_create_message(self): message.set_start_line("GET /background.png HTTP/1.0") message.add_header('Host', 'example.org') message.add_header('Content-Length', '110') - message.append_to_body('Click here') - self.assertEqual('GET /background.png HTTP/1.0\r\nHost: example.org\r\nContent-Length: 110\r\n\r\nClick here\r\n', message.to_str()) + message.append_to_body(b'Click here') + self.assertEqual(b'GET /background.png HTTP/1.0\r\nHost: example.org\r\nContent-Length: 110\r\n\r\nClick here\r\n', message.to_bytes()) def test_get_host_message(self): message = Message() message.set_start_line("GET /background.png HTTP/1.0") message.add_header('Host', 'example.org') message.add_header('Content-Length', '110') - message.append_to_body('Click here') + message.append_to_body(b'Click here') host = message.get_host() - self.assertEqual('example.org', host) + self.assertEqual(('example.org', 80), host) def test_need_cash_message(self): message = Message() message.set_start_line("GET /background.png HTTP/1.0") message.add_header('Host', 'example.org') message.add_header('Content-Length', '110') - message.append_to_body('Click here') + message.append_to_body(b'Click here') can = message.can_cache() message.add_header('Cache-Control', 'no-store') cant = message.can_cache() @@ -39,7 +39,7 @@ def test_put_in_cash(self): message.set_start_line("GET /background.png HTTP/1.0") message.add_header('Host', 'example.org') message.add_header('Content-Length', '110') - message.append_to_body('Click here') + message.append_to_body(b'Click here') cache = Cache(1000, 100000) cache.put(message, message) _, getting_message = cache.get(message) @@ -51,7 +51,7 @@ def test_expire_cash(self): message.add_header('Host', 'example.org') message.add_header('Content-Length', '110') message.append_to_body( - 'Click here') + b'Click here') cache = Cache(1, 100000) cache.put(message, message) time.sleep(5) @@ -67,9 +67,9 @@ def test_size_cash(self): message.add_header('Content-Length', '110') message1.add_header('Content-Length', '110') message.append_to_body( - 'Click here') + b'Click here') message1.append_to_body( - 'Click here') + b'Click here') cache = Cache(100000, 1) cache.put(message, message) _, getting_message = cache.get(message) From 1c536f20fbe08f150730469fa9b58e9f47bc009c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=90=D0=BB=D0=B5=D0=BA=D1=81=D0=B0=D0=BD=D0=B4=D1=80?= =?UTF-8?q?=D0=B0=20=D0=9E=D1=80=D0=BB=D0=BE=D0=B2=D0=B0?= Date: Sat, 16 Mar 2019 22:12:55 +0300 Subject: [PATCH 19/27] add message cache --- http-proxy/cache.py | 14 ++++++++++---- http-proxy/main.py | 4 +++- http-proxy/message.py | 11 +++++++++++ http-proxy/worker.py | 15 ++++++++++++++- 4 files changed, 38 insertions(+), 6 deletions(-) diff --git a/http-proxy/cache.py b/http-proxy/cache.py index 5f91657..65fcd07 100644 --- a/http-proxy/cache.py +++ b/http-proxy/cache.py @@ -1,5 +1,6 @@ import time import sys +import logging from operator import itemgetter @@ -10,18 +11,22 @@ def __init__(self, expire, max_size): self.__max_size = max_size def get(self, request): + print(self.__check_expire(request)) if request.can_cache() and not self.__check_expire(request): - return self.__cache.get(request, (None, None)) + print("WIN!!!!!!") + logging.info("Get from cache: %s" % (str(request))) + return self.__cache.get(hash(request), (None, None)) else: - return None + return None, None def put(self, request, response): + print(request, hash(request)) self.__clear_old_cache() if request.can_cache() and response.can_cache(): - self.__cache[request] = (time.time(), response) + self.__cache[hash(request)] = (time.time(), response) def __check_expire(self, request): - set_time, value = self.__cache.get(request, (None, None)) + set_time, value = self.__cache.get(hash(request), (None, None)) if set_time is not None: if set_time + self.__expire < time.time(): self.__cache.pop(request) @@ -35,6 +40,7 @@ def __check_cache_size(self): return sys.getsizeof(self.__cache) < self.__max_size def __clear_old_cache(self): + logging.info("Clear old cache values") key_for_deleted = [] if not self.__check_cache_size(): for key, value in self.__cache.items(): diff --git a/http-proxy/main.py b/http-proxy/main.py index b9ed9ee..d93ddfb 100644 --- a/http-proxy/main.py +++ b/http-proxy/main.py @@ -1,11 +1,13 @@ from sys import argv from acceptor import Acceptor - +import logging if __name__ == "__main__": port = int(argv[1]) cache_expire = int(argv[2]) cache_max_size = int(argv[3]) + logging.basicConfig(filename="proxy.log", level=logging.INFO) + logging.info("Start proxy on port %s, with cache expire %s and cache size %s" % (port, cache_expire, cache_max_size)) my_server = Acceptor("", port, cache_expire, cache_max_size) my_server.start() x = input("input anything to exit\n") diff --git a/http-proxy/message.py b/http-proxy/message.py index 4ed6450..99867e5 100644 --- a/http-proxy/message.py +++ b/http-proxy/message.py @@ -60,3 +60,14 @@ def __get_body_len(self): def __is_chunked(self): return self.__headers.get("Transfer-Encoding", "") == "chunked" + + def __hash__(self): + return hash(self.__body) + hash(self.__start_line) + hash(self.get_host()) + + def __eq__(self, other): + if not isinstance(other, Message): + return False + return self.__body == other.__body and self.__start_line == other.__start_line and self.__headers == other.__headers + + def __str__(self): + return self.__start_line + str(self.__headers) \ No newline at end of file diff --git a/http-proxy/worker.py b/http-proxy/worker.py index a17af7d..a6f1f91 100644 --- a/http-proxy/worker.py +++ b/http-proxy/worker.py @@ -1,5 +1,7 @@ from threading import Thread from connection import Connection +import logging +import time class Worker(Thread): @@ -15,7 +17,16 @@ def interrupt(self): def run(self): request = self.__client_connection.receive_message() - response = self.__cache.get(request) + logging.info("Get request: %s" % (str(request))) + timestamp, response = self.__cache.get(request) + if timestamp is not None: + request.add_header('If-Modified-Since', time.ctime(timestamp)) + server_connection = Connection() + server_connection.establish(request.get_host()) + server_connection.send_message(request) + tmp_response = server_connection.receive_message() + if tmp_response.get_status() != 304: + response = None if response is None: server_connection = Connection() server_connection.establish(request.get_host()) @@ -23,4 +34,6 @@ def run(self): response = server_connection.receive_message() server_connection.close() self.__client_connection.send_message(response) + logging.info("Send response: %s" % (str(request))) + self.__cache.put(request, response) self.__client_connection.close() From c4910c14ed3d83753995c7ecbccfcd0884591aff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=90=D0=BB=D0=B5=D0=BA=D1=81=D0=B0=D0=BD=D0=B4=D1=80?= =?UTF-8?q?=D0=B0=20=D0=9E=D1=80=D0=BB=D0=BE=D0=B2=D0=B0?= Date: Sun, 17 Mar 2019 13:11:26 +0300 Subject: [PATCH 20/27] add more logging --- http-proxy/acceptor.py | 2 ++ http-proxy/cache.py | 3 --- http-proxy/connection.py | 3 ++- http-proxy/message.py | 5 +++-- http-proxy/worker.py | 14 ++++++++++---- 5 files changed, 17 insertions(+), 10 deletions(-) diff --git a/http-proxy/acceptor.py b/http-proxy/acceptor.py index 75765b1..26c72b8 100644 --- a/http-proxy/acceptor.py +++ b/http-proxy/acceptor.py @@ -1,5 +1,6 @@ from socket import socket, AF_INET, SOCK_STREAM, SOL_SOCKET, SO_REUSEADDR, SHUT_WR from threading import Thread, RLock +import logging from cache import Cache from worker import Worker @@ -10,6 +11,7 @@ class Acceptor(Thread): def __init__(self, address, port, cache_expire, cache_max_size): super().__init__() + logging.info("Accept connection address: %s, port %s" % (address, port)) self.__address = address self.__port = port self.__cache_expire = cache_expire diff --git a/http-proxy/cache.py b/http-proxy/cache.py index 65fcd07..2cc3425 100644 --- a/http-proxy/cache.py +++ b/http-proxy/cache.py @@ -11,16 +11,13 @@ def __init__(self, expire, max_size): self.__max_size = max_size def get(self, request): - print(self.__check_expire(request)) if request.can_cache() and not self.__check_expire(request): - print("WIN!!!!!!") logging.info("Get from cache: %s" % (str(request))) return self.__cache.get(hash(request), (None, None)) else: return None, None def put(self, request, response): - print(request, hash(request)) self.__clear_old_cache() if request.can_cache() and response.can_cache(): self.__cache[hash(request)] = (time.time(), response) diff --git a/http-proxy/connection.py b/http-proxy/connection.py index 337c1f5..3b39ac5 100644 --- a/http-proxy/connection.py +++ b/http-proxy/connection.py @@ -1,5 +1,5 @@ from socket import socket, AF_INET, SOCK_STREAM, SHUT_WR - +import logging from httparser import HTTParser @@ -25,6 +25,7 @@ def receive_message(self): while True: chunk = self.__socket.recv(MAX_CHUNK_LEN) if not chunk: + logging.error("Connect aborted %s" % self.__host) raise ConnectionAbortedError(self.__host) message = parser.append(chunk) if message: diff --git a/http-proxy/message.py b/http-proxy/message.py index 99867e5..47e708e 100644 --- a/http-proxy/message.py +++ b/http-proxy/message.py @@ -67,7 +67,8 @@ def __hash__(self): def __eq__(self, other): if not isinstance(other, Message): return False - return self.__body == other.__body and self.__start_line == other.__start_line and self.__headers == other.__headers + return (self.__body == other.__body + and self.__start_line == other.__start_line and self.__headers == other.__headers) def __str__(self): - return self.__start_line + str(self.__headers) \ No newline at end of file + return self.__start_line + str(self.__headers) diff --git a/http-proxy/worker.py b/http-proxy/worker.py index a6f1f91..bc0620d 100644 --- a/http-proxy/worker.py +++ b/http-proxy/worker.py @@ -11,14 +11,14 @@ def __init__(self, sock, cache): self.__socket = sock self.__cache = cache self.__client_connection = Connection(self.__socket) + logging.info("Start new worker") def interrupt(self): + logging.info("Interrupt worker") self.__client_connection.close() - def run(self): - request = self.__client_connection.receive_message() - logging.info("Get request: %s" % (str(request))) - timestamp, response = self.__cache.get(request) + def __get_from_cache_with_check(self, cache_answer, request): + timestamp, response = cache_answer if timestamp is not None: request.add_header('If-Modified-Since', time.ctime(timestamp)) server_connection = Connection() @@ -27,6 +27,12 @@ def run(self): tmp_response = server_connection.receive_message() if tmp_response.get_status() != 304: response = None + return response + + def run(self): + request = self.__client_connection.receive_message() + logging.info("Get request: %s" % (str(request))) + response = self.__get_from_cache_with_check(self.__cache.get(request), request) if response is None: server_connection = Connection() server_connection.establish(request.get_host()) From 47ee6647817f0d12652719f676b5c8a8a6956a5e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=90=D0=BB=D0=B5=D0=BA=D1=81=D0=B0=D0=BD=D0=B4=D1=80?= =?UTF-8?q?=D0=B0=20=D0=9E=D1=80=D0=BB=D0=BE=D0=B2=D0=B0?= Date: Sun, 17 Mar 2019 22:21:12 +0300 Subject: [PATCH 21/27] add message methods, make more functions --- http-proxy/cache.py | 18 ++++++++---------- http-proxy/message.py | 9 ++++++++- http-proxy/test.py | 8 ++++---- http-proxy/worker.py | 38 ++++++++++++++++++++------------------ 4 files changed, 40 insertions(+), 33 deletions(-) diff --git a/http-proxy/cache.py b/http-proxy/cache.py index 2cc3425..a2a6681 100644 --- a/http-proxy/cache.py +++ b/http-proxy/cache.py @@ -1,6 +1,6 @@ -import time import sys import logging +from time import time from operator import itemgetter @@ -13,25 +13,23 @@ def __init__(self, expire, max_size): def get(self, request): if request.can_cache() and not self.__check_expire(request): logging.info("Get from cache: %s" % (str(request))) - return self.__cache.get(hash(request), (None, None)) + return self.__cache.get(str(request), (None, None)) else: return None, None def put(self, request, response): self.__clear_old_cache() if request.can_cache() and response.can_cache(): - self.__cache[hash(request)] = (time.time(), response) + self.__cache[str(request)] = (time(), response) def __check_expire(self, request): - set_time, value = self.__cache.get(hash(request), (None, None)) + set_time, value = self.__cache.get(str(request), (None, None)) if set_time is not None: - if set_time + self.__expire < time.time(): + if set_time + self.__expire < time(): self.__cache.pop(request) return True - else: - return False - else: - return True + return False + return True def __check_cache_size(self): return sys.getsizeof(self.__cache) < self.__max_size @@ -41,7 +39,7 @@ def __clear_old_cache(self): key_for_deleted = [] if not self.__check_cache_size(): for key, value in self.__cache.items(): - if value[0] + self.__expire > time.time(): + if value[0] + self.__expire < time(): key_for_deleted.append(key) for key in key_for_deleted: self.__cache.pop(key) diff --git a/http-proxy/message.py b/http-proxy/message.py index 47e708e..4d11a29 100644 --- a/http-proxy/message.py +++ b/http-proxy/message.py @@ -1,4 +1,5 @@ from re import split, search +from time import ctime NEW_LINE = "\r\n" NEW_LINE_B = b"\r\n" @@ -55,6 +56,12 @@ def can_cache(self): return True return search(r"no-cache|no-store", self.__headers.get("Cache-Control")) is None + def is_modify(self): + return self.get_status() != 304 + + def add_modify_request(self, timestamp): + self.add_header('If-Modified-Since', ctime(timestamp)) + def __get_body_len(self): return int(self.__headers.get("Content-Length", "0")) @@ -71,4 +78,4 @@ def __eq__(self, other): and self.__start_line == other.__start_line and self.__headers == other.__headers) def __str__(self): - return self.__start_line + str(self.__headers) + return self.__start_line + str(self.get_host()) + str(self.__body) diff --git a/http-proxy/test.py b/http-proxy/test.py index 977f073..ce8fdcc 100644 --- a/http-proxy/test.py +++ b/http-proxy/test.py @@ -1,9 +1,9 @@ -import time -import unittest +from unittest import TestCase +from time import sleep from cache import Cache from message import Message -class Test(unittest.TestCase): +class Test(TestCase): def test_create_message(self): message = Message() @@ -54,7 +54,7 @@ def test_expire_cash(self): b'Click here') cache = Cache(1, 100000) cache.put(message, message) - time.sleep(5) + sleep(5) getting_message = cache.get(message) self.assertEqual(None, getting_message) diff --git a/http-proxy/worker.py b/http-proxy/worker.py index bc0620d..8da30dd 100644 --- a/http-proxy/worker.py +++ b/http-proxy/worker.py @@ -1,7 +1,6 @@ from threading import Thread from connection import Connection import logging -import time class Worker(Thread): @@ -17,29 +16,32 @@ def interrupt(self): logging.info("Interrupt worker") self.__client_connection.close() - def __get_from_cache_with_check(self, cache_answer, request): - timestamp, response = cache_answer - if timestamp is not None: - request.add_header('If-Modified-Since', time.ctime(timestamp)) - server_connection = Connection() - server_connection.establish(request.get_host()) - server_connection.send_message(request) - tmp_response = server_connection.receive_message() - if tmp_response.get_status() != 304: - response = None - return response - def run(self): request = self.__client_connection.receive_message() logging.info("Get request: %s" % (str(request))) response = self.__get_from_cache_with_check(self.__cache.get(request), request) if response is None: - server_connection = Connection() - server_connection.establish(request.get_host()) - server_connection.send_message(request) - response = server_connection.receive_message() - server_connection.close() + response = Worker.__connect_and_send(request) self.__client_connection.send_message(response) logging.info("Send response: %s" % (str(request))) self.__cache.put(request, response) self.__client_connection.close() + + @staticmethod + def __connect_and_send(request): + server_connection = Connection() + server_connection.establish(request.get_host()) + server_connection.send_message(request) + response = server_connection.receive_message() + server_connection.close() + return response + + @staticmethod + def __get_from_cache_with_check(cache_answer, request): + timestamp, response = cache_answer + if timestamp is not None: + request.add_modify_request(timestamp) + tmp_response = Worker.__connect_and_send(request) + if tmp_response.is_modify(): + response = None + return response From 5881bad80f6c6ae48dc9361e08a4c56c0fdf79df Mon Sep 17 00:00:00 2001 From: vasyoid Date: Sun, 17 Mar 2019 20:34:38 +0300 Subject: [PATCH 22/27] Add supported methods validation --- http-proxy/cache.py | 30 +++++++++++++++++++----------- http-proxy/connection.py | 12 +++++++++--- http-proxy/message.py | 20 +++++++++++++++++--- http-proxy/worker.py | 14 +++++++++++--- 4 files changed, 56 insertions(+), 20 deletions(-) diff --git a/http-proxy/cache.py b/http-proxy/cache.py index a2a6681..6376e7c 100644 --- a/http-proxy/cache.py +++ b/http-proxy/cache.py @@ -1,26 +1,31 @@ import sys import logging +from threading import RLock from time import time from operator import itemgetter class Cache: + def __init__(self, expire, max_size): self.__cache = {} self.__expire = expire self.__max_size = max_size + self.__dict_mutex = RLock() def get(self, request): - if request.can_cache() and not self.__check_expire(request): - logging.info("Get from cache: %s" % (str(request))) - return self.__cache.get(str(request), (None, None)) - else: - return None, None + with self.__dict_mutex: + if request.can_cache() and not self.__check_expire(request): + logging.info("Get from cache: %s" % (str(request))) + return self.__cache.get(str(request), (None, None)) + else: + return None, None def put(self, request, response): - self.__clear_old_cache() - if request.can_cache() and response.can_cache(): - self.__cache[str(request)] = (time(), response) + with self.__dict_mutex: + self.__clear_old_cache() + if request.can_cache() and response.can_cache(): + self.__cache[str(request)] = (time(), response) def __check_expire(self, request): set_time, value = self.__cache.get(str(request), (None, None)) @@ -35,17 +40,20 @@ def __check_cache_size(self): return sys.getsizeof(self.__cache) < self.__max_size def __clear_old_cache(self): - logging.info("Clear old cache values") key_for_deleted = [] if not self.__check_cache_size(): for key, value in self.__cache.items(): if value[0] + self.__expire < time(): key_for_deleted.append(key) for key in key_for_deleted: - self.__cache.pop(key) + self.__pop_key_and_log(key) if not self.__check_cache_size(): sorted_by_time_cache = sorted(self.__cache.items(), key=itemgetter(1)) for key, value in sorted_by_time_cache: - self.__cache.pop(key) + self.__pop_key_and_log(key) if self.__check_cache_size(): return + + def __pop_key_and_log(self, key): + logging.info("Remove from cache: %s" % str(self.__cache[key][1])) + self.__cache.pop(key) diff --git a/http-proxy/connection.py b/http-proxy/connection.py index 3b39ac5..25d7d1c 100644 --- a/http-proxy/connection.py +++ b/http-proxy/connection.py @@ -18,15 +18,21 @@ def establish(self, host): self.__socket.connect(self.__host) def close(self): - self.__socket.shutdown(SHUT_WR) + try: + self.__socket.shutdown(SHUT_WR) + self.__socket.close() + except OSError: + pass + finally: + logging.info("Socket closed") def receive_message(self): parser = HTTParser() while True: chunk = self.__socket.recv(MAX_CHUNK_LEN) if not chunk: - logging.error("Connect aborted %s" % self.__host) - raise ConnectionAbortedError(self.__host) + logging.error("Connection aborted by %s" % self.__host) + return None message = parser.append(chunk) if message: return message diff --git a/http-proxy/message.py b/http-proxy/message.py index 4d11a29..41d8445 100644 --- a/http-proxy/message.py +++ b/http-proxy/message.py @@ -4,6 +4,13 @@ NEW_LINE = "\r\n" NEW_LINE_B = b"\r\n" HTTP_PORT = 80 +SUPPORTED_METHODS = ["GET", "POST", "HEAD"] + + +def not_implemented_response(): + response = Message() + response.set_start_line("HTTP/1.1 501 Not Implemented") + return response class Message: @@ -48,8 +55,7 @@ def get_host(self): if host: host = split(r":", host) return host[0], int(host[1]) if len(host) > 1 else HTTP_PORT - else: - return None + return None, None def can_cache(self): if self.__headers.get("Cache-Control") is None: @@ -62,6 +68,10 @@ def is_modify(self): def add_modify_request(self, timestamp): self.add_header('If-Modified-Since', ctime(timestamp)) + def is_method_supported(self): + method = self.__start_line.split(" ")[0] + return method in SUPPORTED_METHODS + def __get_body_len(self): return int(self.__headers.get("Content-Length", "0")) @@ -78,4 +88,8 @@ def __eq__(self, other): and self.__start_line == other.__start_line and self.__headers == other.__headers) def __str__(self): - return self.__start_line + str(self.get_host()) + str(self.__body) + host, port = self.get_host() + body = self.__body if self.__body is not None else "" + if host is not None: + return "%s %s:%s %s" % (self.__start_line, host, port, body) + return "%s %s" % (self.__start_line, body) diff --git a/http-proxy/worker.py b/http-proxy/worker.py index 8da30dd..526631e 100644 --- a/http-proxy/worker.py +++ b/http-proxy/worker.py @@ -2,6 +2,8 @@ from connection import Connection import logging +from message import not_implemented_response + class Worker(Thread): @@ -10,20 +12,26 @@ def __init__(self, sock, cache): self.__socket = sock self.__cache = cache self.__client_connection = Connection(self.__socket) - logging.info("Start new worker") def interrupt(self): logging.info("Interrupt worker") self.__client_connection.close() def run(self): + logging.info("Start new worker") request = self.__client_connection.receive_message() + if request is None: + logging.info("Could not get request") + return logging.info("Get request: %s" % (str(request))) - response = self.__get_from_cache_with_check(self.__cache.get(request), request) + if not request.is_method_supported(): + response = not_implemented_response() + else: + response = self.__get_from_cache_with_check(self.__cache.get(request), request) if response is None: response = Worker.__connect_and_send(request) + logging.info("Send response: %s" % (str(response))) self.__client_connection.send_message(response) - logging.info("Send response: %s" % (str(request))) self.__cache.put(request, response) self.__client_connection.close() From dfbaf2872b5630ef9027c46ff8ad255d3040b881 Mon Sep 17 00:00:00 2001 From: vasyoid Date: Mon, 18 Mar 2019 02:58:00 +0300 Subject: [PATCH 23/27] Improve logging --- http-proxy/acceptor.py | 1 + http-proxy/worker.py | 2 -- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/http-proxy/acceptor.py b/http-proxy/acceptor.py index 26c72b8..f0a884d 100644 --- a/http-proxy/acceptor.py +++ b/http-proxy/acceptor.py @@ -36,6 +36,7 @@ def run(self): with self.__interrupted_mutex: if self.__interrupted: break + logging.info("Accept new connection from: %s" % str(client_socket.getpeername())) worker = Worker(client_socket, cache) workers.append(worker) worker.start() diff --git a/http-proxy/worker.py b/http-proxy/worker.py index 526631e..6b9e708 100644 --- a/http-proxy/worker.py +++ b/http-proxy/worker.py @@ -14,11 +14,9 @@ def __init__(self, sock, cache): self.__client_connection = Connection(self.__socket) def interrupt(self): - logging.info("Interrupt worker") self.__client_connection.close() def run(self): - logging.info("Start new worker") request = self.__client_connection.receive_message() if request is None: logging.info("Could not get request") From 52f29fb1e1f62f28770b2de38c6ba59e0bbe1576 Mon Sep 17 00:00:00 2001 From: vasyoid Date: Mon, 18 Mar 2019 03:02:35 +0300 Subject: [PATCH 24/27] Fix tests --- http-proxy/cache.py | 6 ++--- http-proxy/test.py | 60 ++++++++++++++++++++++++--------------------- 2 files changed, 35 insertions(+), 31 deletions(-) diff --git a/http-proxy/cache.py b/http-proxy/cache.py index 6376e7c..d73c3fa 100644 --- a/http-proxy/cache.py +++ b/http-proxy/cache.py @@ -31,7 +31,7 @@ def __check_expire(self, request): set_time, value = self.__cache.get(str(request), (None, None)) if set_time is not None: if set_time + self.__expire < time(): - self.__cache.pop(request) + self.__pop_key_and_log(request) return True return False return True @@ -55,5 +55,5 @@ def __clear_old_cache(self): return def __pop_key_and_log(self, key): - logging.info("Remove from cache: %s" % str(self.__cache[key][1])) - self.__cache.pop(key) + logging.info("Remove from cache: %s" % str(self.__cache[str(key)][1])) + self.__cache.pop(str(key)) diff --git a/http-proxy/test.py b/http-proxy/test.py index ce8fdcc..d530575 100644 --- a/http-proxy/test.py +++ b/http-proxy/test.py @@ -3,33 +3,40 @@ from cache import Cache from message import Message + +BODY = b'Click here' + + class Test(TestCase): def test_create_message(self): message = Message() message.set_start_line("GET /background.png HTTP/1.0") - message.add_header('Host', 'example.org') - message.add_header('Content-Length', '110') - message.append_to_body(b'Click here') - self.assertEqual(b'GET /background.png HTTP/1.0\r\nHost: example.org\r\nContent-Length: 110\r\n\r\nClick here\r\n', message.to_bytes()) + message.add_header("Host", "example.org") + message.add_header("Content-Length", "110") + message.append_to_body(BODY) + self.assertEqual(b'GET /background.png HTTP/1.0\r\n' + b'Host: example.org\r\nContent-Length: 110\r\n\r\n' + b'Click here' + b'\r\n', message.to_bytes()) def test_get_host_message(self): message = Message() message.set_start_line("GET /background.png HTTP/1.0") - message.add_header('Host', 'example.org') - message.add_header('Content-Length', '110') - message.append_to_body(b'Click here') + message.add_header("Host", "example.org") + message.add_header("Content-Length", "110") + message.append_to_body(BODY) host = message.get_host() - self.assertEqual(('example.org', 80), host) + self.assertEqual(("example.org", 80), host) def test_need_cash_message(self): message = Message() message.set_start_line("GET /background.png HTTP/1.0") - message.add_header('Host', 'example.org') - message.add_header('Content-Length', '110') - message.append_to_body(b'Click here') + message.add_header("Host", "example.org") + message.add_header("Content-Length", "110") + message.append_to_body(BODY) can = message.can_cache() - message.add_header('Cache-Control', 'no-store') + message.add_header("Cache-Control", "no-store") cant = message.can_cache() self.assertEqual(True, can) self.assertEqual(False, cant) @@ -37,9 +44,9 @@ def test_need_cash_message(self): def test_put_in_cash(self): message = Message() message.set_start_line("GET /background.png HTTP/1.0") - message.add_header('Host', 'example.org') - message.add_header('Content-Length', '110') - message.append_to_body(b'Click here') + message.add_header("Host", "example.org") + message.add_header("Content-Length", "110") + message.append_to_body(BODY) cache = Cache(1000, 100000) cache.put(message, message) _, getting_message = cache.get(message) @@ -48,28 +55,25 @@ def test_put_in_cash(self): def test_expire_cash(self): message = Message() message.set_start_line("GET /background.png HTTP/1.0") - message.add_header('Host', 'example.org') - message.add_header('Content-Length', '110') - message.append_to_body( - b'Click here') + message.add_header("Host", "example.org") + message.add_header("Content-Length", "110") + message.append_to_body(BODY) cache = Cache(1, 100000) cache.put(message, message) sleep(5) getting_message = cache.get(message) - self.assertEqual(None, getting_message) + self.assertEqual((None, None), getting_message) def test_size_cash(self): message = Message() message1 = Message() message.set_start_line("GET /background.png HTTP/1.0") message1.set_start_line("GET /background.png HTTP/1.0") - message.add_header('Host', 'example.org') - message.add_header('Content-Length', '110') - message1.add_header('Content-Length', '110') - message.append_to_body( - b'Click here') - message1.append_to_body( - b'Click here') + message.add_header("Host", "example.org") + message.add_header("Content-Length", "110") + message1.add_header("Content-Length", "110") + message.append_to_body(BODY) + message1.append_to_body(BODY) cache = Cache(100000, 1) cache.put(message, message) _, getting_message = cache.get(message) @@ -78,4 +82,4 @@ def test_size_cash(self): empty_message = cache.get(message) self.assertEqual(message, getting_message) self.assertEqual(message1, getting_message1) - self.assertEqual(None, empty_message) + self.assertEqual((None, None), empty_message) From c60f5b3ed160ca4738ff4fbe380ebf4e8a1ef377 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=90=D0=BB=D0=B5=D0=BA=D1=81=D0=B0=D0=BD=D0=B4=D1=80?= =?UTF-8?q?=D0=B0=20=D0=9E=D1=80=D0=BB=D0=BE=D0=B2=D0=B0?= Date: Wed, 20 Mar 2019 00:04:55 +0300 Subject: [PATCH 25/27] fix cache size --- http-proxy/cache.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/http-proxy/cache.py b/http-proxy/cache.py index d73c3fa..f75c37d 100644 --- a/http-proxy/cache.py +++ b/http-proxy/cache.py @@ -23,8 +23,8 @@ def get(self, request): def put(self, request, response): with self.__dict_mutex: - self.__clear_old_cache() - if request.can_cache() and response.can_cache(): + self.__clear_old_cache(response) + if self.__check_cache_size(response) and request.can_cache() and response.can_cache(): self.__cache[str(request)] = (time(), response) def __check_expire(self, request): @@ -36,22 +36,22 @@ def __check_expire(self, request): return False return True - def __check_cache_size(self): - return sys.getsizeof(self.__cache) < self.__max_size + def __check_cache_size(self, response): + return sys.getsizeof(self.__cache) + sys.getsizeof(response) < self.__max_size - def __clear_old_cache(self): + def __clear_old_cache(self, response): key_for_deleted = [] - if not self.__check_cache_size(): + if not self.__check_cache_size(response): for key, value in self.__cache.items(): if value[0] + self.__expire < time(): key_for_deleted.append(key) for key in key_for_deleted: self.__pop_key_and_log(key) - if not self.__check_cache_size(): + if not self.__check_cache_size(response): sorted_by_time_cache = sorted(self.__cache.items(), key=itemgetter(1)) for key, value in sorted_by_time_cache: self.__pop_key_and_log(key) - if self.__check_cache_size(): + if self.__check_cache_size(response): return def __pop_key_and_log(self, key): From 10fa94dc1b91f53a3ffea7c3e8e55450eb035044 Mon Sep 17 00:00:00 2001 From: vasyoid Date: Wed, 20 Mar 2019 00:57:57 +0300 Subject: [PATCH 26/27] Fix minor issues --- http-proxy/cache.py | 33 ++++++++++++++++++++------------- http-proxy/connection.py | 13 ++++++++++--- http-proxy/message.py | 4 ++++ http-proxy/test.py | 2 +- 4 files changed, 35 insertions(+), 17 deletions(-) diff --git a/http-proxy/cache.py b/http-proxy/cache.py index f75c37d..087f3d0 100644 --- a/http-proxy/cache.py +++ b/http-proxy/cache.py @@ -12,6 +12,7 @@ def __init__(self, expire, max_size): self.__expire = expire self.__max_size = max_size self.__dict_mutex = RLock() + self.__cache_size = 0 def get(self, request): with self.__dict_mutex: @@ -22,10 +23,13 @@ def get(self, request): return None, None def put(self, request, response): + if not request.can_cache() or not response.can_cache_with_size(self.__max_size): + return with self.__dict_mutex: self.__clear_old_cache(response) - if self.__check_cache_size(response) and request.can_cache() and response.can_cache(): + if self.__check_cache_size(response): self.__cache[str(request)] = (time(), response) + self.__cache_size += sys.getsizeof(response) def __check_expire(self, request): set_time, value = self.__cache.get(str(request), (None, None)) @@ -37,23 +41,26 @@ def __check_expire(self, request): return True def __check_cache_size(self, response): - return sys.getsizeof(self.__cache) + sys.getsizeof(response) < self.__max_size + return self.__cache_size + sys.getsizeof(response) < self.__max_size def __clear_old_cache(self, response): + if self.__check_cache_size(response): + return key_for_deleted = [] - if not self.__check_cache_size(response): - for key, value in self.__cache.items(): - if value[0] + self.__expire < time(): - key_for_deleted.append(key) + for key, value in self.__cache.items(): + if value[0] + self.__expire < time(): + key_for_deleted.append(key) for key in key_for_deleted: self.__pop_key_and_log(key) - if not self.__check_cache_size(response): - sorted_by_time_cache = sorted(self.__cache.items(), key=itemgetter(1)) - for key, value in sorted_by_time_cache: - self.__pop_key_and_log(key) - if self.__check_cache_size(response): - return + sorted_by_time_cache = sorted(self.__cache.items(), key=itemgetter(1)) + for key, value in sorted_by_time_cache: + if self.__check_cache_size(response): + return + self.__pop_key_and_log(key) def __pop_key_and_log(self, key): - logging.info("Remove from cache: %s" % str(self.__cache[str(key)][1])) + key = str(key) + _, value = self.__cache[key] + logging.info("Remove from cache: %s" % str(value)) + self.__cache_size -= sys.getsizeof(value) self.__cache.pop(str(key)) diff --git a/http-proxy/connection.py b/http-proxy/connection.py index 25d7d1c..6556d5a 100644 --- a/http-proxy/connection.py +++ b/http-proxy/connection.py @@ -29,7 +29,10 @@ def close(self): def receive_message(self): parser = HTTParser() while True: - chunk = self.__socket.recv(MAX_CHUNK_LEN) + try: + chunk = self.__socket.recv(MAX_CHUNK_LEN) + except BrokenPipeError: + chunk = 0 if not chunk: logging.error("Connection aborted by %s" % self.__host) return None @@ -41,7 +44,11 @@ def send_message(self, message): msg = message.to_bytes() total_sent = 0 while total_sent < len(msg): - sent = self.__socket.send(msg[total_sent:]) - if sent == 0: + try: + sent = self.__socket.send(msg[total_sent:]) + except BrokenPipeError: + sent = 0 + if not sent: + logging.error("Connection aborted by %s" % self.__host) break total_sent += sent diff --git a/http-proxy/message.py b/http-proxy/message.py index 41d8445..630410f 100644 --- a/http-proxy/message.py +++ b/http-proxy/message.py @@ -1,3 +1,4 @@ +import sys from re import split, search from time import ctime @@ -62,6 +63,9 @@ def can_cache(self): return True return search(r"no-cache|no-store", self.__headers.get("Cache-Control")) is None + def can_cache_with_size(self, cache_size): + return self.can_cache() and sys.getsizeof(self) <= cache_size + def is_modify(self): return self.get_status() != 304 diff --git a/http-proxy/test.py b/http-proxy/test.py index d530575..a93606b 100644 --- a/http-proxy/test.py +++ b/http-proxy/test.py @@ -74,7 +74,7 @@ def test_size_cash(self): message1.add_header("Content-Length", "110") message.append_to_body(BODY) message1.append_to_body(BODY) - cache = Cache(100000, 1) + cache = Cache(100000, 60) cache.put(message, message) _, getting_message = cache.get(message) cache.put(message1, message1) From 4683bbd99ba58ddd76351d49f77e36efa2fdc989 Mon Sep 17 00:00:00 2001 From: Vasily Kuporosov Date: Wed, 20 Mar 2019 11:37:21 +0300 Subject: [PATCH 27/27] Fix clear old cache --- http-proxy/cache.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/http-proxy/cache.py b/http-proxy/cache.py index 087f3d0..7f18f9b 100644 --- a/http-proxy/cache.py +++ b/http-proxy/cache.py @@ -52,7 +52,7 @@ def __clear_old_cache(self, response): key_for_deleted.append(key) for key in key_for_deleted: self.__pop_key_and_log(key) - sorted_by_time_cache = sorted(self.__cache.items(), key=itemgetter(1)) + sorted_by_time_cache = sorted(self.__cache.items(), key=itemgetter(0)) for key, value in sorted_by_time_cache: if self.__check_cache_size(response): return