From 79dc5dc9500079fb0c7ab9f220f653483febca54 Mon Sep 17 00:00:00 2001 From: ganti <28899559+ganti@users.noreply.github.com> Date: Fri, 4 Nov 2022 02:17:27 +0100 Subject: [PATCH 01/10] Update README CTL_OFFSET #19 --- README.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index d892a54..6e60ee1 100644 --- a/README.md +++ b/README.md @@ -19,7 +19,7 @@ pip3 install axeman ``` $ axeman -h usage: axeman [-h] [-f LOG_FILE] [-s START_OFFSET] [-l] [-u CTL_URL] - [-o OUTPUT_DIR] [-v] [-c CONCURRENCY_COUNT] + [-z CTL_OFFSET] [-o OUTPUT_DIR] [-v] [-c CONCURRENCY_COUNT] Pull down certificate transparency list information @@ -29,9 +29,11 @@ optional arguments: -s START_OFFSET Skip N number of lists before starting -l List all available certificate lists -u CTL_URL Retrieve this CTL only + -z CTL_OFFSET The CTL offset to start at -o OUTPUT_DIR The output directory to store certificates in -v Print out verbose/debug info -c CONCURRENCY_COUNT The number of concurrent downloads to run at a time + ``` ## Demo From d2e463b9426b6f99bbad363428be84286f870114 Mon Sep 17 00:00:00 2001 From: ganti <28899559+ganti@users.noreply.github.com> Date: Fri, 4 Nov 2022 02:25:42 +0100 Subject: [PATCH 02/10] fix fails.csv generation - missing eol #15 --- axeman/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/axeman/core.py b/axeman/core.py index d67a4c2..564fddc 100644 --- a/axeman/core.py +++ b/axeman/core.py @@ -47,7 +47,7 @@ async def download_worker(session, log_info, work_deque, download_queue): logging.error("Exception getting block {}-{}! {}".format(start, end, e)) else: # Notorious for else, if we didn't encounter a break our request failed 3 times D: with open('/tmp/fails.csv', 'a') as f: - f.write(",".join([log_info['url'], str(start), str(end)])) + f.write(",".join([log_info['url'], str(start), str(end)])+"\n") return for index, entry in zip(range(start, end + 1), entry_list['entries']): From 31d58e2ba71f2643d4d86632797247bffe36c6b3 Mon Sep 17 00:00:00 2001 From: ganti <28899559+ganti@users.noreply.github.com> Date: Fri, 4 Nov 2022 02:35:05 +0100 Subject: [PATCH 03/10] Add handling of rate limiting Error 429 #15 --- axeman/core.py | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/axeman/core.py b/axeman/core.py index 564fddc..e9868e3 100644 --- a/axeman/core.py +++ b/axeman/core.py @@ -37,14 +37,26 @@ async def download_worker(session, log_info, work_deque, download_queue): logging.debug("[{}] Queueing up blocks {}-{}...".format(log_info['url'], start, end)) - for x in range(3): + for x in range(30): try: async with session.get(certlib.DOWNLOAD.format(log_info['url'], start, end)) as response: + if response.status == 429: + logging.info("got http status {}-{}".format(x, response.status)) + time.sleep(min(x + 1, 10)) + continue + if response.content_type != 'application/json': + text = await response.text() + logging.error("got response {}-{}-{}".format(x, response.content_type, text)) entry_list = await response.json() logging.debug("[{}] Retrieved blocks {}-{}...".format(log_info['url'], start, end)) + if entry_list.get('error_code', '') == 'rate_limited': + logging.info("{}-{}".format(x, entry_list)) + time.sleep(min(x + 1, 10)) + continue break except Exception as e: logging.error("Exception getting block {}-{}! {}".format(start, end, e)) + time.sleep(min(x+1, 10)) else: # Notorious for else, if we didn't encounter a break our request failed 3 times D: with open('/tmp/fails.csv', 'a') as f: f.write(",".join([log_info['url'], str(start), str(end)])+"\n") @@ -143,7 +155,6 @@ async def processing_coro(download_results_queue, output_dir="/tmp"): logging.debug("Got a chunk of {}. Mapping into process pool".format(process_pool.pool_workers)) - for entry in entries_iter: csv_storage = '{}/certificates/{}'.format(output_dir, entry['log_info']['url'].replace('/', '_')) if not os.path.exists(csv_storage): @@ -272,7 +283,8 @@ def main(): parser.add_argument('-v', dest="verbose", action="store_true", help="Print out verbose/debug info") - parser.add_argument('-c', dest='concurrency_count', action='store', default=50, type=int, help="The number of concurrent downloads to run at a time") + parser.add_argument('-c', dest='concurrency_count', action='store', default=DOWNLOAD_CONCURRENCY, type=int, + help="The number of concurrent downloads to run at a time, default "+str(DOWNLOAD_CONCURRENCY)) args = parser.parse_args() From 91f0808af4948144bf25951710f9e48260f6f161 Mon Sep 17 00:00:00 2001 From: ganti <28899559+ganti@users.noreply.github.com> Date: Fri, 4 Nov 2022 02:46:06 +0100 Subject: [PATCH 04/10] Improve printout of get_certs_and_print aee75b086072dc5d765017738c60d5a48ab0dd7b --- axeman/core.py | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/axeman/core.py b/axeman/core.py index dd1a8e3..54510b0 100644 --- a/axeman/core.py +++ b/axeman/core.py @@ -2,11 +2,12 @@ import asyncio from collections import deque -import time import uvloop asyncio.set_event_loop_policy(uvloop.EventLoopPolicy()) import sys +import time +import json import math import base64 import os @@ -250,18 +251,22 @@ def process_worker(result_info): async def get_certs_and_print(): async with aiohttp.ClientSession(conn_timeout=5) as session: ctls = await certlib.retrieve_all_ctls(session) - print("Found {} CTLs...".format(len(ctls))) + output = [] for log in ctls: try: log_info = await certlib.retrieve_log_info(log, session) except: continue - print(log['description']) - print(" \- URL: {}".format(log['url'])) - print(" \- Owner: {}".format(log_info['operated_by'])) - print(" \- Cert Count: {}".format(locale.format("%d", log_info['tree_size']-1, grouping=True))) - print(" \- Max Block Size: {}\n".format(log_info['block_size'])) + output.append({ + "description": log['description'], + "url": log['url'], + "owner": log_info['operated_by'], + "cert_count": log_info['tree_size']-1, + "max_block_size": log_info['block_size'] + }) + + print(json.dumps(output, indent=4)) def main(): loop = asyncio.get_event_loop() From 9455200462520ecb7738d720708d51f03f34b898 Mon Sep 17 00:00:00 2001 From: ganti <28899559+ganti@users.noreply.github.com> Date: Fri, 4 Nov 2022 02:46:46 +0100 Subject: [PATCH 05/10] Update Version to 1.15 --- axeman/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/axeman/__init__.py b/axeman/__init__.py index d107145..1702bd2 100644 --- a/axeman/__init__.py +++ b/axeman/__init__.py @@ -1,4 +1,4 @@ -__version__ = '1.14' +__version__ = '1.15' if __name__ == "__main__": from .core import main From 582386a07dde238baa44ab975115f245159325e6 Mon Sep 17 00:00:00 2001 From: ganti <28899559+ganti@users.noreply.github.com> Date: Fri, 4 Nov 2022 03:16:17 +0100 Subject: [PATCH 06/10] Fix Connection Timeout Deprecation warning --- axeman/core.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/axeman/core.py b/axeman/core.py index 54510b0..0e147d5 100644 --- a/axeman/core.py +++ b/axeman/core.py @@ -25,7 +25,7 @@ from OpenSSL import crypto -from . import certlib +import certlib DOWNLOAD_CONCURRENCY = 50 MAX_QUEUE_SIZE = 1000 @@ -88,7 +88,7 @@ async def queue_monitor(log_info, work_deque, download_results_queue): await asyncio.sleep(2) async def retrieve_certificates(loop, url=None, ctl_offset=0, output_directory='/tmp/', concurrency_count=DOWNLOAD_CONCURRENCY): - async with aiohttp.ClientSession(loop=loop, conn_timeout=10) as session: + async with aiohttp.ClientSession(loop=loop, timeout = aiohttp.ClientTimeout(total=10)) as session: ctl_logs = await certlib.retrieve_all_ctls(session) if url: @@ -249,7 +249,7 @@ def process_worker(result_info): return True async def get_certs_and_print(): - async with aiohttp.ClientSession(conn_timeout=5) as session: + async with aiohttp.ClientSession(timeout=aiohttp.ClientTimeout(total=5)) as session: ctls = await certlib.retrieve_all_ctls(session) output = [] for log in ctls: From 5bc24f5dc14713d2fa5fb19bd27eab3b6281af6e Mon Sep 17 00:00:00 2001 From: ganti <28899559+ganti@users.noreply.github.com> Date: Fri, 4 Nov 2022 02:35:17 +0100 Subject: [PATCH 07/10] Update CT Log List aee75b086072dc5d765017738c60d5a48ab0dd7b --- axeman/certlib.py | 20 +++++++++++--------- axeman/core.py | 1 + 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/axeman/certlib.py b/axeman/certlib.py index 93abcdc..710f8bf 100644 --- a/axeman/certlib.py +++ b/axeman/certlib.py @@ -6,11 +6,11 @@ from OpenSSL import crypto -CTL_LISTS = 'https://www.gstatic.com/ct/log_list/log_list.json' +CTL_LISTS = 'https://www.gstatic.com/ct/log_list/v2/log_list.json' -CTL_INFO = "http://{}/ct/v1/get-sth" +CTL_INFO = "{}/ct/v1/get-sth" -DOWNLOAD = "http://{}/ct/v1/get-entries?start={}&end={}" +DOWNLOAD = "{}/ct/v1/get-entries?start={}&end={}" from construct import Struct, Byte, Int16ub, Int64ub, Enum, Bytes, Int24ub, this, GreedyBytes, GreedyRange, Terminated, Embedded @@ -42,13 +42,15 @@ async def retrieve_all_ctls(session=None): async with session.get(CTL_LISTS) as response: ctl_lists = await response.json() - logs = ctl_lists['logs'] + operators = ctl_lists['operators'] - for log in logs: - if log['url'].endswith('/'): - log['url'] = log['url'][:-1] - owner = _get_owner(log, ctl_lists['operators']) - log['operated_by'] = owner + logs = list() + for operator in operators: + for log in operator['logs']: + if log['url'].endswith('/'): + log['url'] = log['url'][:-1] + log['operated_by'] = operator['name'] + logs.append(log) return logs diff --git a/axeman/core.py b/axeman/core.py index e9868e3..dd1a8e3 100644 --- a/axeman/core.py +++ b/axeman/core.py @@ -2,6 +2,7 @@ import asyncio from collections import deque +import time import uvloop asyncio.set_event_loop_policy(uvloop.EventLoopPolicy()) From 81e6324d6c21b5ce9d37ba281bc58398cee1712e Mon Sep 17 00:00:00 2001 From: ganti <28899559+ganti@users.noreply.github.com> Date: Sat, 5 Nov 2022 00:20:39 +0100 Subject: [PATCH 08/10] Fix import certlib --- axeman/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/axeman/core.py b/axeman/core.py index 0e147d5..2e6d5bc 100644 --- a/axeman/core.py +++ b/axeman/core.py @@ -25,7 +25,7 @@ from OpenSSL import crypto -import certlib +from . import certlib DOWNLOAD_CONCURRENCY = 50 MAX_QUEUE_SIZE = 1000 From 0d0504e7a444d9c9e90d7ab23f39847ca93bf4bd Mon Sep 17 00:00:00 2001 From: ganti <28899559+ganti@users.noreply.github.com> Date: Sat, 5 Nov 2022 01:03:45 +0100 Subject: [PATCH 09/10] Remove https prefix in directory --- .DS_Store | Bin 0 -> 6148 bytes axeman/core.py | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) create mode 100644 .DS_Store diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..6274d00d517479af99e5f397ca9be7a9d96271fb GIT binary patch literal 6148 zcmeHKOHRWu5S^g~BC#numcBwyfT_X>dI6dS6{>_(3P>!o=1eTP6&v1ohAL45><~gT zlKsZ>vEwJj_7D*-p2|7VjEFKcK^A31#60Rcvfxu7r#V*iP`2x;XqswdqQ7X8Z$F_G zJ<~1K!2S1QLo}g#7&rYb9n&KsTlDQ0=4vxvuA6$-+ifqYZt}dSngZVJ^89`K`to)K zeGpOp5V5;ESfRI=tImKk;0!ne&cOE!$Q1|4&J?|O2AlzBV9$V@4*^Xu8g`25=s=fB z0N@nnB+#Xnkepx`4Le16AgrlCO=T-FSkqw-78eaWMNKER;)8AF&*FvC>X<*IaN?-w zy))np3>lc{a3=TvHGY}VB7d0TBWJ)F_-738G+*WmY|8G|gYC&(8_;ghL?o^g1p Date: Sat, 5 Nov 2022 01:34:07 +0100 Subject: [PATCH 10/10] Refactoring Improve printout of get_certs_and_print Update Version to 1.15 Fix Connection Timeout (Deprecation warning) Fix import certlib Remove Comit Error 429 Remove https prefix in directory Remove Error 429 --- axeman/core.py | 42 ++++++++++++++++++------------------------ 1 file changed, 18 insertions(+), 24 deletions(-) diff --git a/axeman/core.py b/axeman/core.py index dd1a8e3..9cb11dd 100644 --- a/axeman/core.py +++ b/axeman/core.py @@ -2,11 +2,12 @@ import asyncio from collections import deque -import time import uvloop asyncio.set_event_loop_policy(uvloop.EventLoopPolicy()) import sys +import time +import json import math import base64 import os @@ -38,29 +39,18 @@ async def download_worker(session, log_info, work_deque, download_queue): logging.debug("[{}] Queueing up blocks {}-{}...".format(log_info['url'], start, end)) - for x in range(30): + + for x in range(3): try: async with session.get(certlib.DOWNLOAD.format(log_info['url'], start, end)) as response: - if response.status == 429: - logging.info("got http status {}-{}".format(x, response.status)) - time.sleep(min(x + 1, 10)) - continue - if response.content_type != 'application/json': - text = await response.text() - logging.error("got response {}-{}-{}".format(x, response.content_type, text)) entry_list = await response.json() logging.debug("[{}] Retrieved blocks {}-{}...".format(log_info['url'], start, end)) - if entry_list.get('error_code', '') == 'rate_limited': - logging.info("{}-{}".format(x, entry_list)) - time.sleep(min(x + 1, 10)) - continue break except Exception as e: logging.error("Exception getting block {}-{}! {}".format(start, end, e)) - time.sleep(min(x+1, 10)) else: # Notorious for else, if we didn't encounter a break our request failed 3 times D: with open('/tmp/fails.csv', 'a') as f: - f.write(",".join([log_info['url'], str(start), str(end)])+"\n") + f.write(",".join([log_info['url'], str(start), str(end)])) return for index, entry in zip(range(start, end + 1), entry_list['entries']): @@ -87,7 +77,7 @@ async def queue_monitor(log_info, work_deque, download_results_queue): await asyncio.sleep(2) async def retrieve_certificates(loop, url=None, ctl_offset=0, output_directory='/tmp/', concurrency_count=DOWNLOAD_CONCURRENCY): - async with aiohttp.ClientSession(loop=loop, conn_timeout=10) as session: + async with aiohttp.ClientSession(loop=loop, timeout = aiohttp.ClientTimeout(total=10)) as session: ctl_logs = await certlib.retrieve_all_ctls(session) if url: @@ -157,7 +147,7 @@ async def processing_coro(download_results_queue, output_dir="/tmp"): logging.debug("Got a chunk of {}. Mapping into process pool".format(process_pool.pool_workers)) for entry in entries_iter: - csv_storage = '{}/certificates/{}'.format(output_dir, entry['log_info']['url'].replace('/', '_')) + csv_storage = '{}/certificates/{}'.format(output_dir, entry['log_info']['url'].replace('https://', '')) if not os.path.exists(csv_storage): print("[{}] Making dir...".format(os.getpid())) os.makedirs(csv_storage) @@ -248,20 +238,24 @@ def process_worker(result_info): return True async def get_certs_and_print(): - async with aiohttp.ClientSession(conn_timeout=5) as session: + async with aiohttp.ClientSession(timeout=aiohttp.ClientTimeout(total=5)) as session: ctls = await certlib.retrieve_all_ctls(session) - print("Found {} CTLs...".format(len(ctls))) + output = [] for log in ctls: try: log_info = await certlib.retrieve_log_info(log, session) except: continue - print(log['description']) - print(" \- URL: {}".format(log['url'])) - print(" \- Owner: {}".format(log_info['operated_by'])) - print(" \- Cert Count: {}".format(locale.format("%d", log_info['tree_size']-1, grouping=True))) - print(" \- Max Block Size: {}\n".format(log_info['block_size'])) + output.append({ + "description": log['description'], + "url": log['url'], + "owner": log_info['operated_by'], + "cert_count": log_info['tree_size']-1, + "max_block_size": log_info['block_size'] + }) + + print(json.dumps(output, indent=4)) def main(): loop = asyncio.get_event_loop()