From a0bda1cbff0ffe4287807c8f13a306106867f42f Mon Sep 17 00:00:00 2001 From: Brian Harring Date: Tue, 11 Nov 2025 14:40:32 +0100 Subject: [PATCH 1/7] chore: ruff format the code to bring it up to 2025 Signed-off-by: Brian Harring --- notify-webhook.py | 259 ++++++++++++++++++++++++---------------------- 1 file changed, 133 insertions(+), 126 deletions(-) diff --git a/notify-webhook.py b/notify-webhook.py index c0cbbf9..be86086 100755 --- a/notify-webhook.py +++ b/notify-webhook.py @@ -1,24 +1,24 @@ #!/usr/bin/env python3 +import hashlib +import hmac +import json import os import re import subprocess import sys -import hmac -import hashlib -from datetime import datetime -from collections import OrderedDict - import urllib.error import urllib.parse import urllib.request -import json +from collections import OrderedDict +from datetime import datetime EMAIL_RE = re.compile(r"^(\"?)(?P.*)\1\s+<(?P.*)>$") # see git-diff-tree 'RAW OUTPUT FORMAT' # https://git-scm.com/docs/git-diff-tree#_raw_output_format -DIFF_TREE_RE = re.compile(r" \ +DIFF_TREE_RE = re.compile( + r" \ ^: \ (?P[0-9]{6}) \ \s+ \ @@ -34,22 +34,24 @@ (?:\s+ \ (?P\S+) \ )? 
\ - $", re.MULTILINE | re.VERBOSE) + $", + re.MULTILINE | re.VERBOSE, +) -EMPTY_TREE_HASH = '4b825dc642cb6eb9a060e54bf8d69288fbee4904' -ZEROS = '0000000000000000000000000000000000000000' +EMPTY_TREE_HASH = "4b825dc642cb6eb9a060e54bf8d69288fbee4904" +ZEROS = "0000000000000000000000000000000000000000" def git(args): - args = ['git'] + args + args = ["git"] + args cmd = subprocess.Popen(args, stdout=subprocess.PIPE) details = cmd.stdout.read() - details = details.decode('utf-8', 'replace').strip() + details = details.decode("utf-8", "replace").strip() return details def _git_config(): - raw_config = git(['config', '-l', '-z']) + raw_config = git(["config", "-l", "-z"]) items = raw_config.split("\0") # remove empty items items = filter(lambda i: len(i) > 0, items) @@ -66,9 +68,9 @@ def get_config(key, default=None): def get_repo_name(): - if get_config('core.bare', 'false') == 'true': + if get_config("core.bare", "false") == "true": name = os.path.basename(os.getcwd()) - if name.endswith('.git'): + if name.endswith(".git"): name = name[:-4] return name @@ -77,19 +79,19 @@ def get_repo_name(): def get_repo_description(): - description = get_config('meta.description') + description = get_config("meta.description") if description: return description - description = get_config('gitweb.description') + description = get_config("gitweb.description") if description: return description - if os.path.exists('description'): - with open('description', 'r') as fp: + if os.path.exists("description"): + with open("description", "r") as fp: return fp.read() - return '' + return "" def extract_name_email(s): @@ -97,20 +99,22 @@ def extract_name_email(s): _ = p.search(s.strip()) if not _: return (None, None) - name = (_.group('name') or '').strip() - email = (_.group('email') or '').strip() + name = (_.group("name") or "").strip() + email = (_.group("email") or "").strip() return (name, email) def get_repo_owner(): # Explicit keys - repo_owner_name = get_config('meta.ownername') - 
repo_owner_email = get_config('meta.owneremail') + repo_owner_name = get_config("meta.ownername") + repo_owner_email = get_config("meta.owneremail") # Fallback to gitweb - gitweb_owner = get_config('gitweb.owner') - if gitweb_owner is not None and \ - repo_owner_name is None and \ - repo_owner_email is None: + gitweb_owner = get_config("gitweb.owner") + if ( + gitweb_owner is not None + and repo_owner_name is None + and repo_owner_email is None + ): (name, email) = extract_name_email(gitweb_owner) if name is not None: repo_owner_name = name @@ -119,8 +123,7 @@ def get_repo_owner(): # Fallback to the repo if repo_owner_name is None or repo_owner_email is None: # You cannot include -n1 because it is processed before --reverse - logmsg = git(['log', '--reverse', '--format=%an%x09%ae'] - ).split("\n")[0] + logmsg = git(["log", "--reverse", "--format=%an%x09%ae"]).split("\n")[0] # These will never be null (name, email) = logmsg.split("\t") if repo_owner_name is None: @@ -131,23 +134,24 @@ def get_repo_owner(): return (repo_owner_name, repo_owner_email) -POST_URL = get_config('hooks.webhookurl') -POST_URLS = get_config('hooks.webhookurls') -POST_USER = get_config('hooks.authuser') -POST_PASS = get_config('hooks.authpass') -POST_REALM = get_config('hooks.authrealm') -POST_SECRET_TOKEN = get_config('hooks.secrettoken') +POST_URL = get_config("hooks.webhookurl") +POST_URLS = get_config("hooks.webhookurls") +POST_USER = get_config("hooks.authuser") +POST_PASS = get_config("hooks.authpass") +POST_REALM = get_config("hooks.authrealm") +POST_SECRET_TOKEN = get_config("hooks.secrettoken") POST_CONTENTTYPE = get_config( - 'hooks.webhook-contenttype', 'application/x-www-form-urlencoded') -POST_TIMEOUT = get_config('hooks.timeout') -DEBUG = get_config('hooks.webhook-debug') -REPO_URL = get_config('meta.url') -COMMIT_URL = get_config('meta.commiturl') -COMPARE_URL = get_config('meta.compareurl') + "hooks.webhook-contenttype", "application/x-www-form-urlencoded" +) +POST_TIMEOUT = 
get_config("hooks.timeout") +DEBUG = get_config("hooks.webhook-debug") +REPO_URL = get_config("meta.url") +COMMIT_URL = get_config("meta.commiturl") +COMPARE_URL = get_config("meta.compareurl") if COMMIT_URL is None and REPO_URL is not None: - COMMIT_URL = REPO_URL + r'/commit/%s' + COMMIT_URL = REPO_URL + r"/commit/%s" if COMPARE_URL is None and REPO_URL is not None: - COMPARE_URL = REPO_URL + r'/compare/%s..%s' + COMPARE_URL = REPO_URL + r"/compare/%s..%s" REPO_NAME = get_repo_name() REPO_DESC = get_repo_description() (REPO_OWNER_NAME, REPO_OWNER_EMAIL) = get_repo_owner() @@ -157,50 +161,54 @@ def get_revisions(old, new, head_commit=False): # pylint: disable=R0914,R0912 if old == ZEROS: # ref creation - commit_range = '%s..%s' % (EMPTY_TREE_HASH, new) + commit_range = "%s..%s" % (EMPTY_TREE_HASH, new) elif new == ZEROS: # ref deletion - commit_range = '%s..%s' % (old, EMPTY_TREE_HASH) + commit_range = "%s..%s" % (old, EMPTY_TREE_HASH) else: - commit_range = '%s..%s' % (old, new) + commit_range = "%s..%s" % (old, new) - revs = git(['rev-list', '--pretty=medium', '--reverse', commit_range]) - sections = revs.split('\n\n') + revs = git(["rev-list", "--pretty=medium", "--reverse", commit_range]) + sections = revs.split("\n\n") revisions = [] s = 0 while s < len(sections): - lines = sections[s].split('\n') + lines = sections[s].split("\n") # first line is 'commit HASH\n' - props = {'id': lines[0].strip().split( - ' ')[1], 'added': [], 'removed': [], 'modified': []} + props = { + "id": lines[0].strip().split(" ")[1], + "added": [], + "removed": [], + "modified": [], + } # call git diff-tree and get the file changes - output = git(['diff-tree', '-r', '-C', '%s' % props['id']]) + output = git(["diff-tree", "-r", "-C", "%s" % props["id"]]) # sort the changes into the added/modified/removed lists for i in DIFF_TREE_RE.finditer(output): item = i.groupdict() - if item['status'] == 'A': + if item["status"] == "A": # addition of a file - props['added'].append(item['file1']) - 
elif item['status'][0] == 'C': + props["added"].append(item["file1"]) + elif item["status"][0] == "C": # copy of a file into a new one - props['added'].append(item['file2']) - elif item['status'] == 'D': + props["added"].append(item["file2"]) + elif item["status"] == "D": # deletion of a file - props['removed'].append(item['file1']) - elif item['status'] == 'M': + props["removed"].append(item["file1"]) + elif item["status"] == "M": # modification of the contents or mode of a file - props['modified'].append(item['file1']) - elif item['status'][0] == 'R': + props["modified"].append(item["file1"]) + elif item["status"][0] == "R": # renaming of a file - props['removed'].append(item['file1']) - props['added'].append(item['file2']) - elif item['status'] == 'T': + props["removed"].append(item["file1"]) + props["added"].append(item["file2"]) + elif item["status"] == "T": # change in the type of the file - props['modified'].append(item['file1']) + props["modified"].append(item["file1"]) else: # Covers U (file is unmerged) # and X ("unknown" change type, usually an error) @@ -211,29 +219,27 @@ def get_revisions(old, new, head_commit=False): # read the header for l in lines[1:]: - key, val = l.split(' ', 1) + key, val = l.split(" ", 1) props[key[:-1].lower()] = val.strip() # read the commit message # Strip leading tabs/4-spaces on the message - props['message'] = re.sub( - r'^(\t| {4})', '', sections[s + 1], 0, re.MULTILINE) + props["message"] = re.sub(r"^(\t| {4})", "", sections[s + 1], 0, re.MULTILINE) # use github time format - basetime = datetime.strptime( - props['date'][:-6], "%a %b %d %H:%M:%S %Y") - tzstr = props['date'][-5:] - props['date'] = basetime.strftime('%Y-%m-%dT%H:%M:%S') + tzstr + basetime = datetime.strptime(props["date"][:-6], "%a %b %d %H:%M:%S %Y") + tzstr = props["date"][-5:] + props["date"] = basetime.strftime("%Y-%m-%dT%H:%M:%S") + tzstr # split up author - m = EMAIL_RE.match(props['author']) + m = EMAIL_RE.match(props["author"]) if m: - 
props['name'] = m.group(1) - props['email'] = m.group(2) + props["name"] = m.group(1) + props["email"] = m.group(2) else: - props['name'] = 'unknown' - props['email'] = 'unknown' - del props['author'] + props["name"] = "unknown" + props["email"] = "unknown" + del props["author"] if head_commit: return props @@ -245,25 +251,25 @@ def get_revisions(old, new, head_commit=False): def get_base_ref(commit, ref): - branches = git(['branch', '--contains', commit]).split('\n') - CURR_BRANCH_RE = re.compile(r'^\* \w+$') + branches = git(["branch", "--contains", commit]).split("\n") + CURR_BRANCH_RE = re.compile(r"^\* \w+$") curr_branch = None if len(branches) > 1: on_master = False for branch in branches: if CURR_BRANCH_RE.match(branch): - curr_branch = branch.strip('* \n') - elif branch.strip() == 'master': + curr_branch = branch.strip("* \n") + elif branch.strip() == "master": on_master = True if curr_branch is None and on_master: - curr_branch = 'master' + curr_branch = "master" if curr_branch is None: - curr_branch = branches[0].strip('* \n') + curr_branch = branches[0].strip("* \n") - base_ref = 'refs/heads/%s' % curr_branch + base_ref = "refs/heads/%s" % curr_branch if base_ref == ref: return None @@ -271,16 +277,17 @@ def get_base_ref(commit, ref): # Fallback return base_ref + # http://stackoverflow.com/a/20559031 def purify(obj): - if hasattr(obj, 'items'): + if hasattr(obj, "items"): newobj = type(obj)() for k in obj: if k is not None and obj[k] is not None: newobj[k] = purify(obj[k]) - elif hasattr(obj, '__iter__'): + elif hasattr(obj, "__iter__"): newobj = [] for k in obj: if k is not None: @@ -298,19 +305,16 @@ def make_json(old, new, ref): compareurl = COMPARE_URL % (old, new) data = { - 'before': old, - 'after': new, - 'ref': ref, - 'compare': compareurl, - 'repository': { - 'url': REPO_URL, - 'name': REPO_NAME, - 'description': REPO_DESC, - 'owner': { - 'name': REPO_OWNER_NAME, - 'email': REPO_OWNER_EMAIL - } - } + "before": old, + "after": new, + "ref": ref, 
+ "compare": compareurl, + "repository": { + "url": REPO_URL, + "name": REPO_NAME, + "description": REPO_DESC, + "owner": {"name": REPO_OWNER_NAME, "email": REPO_OWNER_EMAIL}, + }, } revisions = get_revisions(old, new) @@ -318,32 +322,35 @@ def make_json(old, new, ref): for r in revisions: url = None if COMMIT_URL is not None: - url = COMMIT_URL % r['id'] - commits.append({'id': r['id'], - 'author': {'name': r['name'], 'email': r['email']}, - 'url': url, - 'message': r['message'], - 'timestamp': r['date'], - 'added': r['added'], - 'removed': r['removed'], - 'modified': r['modified'] - }) - data['commits'] = commits - data['size'] = len(commits) - data['head_commit'] = get_revisions(old, new, True) + url = COMMIT_URL % r["id"] + commits.append( + { + "id": r["id"], + "author": {"name": r["name"], "email": r["email"]}, + "url": url, + "message": r["message"], + "timestamp": r["date"], + "added": r["added"], + "removed": r["removed"], + "modified": r["modified"], + } + ) + data["commits"] = commits + data["size"] = len(commits) + data["head_commit"] = get_revisions(old, new, True) base_ref = get_base_ref(new, ref) if base_ref: - data['base_ref'] = base_ref + data["base_ref"] = base_ref return json.dumps(data) def post_encode_data(contenttype, rawdata): - if contenttype == 'application/json': - return rawdata.encode('UTF-8') - if contenttype == 'application/x-www-form-urlencoded': - return urllib.parse.urlencode({'payload': rawdata}).encode('UTF-8') + if contenttype == "application/json": + return rawdata.encode("UTF-8") + if contenttype == "application/x-www-form-urlencoded": + return urllib.parse.urlencode({"payload": rawdata}).encode("UTF-8") assert False, "Unsupported data encoding" return None @@ -365,15 +372,15 @@ def build_handler(realm, url, user, passwd): def post(url, data): headers = { - 'Content-Type': POST_CONTENTTYPE, - 'X-GitHub-Event': 'push', + "Content-Type": POST_CONTENTTYPE, + "X-GitHub-Event": "push", } postdata = post_encode_data(POST_CONTENTTYPE, 
data) if POST_SECRET_TOKEN is not None: hmacobj = hmac.new(POST_SECRET_TOKEN, postdata, hashlib.sha1) - signature = 'sha1=' + hmacobj.hexdigest() - headers['X-Hub-Signature'] = signature + signature = "sha1=" + hmacobj.hexdigest() + headers["X-Hub-Signature"] = signature request = urllib.request.Request(url, postdata, headers) handler = build_handler(POST_USER, url, POST_PASS, POST_REALM) @@ -393,7 +400,7 @@ def post(url, data): def main(lines): for line in lines: - old, new, ref = line.strip().split(' ') + old, new, ref = line.strip().split(" ") data = make_json(old, new, ref) if DEBUG: print(data) @@ -401,10 +408,10 @@ def main(lines): if POST_URL: urls.append(POST_URL) if POST_URLS: - urls.extend(re.split(r',\s*', POST_URLS)) + urls.extend(re.split(r",\s*", POST_URLS)) for url in urls: post(url.strip(), data) -if __name__ == '__main__': +if __name__ == "__main__": main(sys.stdin) From 68c4e39a3ca309e36c3e3fc08ce27ff99218afd3 Mon Sep 17 00:00:00 2001 From: Brian Harring Date: Tue, 11 Nov 2025 17:36:07 +0100 Subject: [PATCH 2/7] chore: simplify POST_URLS Signed-off-by: Brian Harring --- notify-webhook.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/notify-webhook.py b/notify-webhook.py index be86086..379907f 100755 --- a/notify-webhook.py +++ b/notify-webhook.py @@ -134,8 +134,11 @@ def get_repo_owner(): return (repo_owner_name, repo_owner_email) -POST_URL = get_config("hooks.webhookurl") -POST_URLS = get_config("hooks.webhookurls") +POST_URLS = get_config("hooks.webhookurl", "").strip().split() +# comma delimited format. Tolerate dangling commas. 
+POST_URLS.extend( + x.strip() for x in get_config("hooks.webhookurls", "").split(",") if x.strip() +) POST_USER = get_config("hooks.authuser") POST_PASS = get_config("hooks.authpass") POST_REALM = get_config("hooks.authrealm") @@ -404,13 +407,9 @@ def main(lines): data = make_json(old, new, ref) if DEBUG: print(data) - urls = [] - if POST_URL: - urls.append(POST_URL) - if POST_URLS: - urls.extend(re.split(r",\s*", POST_URLS)) - for url in urls: - post(url.strip(), data) + + for url in POST_URLS: + post(url, data) if __name__ == "__main__": From 28c763f1c438791e4090e068517b75a9d4cd566f Mon Sep 17 00:00:00 2001 From: Brian Harring Date: Tue, 11 Nov 2025 17:45:33 +0100 Subject: [PATCH 3/7] chore: make this debuggable and pdb.set_trace() compatible Signed-off-by: Brian Harring --- notify-webhook.py | 43 ++++++++++++++++++++++++++++++++----------- 1 file changed, 32 insertions(+), 11 deletions(-) diff --git a/notify-webhook.py b/notify-webhook.py index 379907f..6ceb348 100755 --- a/notify-webhook.py +++ b/notify-webhook.py @@ -300,7 +300,7 @@ def purify(obj): return type(obj)(newobj) -def make_json(old, new, ref): +def make_json(old, new, ref, **json_serialize_kwargs): # Lots more fields could be added # https://developer.github.com/v3/activity/events/types/#pushevent compareurl = None @@ -346,7 +346,7 @@ def make_json(old, new, ref): if base_ref: data["base_ref"] = base_ref - return json.dumps(data) + return json.dumps(data, **json_serialize_kwargs) def post_encode_data(contenttype, rawdata): @@ -401,16 +401,37 @@ def post(url, data): print(errmsg, file=sys.stderr) -def main(lines): - for line in lines: - old, new, ref = line.strip().split(" ") - data = make_json(old, new, ref) - if DEBUG: - print(data) +def main(cli_args=sys.argv[1:], stdin=sys.stdin): + # disable posting for local invocations. 
+ post_urls = POST_URLS + debug = DEBUG + if cli_args: + debug = True + post_urls = [] + if len(cli_args) % 3: + raise Exception("cli args must be in groups of 3; old new ref") + + # make it simpler for cli invocation to behave like hook mode, without + # making the human have to do things exactly the same. + def f(val): + if not val.strip("0"): + return ZEROS + # force full sha like hook does + return git(["rev-parse", val]) + + i = iter(cli_args) + targets = zip(map(f, i), map(f, i), i) + else: + targets = (line.strip().split(" ", 2) for line in stdin) # pyright: ignore[reportAssignmentType] + + for old, new, ref in targets: + json_data = make_json(old, new, ref, indent=2 if debug else None) + if debug: + print(json_data) - for url in POST_URLS: - post(url, data) + for url in post_urls: + post(url, json_data) if __name__ == "__main__": - main(sys.stdin) + main() From 94e6ebc07227a4ef51c743392c50e20dd6678bb2 Mon Sep 17 00:00:00 2001 From: Brian Harring Date: Tue, 11 Nov 2025 15:30:24 +0100 Subject: [PATCH 4/7] Fix exception for deletion events and force annotation Fixes * added boolean deleted and created fields per v3 spec. These indicate if it's a ref deletion, new ref created, etc. * fixed the exception for deletion events * forced data annotation via dataclass, thus documenting the event structure. I did this explicitly because there are no tests and I had to rework a lot of this, so might as well use a dataclass to ensure I didn't drop any event data. This is not runtime enforcement however. * fixed the comparison urls sent for a ref creation event. This is larger than intended because of the data validation I had to shove in to verify I didn't break anything. The result is simpler/cleaner however.
Signed-off-by: Brian Harring --- notify-webhook.py | 180 +++++++++++++++++++++++++++++++--------------- 1 file changed, 124 insertions(+), 56 deletions(-) diff --git a/notify-webhook.py b/notify-webhook.py index 6ceb348..68a64b7 100755 --- a/notify-webhook.py +++ b/notify-webhook.py @@ -1,5 +1,6 @@ #!/usr/bin/env python3 +import dataclasses import hashlib import hmac import json @@ -7,6 +8,7 @@ import re import subprocess import sys +import typing import urllib.error import urllib.parse import urllib.request @@ -160,35 +162,34 @@ def get_repo_owner(): (REPO_OWNER_NAME, REPO_OWNER_EMAIL) = get_repo_owner() -def get_revisions(old, new, head_commit=False): - # pylint: disable=R0914,R0912 - if old == ZEROS: - # ref creation - commit_range = "%s..%s" % (EMPTY_TREE_HASH, new) - elif new == ZEROS: - # ref deletion - commit_range = "%s..%s" % (old, EMPTY_TREE_HASH) - else: - commit_range = "%s..%s" % (old, new) +def get_revisions( + old, new, commit_url: None | str = None +) -> typing.Iterable["CommitData"]: + if old == new: + # oh get bent. Someone tried pushing a freshly init'd repo. + # Not sure if it's possible in the real world, but account for it. 
+ return - revs = git(["rev-list", "--pretty=medium", "--reverse", commit_range]) + revs = git(["rev-list", "--pretty=medium", "--reverse", f"{old}..{new}"]) sections = revs.split("\n\n") - revisions = [] s = 0 while s < len(sections): lines = sections[s].split("\n") # first line is 'commit HASH\n' + sha = lines[0].strip().split(" ")[1] + props = { - "id": lines[0].strip().split(" ")[1], + "sha": sha, "added": [], "removed": [], "modified": [], + "url": commit_url % sha if commit_url else None, } # call git diff-tree and get the file changes - output = git(["diff-tree", "-r", "-C", "%s" % props["id"]]) + output = git(["diff-tree", "-r", "-C", "%s" % props["sha"]]) # sort the changes into the added/modified/removed lists for i in DIFF_TREE_RE.finditer(output): @@ -227,31 +228,24 @@ def get_revisions(old, new, head_commit=False): # read the commit message # Strip leading tabs/4-spaces on the message - props["message"] = re.sub(r"^(\t| {4})", "", sections[s + 1], 0, re.MULTILINE) + props["message"] = re.sub( + r"^(\t| {4})", "", sections[s + 1], count=0, flags=re.MULTILINE + ) # use github time format basetime = datetime.strptime(props["date"][:-6], "%a %b %d %H:%M:%S %Y") tzstr = props["date"][-5:] props["date"] = basetime.strftime("%Y-%m-%dT%H:%M:%S") + tzstr - # split up author m = EMAIL_RE.match(props["author"]) if m: - props["name"] = m.group(1) - props["email"] = m.group(2) + props["author"] = AuthorData(name=m.group(1), email=m.group(2)) else: - props["name"] = "unknown" - props["email"] = "unknown" - del props["author"] - - if head_commit: - return props + props["author"] = AuthorData(name="unknown", email="unkown") - revisions.append(props) + yield CommitData(**props) s += 2 - return revisions - def get_base_ref(commit, ref): branches = git(["branch", "--contains", commit]).split("\n") @@ -300,53 +294,127 @@ def purify(obj): return type(obj)(newobj) +class ToDict: + def as_dict(self): + d = dataclasses.asdict(self) # pyright: ignore[reportArgumentType] + 
d.update((k, v.as_dict()) for k, v in d.items() if isinstance(v, ToDict)) + return d + + +# use dataclasses to force the necessary shape of events. +# See https://web.archive.org/web/20201113233708/https://developer.github.com/webhooks/event-payloads/#push ; +# the original code was written against v3, github moved on and is marking a fair amount more as required. +# Continue what this code was originally written against, but also add things missing from that event spec. + + +@dataclasses.dataclass(kw_only=True) +class AuthorData(ToDict): + name: str + email: str + + +@dataclasses.dataclass(kw_only=True) +class _BaseCommitData(ToDict): + # This is the basic definition. Dataclass compiles an __init__ on the fly, + # we split the classes to allow that to be reused. + sha: str + message: str + author: AuthorData + added: list[str] + removed: list[str] + modified: list[str] + + # outside spec from above. + date: str + url: None | str + + +class CommitData(_BaseCommitData): + # This gets directly injected into the resultant dict. It's secondary to allow + # everything above to have runtime type validation. + extras: dict[str, typing.Any] + + def __init__(self, **kwargs) -> None: + # dataclasses don't allow extra params; we want the type validation, thus + # we isolate what we allow dataclass to handle + extras = kwargs.pop("extras", {}) + extras.update( + { + k: kwargs.pop(k) + for k in list(kwargs) + if k not in self.__dataclass_fields__ + } + ) + super().__init__(**kwargs) + self.extras = extras + + def as_dict(self): + d = super().as_dict() + # we've been returning this historically, so do so.
+ d["id"] = self.sha + d.update(self.extras) + return d + + +@dataclasses.dataclass(kw_only=True) +class PushEvent(ToDict): + ref: str + before: str + after: str + repository: dict[str, str | dict] + commits: list[CommitData] + base_ref: None | str = None + compare: None | str = None + deleted: bool = False + created: bool = False + + def as_dict(self): + d = super().as_dict() + d["head_commit"] = ( + None if self.deleted or not self.commits else d["commits"][-1] + ) + # This is outside the spec above, but we've been returning it, so continue to do so. + d["size"] = len(self.commits) + return d + + def make_json(old, new, ref, **json_serialize_kwargs): # Lots more fields could be added # https://developer.github.com/v3/activity/events/types/#pushevent - compareurl = None - if COMPARE_URL: - compareurl = COMPARE_URL % (old, new) + deleted = new == ZEROS + # This is the real sha of old, used for compare urls and for internal git calls. + old_sha = EMPTY_TREE_HASH if old == ZEROS else old data = { "before": old, "after": new, "ref": ref, - "compare": compareurl, + "deleted": deleted, + "created": not deleted and old == ZEROS, + # impossible to compare for a delete, so don't give the compare. 
+ "compare": ( + COMPARE_URL % (old_sha, new) if (COMPARE_URL and not deleted) else None + ), "repository": { "url": REPO_URL, "name": REPO_NAME, "description": REPO_DESC, "owner": {"name": REPO_OWNER_NAME, "email": REPO_OWNER_EMAIL}, }, + "commits": [], + "base_ref": None, } - revisions = get_revisions(old, new) - commits = [] - for r in revisions: - url = None - if COMMIT_URL is not None: - url = COMMIT_URL % r["id"] - commits.append( - { - "id": r["id"], - "author": {"name": r["name"], "email": r["email"]}, - "url": url, - "message": r["message"], - "timestamp": r["date"], - "added": r["added"], - "removed": r["removed"], - "modified": r["modified"], - } - ) - data["commits"] = commits - data["size"] = len(commits) - data["head_commit"] = get_revisions(old, new, True) + if not deleted: + data["commits"] = list(get_revisions(old_sha, new, COMMIT_URL)) + + if base_ref := get_base_ref(new, ref): + data["base_ref"] = base_ref - base_ref = get_base_ref(new, ref) - if base_ref: - data["base_ref"] = base_ref + # validate it fully. + event = PushEvent(**data) - return json.dumps(data, **json_serialize_kwargs) + return json.dumps(event.as_dict(), **json_serialize_kwargs) def post_encode_data(contenttype, rawdata): From d45f79513826166de809d6f6f2e944ae142aeb1e Mon Sep 17 00:00:00 2001 From: Brian Harring Date: Tue, 11 Nov 2025 18:46:39 +0100 Subject: [PATCH 5/7] fix:bail if git command fails This is what was masking the failure in handling deletion events and certain creation events. 
Signed-off-by: Brian Harring --- notify-webhook.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/notify-webhook.py b/notify-webhook.py index 68a64b7..848cd80 100755 --- a/notify-webhook.py +++ b/notify-webhook.py @@ -46,10 +46,10 @@ def git(args): args = ["git"] + args - cmd = subprocess.Popen(args, stdout=subprocess.PIPE) - details = cmd.stdout.read() - details = details.decode("utf-8", "replace").strip() - return details + result = subprocess.run( + args, stdout=subprocess.PIPE, stdin=subprocess.DEVNULL, check=True + ) + return result.stdout.decode("utf-8", "replace").strip() def _git_config(): From 5658a5486588afc544a5eb1648bb2cd8eb8fda34 Mon Sep 17 00:00:00 2001 From: Brian Harring Date: Tue, 11 Nov 2025 19:06:21 +0100 Subject: [PATCH 6/7] fix name and email parsing See pr #27 mostly addressed this, but this version is more paranoid. When this lands, #27 can be closed out. Signed-off-by: Brian Harring --- notify-webhook.py | 44 ++++++++++++++++++++++++++++---------------- 1 file changed, 28 insertions(+), 16 deletions(-) diff --git a/notify-webhook.py b/notify-webhook.py index 848cd80..ad4617c 100755 --- a/notify-webhook.py +++ b/notify-webhook.py @@ -15,8 +15,6 @@ from collections import OrderedDict from datetime import datetime -EMAIL_RE = re.compile(r"^(\"?)(?P.*)\1\s+<(?P.*)>$") - # see git-diff-tree 'RAW OUTPUT FORMAT' # https://git-scm.com/docs/git-diff-tree#_raw_output_format DIFF_TREE_RE = re.compile( @@ -96,14 +94,32 @@ def get_repo_description(): return "" -def extract_name_email(s): - p = EMAIL_RE - _ = p.search(s.strip()) - if not _: - return (None, None) - name = (_.group("name") or "").strip() - email = (_.group("email") or "").strip() - return (name, email) +_STRIP_QUOTED_NAME_RE = re.compile(r"^\s*([\"'])\s*(?P.*?)\s*\1\s*$") + + +def _strip_quoted_name(val): + # I can't think of why it would come through with \' beyond presumably git CLI escaping, but + # whatever, cover that base too. 
+ if m := _STRIP_QUOTED_NAME_RE.match(val): + return m.groupdict()["value"] + return val + + +_EMAIL_RE = re.compile(r"\s*(?P[^<]+?)\s*<\s*(?P[^>]+?)\s*>\s*$") + + +def extract_name_email(s, default_missing=""): + s = s.strip() + + if m := _EMAIL_RE.match(s): + g = m.groupdict() + # compatibility: strip out quotation, since the original code tried to do this. + + return (_strip_quoted_name(g["name"]), g["email"]) + # guess a bit + if "@" in s: + return default_missing, s + return (_strip_quoted_name(s) if s else default_missing, default_missing) def get_repo_owner(): @@ -237,12 +253,8 @@ def get_revisions( tzstr = props["date"][-5:] props["date"] = basetime.strftime("%Y-%m-%dT%H:%M:%S") + tzstr - m = EMAIL_RE.match(props["author"]) - if m: - props["author"] = AuthorData(name=m.group(1), email=m.group(2)) - else: - props["author"] = AuthorData(name="unknown", email="unkown") - + (name, email) = extract_name_email(props["author"], "unknown") + props["author"] = AuthorData(name=name, email=email) yield CommitData(**props) s += 2 From d35cbc3fa48817df23ca2299afa043c1560dcb90 Mon Sep 17 00:00:00 2001 From: Brian Harring Date: Tue, 11 Nov 2025 20:21:00 +0100 Subject: [PATCH 7/7] fix diff-tree integration for commit data. Note: this actually fails on the first commit in history; diff-tree is against the parent. It's an edge case, but it exists. This is a separate PR from #29 for review reasons. When this and that land, issue #28 can be closed.
Signed-off-by: Brian Harring --- notify-webhook.py | 132 ++++++++++++++++++++++++++-------------------- 1 file changed, 75 insertions(+), 57 deletions(-) diff --git a/notify-webhook.py b/notify-webhook.py index ad4617c..7ede2b6 100755 --- a/notify-webhook.py +++ b/notify-webhook.py @@ -4,6 +4,7 @@ import hashlib import hmac import json +import logging import os import re import subprocess @@ -12,32 +13,9 @@ import urllib.error import urllib.parse import urllib.request -from collections import OrderedDict +from collections import OrderedDict, defaultdict from datetime import datetime -# see git-diff-tree 'RAW OUTPUT FORMAT' -# https://git-scm.com/docs/git-diff-tree#_raw_output_format -DIFF_TREE_RE = re.compile( - r" \ - ^: \ - (?P[0-9]{6}) \ - \s+ \ - (?P[0-9]{6}) \ - \s+ \ - (?P[0-9a-f]{7,40}) \ - \s+ \ - (?P[0-9a-f]{7,40}) \ - \s+ \ - (?P[ADTUX]|[CR][0-9]{1,3}|M[0-9]{0,3}) \ - \s+ \ - (?P\S+) \ - (?:\s+ \ - (?P\S+) \ - )? \ - $", - re.MULTILINE | re.VERBOSE, -) - EMPTY_TREE_HASH = "4b825dc642cb6eb9a060e54bf8d69288fbee4904" ZEROS = "0000000000000000000000000000000000000000" @@ -203,39 +181,16 @@ def get_revisions( "modified": [], "url": commit_url % sha if commit_url else None, } - - # call git diff-tree and get the file changes - output = git(["diff-tree", "-r", "-C", "%s" % props["sha"]]) - - # sort the changes into the added/modified/removed lists - for i in DIFF_TREE_RE.finditer(output): - item = i.groupdict() - if item["status"] == "A": - # addition of a file - props["added"].append(item["file1"]) - elif item["status"][0] == "C": - # copy of a file into a new one - props["added"].append(item["file2"]) - elif item["status"] == "D": - # deletion of a file - props["removed"].append(item["file1"]) - elif item["status"] == "M": - # modification of the contents or mode of a file - props["modified"].append(item["file1"]) - elif item["status"][0] == "R": - # renaming of a file - props["removed"].append(item["file1"]) - props["added"].append(item["file2"]) - elif 
item["status"] == "T": - # change in the type of the file - props["modified"].append(item["file1"]) - else: - # Covers U (file is unmerged) - # and X ("unknown" change type, usually an error) - # When we get X, we do not know what actually happened so - # it's safest just to ignore it. We shouldn't be seeing U - # anyway, so we can ignore that too. - pass + props.update( + get_tree_changes_from_commit( + props["sha"], + # diff-tree doesn't report properly for the first commit in history; + # force the parent if it's the first. + forced_parent=( + EMPTY_TREE_HASH if s == 0 and old == EMPTY_TREE_HASH else None + ), + ) + ) # read the header for l in lines[1:]: @@ -259,6 +214,69 @@ def get_revisions( s += 2 +def get_tree_changes_from_commit( + sha: str, forced_parent: str | None = None +) -> typing.Mapping[str, list[str]]: + raw_tree = git( + [ + "diff-tree", + "--raw", + "-z", + "-r", + # detect copies and renames + "-C", + "-M", + "--no-commit-id", + # force the simple format used below. + "--name-status", + sha if not forced_parent else f"{forced_parent}..{sha}", + # ensure git knows that was a revish, flushing out any code bugs. + "--", + ] + ) + # see git-diff-tree 'RAW OUTPUT FORMAT' for the actions involved + # https://git-scm.com/docs/git-diff-tree#_raw_output_format + + # the last record still has a null which would trigger another record + # parsing loop + chunks = iter(raw_tree.split("\0")[:-1]) + + changes = defaultdict(list) + for action in chunks: + # actions can carry a confidence integer percent, thus strip it. + action = action[0] + match action: + case "A": + changes["added"].append(next(chunks)) + case "C": + # copy.
Just record the addition + next(chunks) # discard source file + changes["added"].append(next(chunks)) + case "D": + changes["removed"].append(next(chunks)) + case "M": + changes["modified"].append(next(chunks)) + case "R": + changes["removed"].append(next(chunks)) + changes["added"].append(next(chunks)) + case "T": + # change of type of file. Symlink replacing a file, file replacing a symlink, etc. + changes["added"].append(next(chunks)) + case "U": + logging.warning( + "encountured U status in diff-tree; this impossible, there is a bug in this script" + ) + case "X": + logging.warning( + "encontured status X in diff-tree; please report this, it probably a bug in git itself" + ) + case _: + logging.warning( + f"unsupported action encountered during diff-tree: {action!r}" + ) + return changes + + def get_base_ref(commit, ref): branches = git(["branch", "--contains", commit]).split("\n") CURR_BRANCH_RE = re.compile(r"^\* \w+$")