diff --git a/.gitignore b/.gitignore
index 5d20f84..770904e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,4 @@
 token
 repos/
-config.yml
+*config.yml
 __pycache__/
diff --git a/config.example.yml b/config.example.yml
index 96a2d3c..372fad9 100644
--- a/config.example.yml
+++ b/config.example.yml
@@ -2,8 +2,10 @@
 repo_folder: repos
 repos:
   - org: organization
-    tagged_releases: false
+    tagged_releases: true
     branch_releases: false
+    # days for calculating release cadence
+    sliding_release_window: 30
     name: repo
     branches:
       main: develop
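To make the new knob concrete, here is a minimal config sketch in the same shape as config.example.yml (the org/name values and branch names are placeholders; per GithubAccess.__init__ below, sliding_release_window falls back to 30 days and branches.release falls back to "main" when omitted):

repo_folder: repos
repos:
  - org: your-org        # placeholder
    name: your-repo      # placeholder
    tagged_releases: true
    branch_releases: false
    # days for calculating release cadence
    sliding_release_window: 30
    branches:
      main: main
      release: main      # optional; the code falls back to "main"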
diff --git a/github_stats/github_api.py b/github_stats/github_api.py
index 99aca91..8c118d7 100644
--- a/github_stats/github_api.py
+++ b/github_stats/github_api.py
@@ -16,6 +16,7 @@
 import requests
 from requests.adapters import HTTPAdapter
 from requests.packages.urllib3.util.retry import Retry
+import statistics
 import time
 import urllib.parse

@@ -31,7 +32,7 @@ class GithubAccess(object):

     BASE_URL = "https://api.github.com/"

-    def __init__(self, config):
+    def __init__(self, config: dict):
         self.log = logging.getLogger("github-stats.collection")
         auth_token = os.environ.get("GITHUB_TOKEN", None)
         if not auth_token:
@@ -55,22 +56,23 @@ def __init__(self, config):
         self._request.mount("https://", adapter)
         self._request.headers.update(headers)
         self.repo = Repo(config)
+        self.release_window: int = config["repo"].get("sliding_release_window", 30)
-        self.tagged_releases = config["repo"].get("tagged_releases", False)
-        self.branch_releases = config["repo"].get("branch_releases", False)
+        self.tagged_releases: bool = config["repo"].get("tagged_releases", False)
+        self.branch_releases: bool = config["repo"].get("branch_releases", False)
         if self.tagged_releases and self.branch_releases:
             raise Exception("Can't have tagged releases and branch releases!")
-        self.org = config["repo"]["org"]
-        self.repo_name = f"{self.org}/{config['repo']['name']}"
-        self.ignored_workflows = config["repo"].get("ignored_workflows", list())
-        self.ignored_statuses = config["repo"].get("ignored_statuses", ["queued"])
-        self.main_branch = config["repo"]["branches"].get("main", "main")
-        self.release_branch = config["repo"]["branches"].get("release", "main")
-        self.non_user_events = config["repo"].get("non_user_events", ["schedule"])
-        self.per_page = config.get("query", {}).get("results_per_page", 500)
-        self.special_logins = config["repo"].get("special_logins", {})
-        self.special_names = {v: k for k, v in self.special_logins.items()}
-        self.broken_users = config["repo"].get("broken_users", [])
+        self.org: str = config["repo"]["org"]
+        self.repo_name: str = f"{self.org}/{config['repo']['name']}"
+        self.ignored_workflows: list = config["repo"].get("ignored_workflows", list())
+        self.ignored_statuses: list = config["repo"].get("ignored_statuses", ["queued"])
+        self.main_branch: str = config["repo"]["branches"].get("main", "main")
+        self.release_branch: str = config["repo"]["branches"].get("release", "main")
+        self.non_user_events: list = config["repo"].get("non_user_events", ["schedule"])
+        self.per_page: int = config.get("query", {}).get("results_per_page", 500)
+        self.special_logins: dict = config["repo"].get("special_logins", {})
+        self.special_names: dict = {v: k for k, v in self.special_logins.items()}
+        self.broken_users: list = config["repo"].get("broken_users", [])

         self.tag_matches, self.bug_matches, self.pr_bug_matches = load_patterns(
             config["repo"].get("tag_patterns", []),
@@ -80,7 +82,7 @@ def __init__(self, config):
         """
         Many label matching patterns
         """
-        self.label_matches = {
+        self.label_matches: dict = {
             labelname: labels
             for labelname, labels in config["repo"].get("additional_labels", {}).items()
         }
@@ -102,7 +104,7 @@ def __init__(self, config):
         self.starttime = time.time()
         self._load_contributors()

-    def _retry_empty(self, url):
+    def _retry_empty(self, url: str):
         """
         Occasionally cold-cache queries to Github return empty results.
         We'll set up a retry loop to avoid that (since the built-in
@@ -121,7 +123,7 @@ def _retry_empty(self, url):
         else:
             return [], {}

-    def _github_query(self, url, key=None, params=None):
+    def _github_query(self, url, key: str = "", params: dict | None = None):
         """
         Query paginated endpoint from Github

@@ -135,7 +137,7 @@ def _github_query(self, url, key=None, params=None):
         self.log.debug(f"Requesting {url}")
         req = requests.models.PreparedRequest()
         req.prepare_url(url, params)
-        data, links = self._retry_empty(req.url)
+        data, links = self._retry_empty(str(req.url))
         datatype = type(data)
         if key and datatype == dict and key in data:
             if isinstance(data[key], list):
@@ -164,7 +166,7 @@
             yield data
             next_url = links.get("next", dict()).get("url", "")

-    def _cache_user_login(self, login):
+    def _cache_user_login(self, login: str) -> str:
         """
         Return user's name based on their Github login
         (this is so we can avoid having two keys for the same user)
@@ -191,7 +193,7 @@
         self.log.debug(f"Returned name: {self.user_login_cache['logins'][login]}")
         return self.user_login_cache["logins"][login]

-    def _cache_user_name(self, name):
+    def _cache_user_name(self, name: str) -> str:
         """
         Return user's actual login based on their Github name
         (this is so we can avoid having two keys for the same user)
@@ -209,7 +211,7 @@
             f"User {name} doesn't exist in cache or in {self.special_logins}!"
         )

-    def _load_contributors(self):
+    def _load_contributors(self) -> None:
         """
         Configure all users that have commits into the repo

@@ -230,7 +232,7 @@
             f"Loaded contributors in {self.contributor_collection_time} seconds"
         )

-    def _set_collection_date(self, date, window):
+    def _set_collection_date(self, date: datetime, window: int) -> None:
         if not self.stats["collection_date"]:
             self.stats["collection_date"] = date
             self.log.debug(f"Collection timestamp: {date}")
@@ -238,7 +240,9 @@
             self.stats["window"] = window * 4
             self.log.debug(f"Collection window: {window}")

-    def load_all_stats(self, base_date=datetime.today(), window=DEFAULT_WINDOW):
+    def load_all_stats(
+        self, base_date: datetime = datetime.today(), window: int = DEFAULT_WINDOW
+    ) -> None:
         """
         Wrapper to execute all stat collection functions

@@ -278,7 +282,9 @@
         self.load_workflow_runs(base_date, window)
         self.stats["collection_time_secs"] = time.time() - self.starttime

-    def load_pull_requests(self, base_date=datetime.today(), window=DEFAULT_WINDOW):
+    def load_pull_requests(
+        self, base_date: datetime = datetime.today(), window: int = DEFAULT_WINDOW
+    ) -> None:
         """
         Collect pull request data

@@ -321,26 +327,21 @@ def load_pull_requests(self, base_date=datetime.today(), window=DEFAULT_WINDOW):

             # Calculate avg PR time
             created = datetime.strptime(pull["created_at"], "%Y-%m-%dT%H:%M:%SZ")
-            closed = False
-            merged = pull.get("merged_at", None)
-            merged_ts = None
-            if not merged:
-                closed = pull.get("closed_at", None)
-                self.stats["pull_requests"]["total_closed_pull_requests"] += 1
-                self.stats["users"][author]["total_closed_pull_requests"] += 1
-            else:
-                self.stats["pull_requests"]["total_merged_pull_requests"] += 1
-                self.stats["users"][author]["total_merged_pull_requests"] += 1
-            if merged or closed:
-                if merged:
-                    endtime = datetime.strptime(merged, "%Y-%m-%dT%H:%M:%SZ")
-                else:
-                    endtime = datetime.strptime(closed, "%Y-%m-%dT%H:%M:%SZ")
-                timeopen = (endtime - created).total_seconds()
-                self.stats["pull_requests"]["total_pr_time_open_secs"] += timeopen
-                self.stats["users"][author]["total_pr_time_open_secs"] += timeopen
-                if merged:
-                    merged_ts = datetime.strptime(merged, "%Y-%m-%dT%H:%M:%SZ").timestamp()
+            closed = pull.get("closed_at", None)
+            merged: str | None = pull.get("merged_at", None)
+            merged_ts: float | None = None
+            # check merged first: Github marks merged PRs as closed too,
+            # so "closed" below means closed without merging
+            if merged:
+                self.stats["pull_requests"]["total_merged_pull_requests"] += 1
+                self.stats["users"][author]["total_merged_pull_requests"] += 1
+                endtime = datetime.strptime(merged, "%Y-%m-%dT%H:%M:%SZ")
+                merged_ts = endtime.timestamp()
+            elif closed:
+                self.stats["pull_requests"]["total_closed_pull_requests"] += 1
+                self.stats["users"][author]["total_closed_pull_requests"] += 1
+                endtime = datetime.strptime(closed, "%Y-%m-%dT%H:%M:%SZ")
+            if merged or closed:
+                timeopen = (endtime - created).total_seconds()
+                self.stats["pull_requests"]["total_pr_time_open_secs"] += timeopen
+                self.stats["users"][author]["total_pr_time_open_secs"] += timeopen

             # process/count labels of this PR
             for label in pull["labels"]:
@@ -409,7 +410,9 @@
             f"Loaded pull requests in {self.stats['pull_requests']['collection_time']} seconds"
         )

-    def load_commits(self, base_date=datetime.today(), window=DEFAULT_WINDOW):
+    def load_commits(
+        self, base_date: datetime = datetime.today(), window: int = DEFAULT_WINDOW
+    ) -> None:
         """
         Collect commit log from pygit2
         This will not be a perfect representation of commits, but should
@@ -488,7 +491,9 @@
             f"Loaded commit history in {self.stats['commits']['collection_time']} seconds"
seconds" ) - def load_branches(self, base_date=datetime.today(), window=DEFAULT_WINDOW): + def load_branches( + self, base_date: datetime = datetime.today(), window: int = DEFAULT_WINDOW + ) -> None: """ Because getting branch details requires a second query, this function will be slower than loading @@ -559,7 +564,9 @@ def load_branches(self, base_date=datetime.today(), window=DEFAULT_WINDOW): f"Loaded branch details in {self.stats['branches']['collection_time']} seconds" ) - def load_repo_stats(self, base_date=datetime.today(), window=DEFAULT_WINDOW): + def load_repo_stats( + self, base_date: datetime = datetime.today(), window: int = DEFAULT_WINDOW + ) -> None: """ This data is already visible in the "Insights" panel of a repo, but it's fairly easy to collect, so let's use it @@ -769,7 +776,83 @@ def load_repo_stats(self, base_date=datetime.today(), window=DEFAULT_WINDOW): f"Loaded repo stats in {self.stats['repo_stats']['collection_time']} seconds" ) - def load_releases(self, base_date=datetime.today(), window=DEFAULT_WINDOW): + def load_release_window_stats( + self, end_date: datetime = datetime.today(), window: int = 30 + ) -> None: + """ + Load windowed release stats + i.e. how many releases within the last X days + """ + base_date = end_date - timedelta(window) + base_time = base_date.timestamp() + end_time = end_date.timestamp() + last_release = None + all_releases_commit_deltas = [] + all_releases_total_delta_in_minutes = 0 + all_releases_total_commits = 0 + releases = list(self.repo.releases) + for release in releases: + commit_hex, timestamp, author = release + if timestamp < base_time or timestamp > end_time: + self.log.debug( + f"{commit_hex}:{timestamp} outside release window {base_time}:{end_time}" + ) + continue + self.log.info( + f"RELEASE: {commit_hex} at {timestamp} by {author}", + ) + + if last_release: + total_delta = 0 + deltas = [] + commits = self.repo.commits_between_releases(last_release, release) + self.log.info( + f"Found {len(commits)} from {last_release[0]} to {commit_hex}" + ) + for commit in commits: + delta_in_minutes = (commit.commit_time - timestamp) / 60 + deltas.append(delta_in_minutes) + total_delta += delta_in_minutes + release_average_delta_in_hours = round(total_delta / 60 / len(commits)) + release_median_delta_in_hours = round(statistics.median(deltas) / 60) + lead_time_msg = "lead time for commit in release, in hours" + self.log.debug( + f"Average {lead_time_msg}: {release_average_delta_in_hours}\n" + + f"Median {lead_time_msg}: {release_median_delta_in_hours}" + ) + + all_releases_total_commits += len(commits) + all_releases_total_delta_in_minutes += total_delta + all_releases_commit_deltas.extend(deltas) + last_release = release + + average_in_hours = 0 + median_in_hours = 0 + window_message = f"{window} days before {end_time}" + if len(releases) > 0 and all_releases_total_commits > 0: + average_in_hours = round( + all_releases_total_delta_in_minutes / 60 / all_releases_total_commits + ) + median_in_hours = round(statistics.median(all_releases_commit_deltas) / 60) + lead_time_msg = "lead time for commit->release, in hours" + self.log.info( + f"Analyzed {len(releases)} releases found in {window_message}\n" + + f"Average {lead_time_msg}: {average_in_hours}\n" + + f"Median {lead_time_msg}: {median_in_hours}" + ) + else: + self.log.info( + f"Found no releases in specified window of {window_message}", + ) + self.stats["releases"]["window_stats"] = { + "windowed_release_count": len(releases), + "avg_lead_time": average_in_hours, + "median_lead_time": 
+        self.stats["releases"]["window_stats"] = {
+            "windowed_release_count": window_release_count,
+            "avg_lead_time": average_in_hours,
+            "median_lead_time": median_in_hours,
+        }
+
+    def load_releases(
+        self, base_date: datetime = datetime.today(), window: int = DEFAULT_WINDOW
+    ) -> None:
         """
         Get details about releases

@@ -804,7 +887,9 @@
             f"Loaded release details in {self.stats['releases']['collection_time']} seconds"
         )

-    def load_workflow_runs(self, base_date=datetime.today(), window=DEFAULT_WINDOW):
+    def load_workflow_runs(
+        self, base_date: datetime = datetime.today(), window: int = DEFAULT_WINDOW
+    ) -> None:
         """
         Parse through workflow runs and collect results

diff --git a/github_stats/gitops.py b/github_stats/gitops.py
index 308f579..b25aa84 100644
--- a/github_stats/gitops.py
+++ b/github_stats/gitops.py
@@ -3,13 +3,14 @@
 import os
 import pygit2
 import time
+from typing import Generator

 from github_stats.util import load_patterns
 from github_stats.schema import DEFAULT_WINDOW


 class Repo(object):
-    def __init__(self, config):
+    def __init__(self, config: dict):
         self.log = logging.getLogger("github-stats.repo")
         auth_token = os.environ.get("GITHUB_TOKEN", None)
         if not auth_token:
@@ -32,8 +33,12 @@ def __init__(self, config):
             config["repo"].get("bug_matching", {}),
         )
         self._prep_repo()
+        if config["repo"].get("tagged_releases", False):
+            self._get_releases(release_type="tag")
+        else:
+            self._get_releases(release_type="branch")

-    def _prep_repo(self):
+    def _prep_repo(self) -> None:
         """
         Clone repo if it doesn't exist and otherwise
         update the main repo to current
@@ -58,44 +63,53 @@
             self.primary_branches["main"]
         ).target

+    def _get_releases(self, release_type: str = "tag") -> None:
         """
-        find all matching tags
+        find all matching releases based on defined release "type"
         and convert them to their corresponding commit objects
         This lets us do an OID comparison between each commit
-        and the tag references
+        and the tag references on tag-based releases
+        but still get a list of "release" commits on branch-based releases
         """
-        self.log.debug(f"{self.tag_matches=}")
         self.releases = []
-        for r in self.repoobj.references:
-            self.log.debug(
-                f"Checking reference {r}, {self.repoobj.references[r].type} for tag matching"
-            )
-            # use this to short-circuit larger reference lists
-            if (
-                "tag" in r
-                and self.repoobj.references[r].type == pygit2.GIT_REF_OID
-                and any(v.match(r) for v in self.tag_matches.values())
-            ):
-                target = self.repoobj[self.repoobj.references[r].target]
-                if target.type == pygit2.GIT_OBJ_TAG:
-                    target = self.repoobj[target.target]
+        if release_type == "tag":
+            for r in self.repoobj.references:
+                self.log.debug(
+                    f"Checking reference {r}, {self.repoobj.references[r].type} for tag matching"
+                )
+                # use this to short-circuit larger reference lists
+                if (
+                    "tag" in r
+                    and self.repoobj.references[r].type == pygit2.GIT_REF_OID
+                    and any(v.match(r) for v in self.tag_matches.values())
+                ):
+                    target = self.repoobj[self.repoobj.references[r].target]
+                    if target.type == pygit2.GIT_OBJ_TAG:
+                        target = self.repoobj[target.target]
+                    self.releases.append(
+                        (
+                            str(target.hex),
+                            int(target.commit_time),
+                            str(target.author),
+                        )
+                    )
+        elif release_type == "branch":
+            for commit in self.branch_commit_log(self.primary_branches["release"]):
                 self.releases.append(
                     (
-                        str(target.hex),
-                        int(target.commit_time),
-                        str(target.author),
+                        commit["hash"],
+                        commit["time"],
+                        commit["author"],
                     )
                 )
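+        # Both paths normalize a release to the same 3-tuple shape,
+        # (commit_hash, commit_time, author), so downstream consumers such
+        # as commits_between_releases() don't care which model produced it.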
+        # sort by commit timestamp
         self.releases.sort(key=lambda x: x[1])

-    def _checkout_branch(self, branch):
+    def _checkout_branch(self, branch: str) -> pygit2.Reference:
         """
         Checkout a particular branch and return the tracking object
-
-        :returns: branch object
-        :rtype: pygit2.Reference
         """
         self.log.debug(f"Checking out {branch}...")
         remote_id = self.repoobj.lookup_reference(f"refs/remotes/origin/{branch}")
@@ -141,11 +155,13 @@ def list_branches(self):
             yield (branch_name, commit.commit_time)
         self.log.debug(f"Found {branch_count} branches in the repo")

-    def tag_releases(self, base_date=datetime.today(), window=DEFAULT_WINDOW):
+    def tag_releases(
+        self, base_date: datetime = datetime.today(), window: int = DEFAULT_WINDOW
+    ) -> dict:
         """
         :returns: total count of releases, windowed releases
         """
-        tagged_releases = {
+        release_stats = {
             "total_releases": 0,
             "users": dict(),
             "total_window_releases": 0,
@@ -154,23 +170,26 @@
         window_start_ts = (base_date - timedelta(window)).timestamp()
         for release in self.releases:
             user = release[2]
-            tagged_releases["total_releases"] += 1
-            if user in tagged_releases["users"]:
-                tagged_releases["users"][user]["total_releases"] += 1
+            release_stats["total_releases"] += 1
+            if user in release_stats["users"]:
+                release_stats["users"][user]["total_releases"] += 1
             else:
-                tagged_releases["users"][user] = {
+                release_stats["users"][user] = {
                     "total_window_releases": 0,
                     "total_releases": 1,
                 }
             # because we check for the user above this if statement, we don't have to check again inside it
             if window_start_ts < release[1] < window_end_ts:
-                tagged_releases["total_window_releases"] += 1
-                tagged_releases["users"][user]["total_window_releases"] += 1
-        self.log.debug(f"{tagged_releases=}")
-        return tagged_releases
+                release_stats["total_window_releases"] += 1
+                release_stats["users"][user]["total_window_releases"] += 1
+        self.log.debug(f"{release_stats=}")
+        return release_stats

     def match_bugfixes(
-        self, pr_list, base_date=datetime.today(), window=DEFAULT_WINDOW
+        self,
+        pr_list: list,
+        base_date: datetime = datetime.today(),
+        window: int = DEFAULT_WINDOW,
     ):
         """
         Given a list of PR merge commits, find the matching release for
         each: the first entry in the release
         commit log that is newer than the commit itself

         :returns: rough mttr, rough windowed mttr
-        :rtype: float
         """
         if not self.releases or not pr_list:
             return 0, 0
         window_end_ts = base_date.timestamp()
         window_start_ts = (base_date - timedelta(window)).timestamp()
-        windowed_mttr = 0
+        windowed_mttr: float = 0
         windowed_releases = list()
-        mttr = 0
+        mttr: float = 0
         self.log.debug("Tracking MTTR...")
         for pr in pr_list:
             for release in self.releases:
@@ -219,7 +237,7 @@
         return mttr, windowed_mttr

     def commit_release_matching(
-        self, base_date=datetime.today(), window=DEFAULT_WINDOW
+        self, base_date: datetime = datetime.today(), window: int = DEFAULT_WINDOW
     ):
         """
         1. Loop through a sorted list of all commits to the repo
         2. Match each commit to the first release that came after it
         3. Track commits that never made it into a release
         4. Do a rolling average on number of releases

         :returns: Avg commit time, avg windowed commit time,
             count of unreleased commits, count of all commits
-        :rtype: tuple(int, int, int, int)
         """
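+        # shape of the return value, with invented numbers:
+        # (3600.0, 1800.0, 4, 250) -> commits took ~1 hour on average to get
+        # released overall, ~30 minutes within the recent window, and 4 of
+        # 250 commits were never released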
         window_end_ts = base_date.timestamp()
         window_start_ts = (base_date - timedelta(window)).timestamp()
-        avg_commit_time = 0
+        avg_commit_time: float = 0
         unreleased_commits = 0
         commits = 0
         windowed_releases = list()
-        windowed_commit_time = 0
+        windowed_commit_time: float = 0
         walker = self.repoobj.walk(
             self.main_branch_id, pygit2.GIT_SORT_TOPOLOGICAL | pygit2.GIT_SORT_REVERSE
         )
@@ -288,14 +305,41 @@
         else:
             return 0, 0, commits, commits

-    def branch_commit_log(self, branch_name):
+    def commits_between_releases(self, release1, release2) -> list:
+        """
+        Collect the commits that landed after release1 and up to release2,
+        where each release is a (commit_hash, commit_time, author) tuple.
+        Roughly the pygit2 equivalent of ``git log release1..release2``,
+        so we don't have to shell out to git.
+
+        :returns: list of pygit2 commit objects, newest first
+        """
+        # walk backwards from the newer release until we hit commits that
+        # are as old as (or older than) the previous release
+        walker = self.repoobj.walk(
+            release2[0], pygit2.GIT_SORT_TIME | pygit2.GIT_SORT_TOPOLOGICAL
+        )
+        commits = []
+        self.log.info(f"Looking at commits between {release1} and {release2}")
+        for commit in walker:
+            if commit.commit_time <= release1[1]:
+                self.log.debug("Reached commits at or before the previous release")
+                break
+            commits.append(commit)
+        return commits
+
+    def branch_commit_log(self, branch_name: str) -> Generator:
         """
         Track all commits on a particular branch
         This doesn't work perfectly as merged
         branches are tougher to properly track

         :returns: generator of commit objects for a branch
-        :rtype: generator(dict())
         """
         self.log.debug(f"Loading commit log for {branch_name}...")
         commit_count = 0
diff --git a/github_stats/schema.py b/github_stats/schema.py
index 777ed00..f88ebed 100644
--- a/github_stats/schema.py
+++ b/github_stats/schema.py
@@ -1,7 +1,7 @@
 """
 Defaults for some internal data
 """
-DEFAULT_WINDOW = 1
+DEFAULT_WINDOW: int = 1

 user_schema = {
     "avg_pr_time_open_secs": 0,
@@ -25,12 +25,12 @@
     "last_commit_time": 0,
 }

-user_login_cache = {
+user_login_cache: dict = {
     "names": dict(),
     "logins": dict(),
 }

-stats = {
+stats: dict = {
     "branches": {
         "total_branches": 0,
         "total_window_branches": 0,
diff --git a/mypy.ini b/mypy.ini
new file mode 100644
index 0000000..ebb8a88
--- /dev/null
+++ b/mypy.ini
@@ -0,0 +1,3 @@
+[mypy]
+exclude = repos/
+ignore_missing_imports = True
diff --git a/poetry.lock b/poetry.lock
index 548d5d8..aa00379 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -384,16 +384,62 @@ files = [
     {file = "mccabe-0.7.0.tar.gz", hash = "sha256:348e0240c33b60bbdf4e523192ef919f28cb2c3d7d5c7794f74009290f236325"},
 ]

+[[package]]
+name = "mypy"
+version = "1.0.0"
+description = "Optional static typing for Python"
+category = "dev"
+optional = false
+python-versions = ">=3.7"
+files = [
+    {file = "mypy-1.0.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e0626db16705ab9f7fa6c249c017c887baf20738ce7f9129da162bb3075fc1af"},
+    {file = "mypy-1.0.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:1ace23f6bb4aec4604b86c4843276e8fa548d667dbbd0cb83a3ae14b18b2db6c"},
+    {file = "mypy-1.0.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:87edfaf344c9401942883fad030909116aa77b0fa7e6e8e1c5407e14549afe9a"},
+    {file = "mypy-1.0.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:0ab090d9240d6b4e99e1fa998c2d0aa5b29fc0fb06bd30e7ad6183c95fa07593"},
{file = "mypy-1.0.0-cp310-cp310-win_amd64.whl", hash = "sha256:7cc2c01dfc5a3cbddfa6c13f530ef3b95292f926329929001d45e124342cd6b7"}, + {file = "mypy-1.0.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:14d776869a3e6c89c17eb943100f7868f677703c8a4e00b3803918f86aafbc52"}, + {file = "mypy-1.0.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:bb2782a036d9eb6b5a6efcdda0986774bf798beef86a62da86cb73e2a10b423d"}, + {file = "mypy-1.0.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5cfca124f0ac6707747544c127880893ad72a656e136adc935c8600740b21ff5"}, + {file = "mypy-1.0.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:8845125d0b7c57838a10fd8925b0f5f709d0e08568ce587cc862aacce453e3dd"}, + {file = "mypy-1.0.0-cp311-cp311-win_amd64.whl", hash = "sha256:01b1b9e1ed40544ef486fa8ac022232ccc57109f379611633ede8e71630d07d2"}, + {file = "mypy-1.0.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:c7cf862aef988b5fbaa17764ad1d21b4831436701c7d2b653156a9497d92c83c"}, + {file = "mypy-1.0.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5cd187d92b6939617f1168a4fe68f68add749902c010e66fe574c165c742ed88"}, + {file = "mypy-1.0.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:4e5175026618c178dfba6188228b845b64131034ab3ba52acaffa8f6c361f805"}, + {file = "mypy-1.0.0-cp37-cp37m-win_amd64.whl", hash = "sha256:2f6ac8c87e046dc18c7d1d7f6653a66787a4555085b056fe2d599f1f1a2a2d21"}, + {file = "mypy-1.0.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:7306edca1c6f1b5fa0bc9aa645e6ac8393014fa82d0fa180d0ebc990ebe15964"}, + {file = "mypy-1.0.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:3cfad08f16a9c6611e6143485a93de0e1e13f48cfb90bcad7d5fde1c0cec3d36"}, + {file = "mypy-1.0.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:67cced7f15654710386e5c10b96608f1ee3d5c94ca1da5a2aad5889793a824c1"}, + {file = "mypy-1.0.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:a86b794e8a56ada65c573183756eac8ac5b8d3d59daf9d5ebd72ecdbb7867a43"}, + {file = "mypy-1.0.0-cp38-cp38-win_amd64.whl", hash = "sha256:50979d5efff8d4135d9db293c6cb2c42260e70fb010cbc697b1311a4d7a39ddb"}, + {file = "mypy-1.0.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:3ae4c7a99e5153496243146a3baf33b9beff714464ca386b5f62daad601d87af"}, + {file = "mypy-1.0.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:5e398652d005a198a7f3c132426b33c6b85d98aa7dc852137a2a3be8890c4072"}, + {file = "mypy-1.0.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:be78077064d016bc1b639c2cbcc5be945b47b4261a4f4b7d8923f6c69c5c9457"}, + {file = "mypy-1.0.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:92024447a339400ea00ac228369cd242e988dd775640755fa4ac0c126e49bb74"}, + {file = "mypy-1.0.0-cp39-cp39-win_amd64.whl", hash = "sha256:fe523fcbd52c05040c7bee370d66fee8373c5972171e4fbc323153433198592d"}, + {file = "mypy-1.0.0-py3-none-any.whl", hash = "sha256:2efa963bdddb27cb4a0d42545cd137a8d2b883bd181bbc4525b568ef6eca258f"}, + {file = "mypy-1.0.0.tar.gz", hash = "sha256:f34495079c8d9da05b183f9f7daec2878280c2ad7cc81da686ef0b484cea2ecf"}, +] + +[package.dependencies] +mypy-extensions = ">=0.4.3" +typing-extensions = ">=3.10" + +[package.extras] +dmypy = ["psutil (>=4.0)"] +install-types = ["pip"] +python2 = ["typed-ast (>=1.4.0,<2)"] +reports = ["lxml"] + [[package]] name = "mypy-extensions" -version = "0.4.3" -description = "Experimental type system extensions for programs checked with the mypy typechecker." 
+version = "1.0.0" +description = "Type system extensions for programs checked with the mypy type checker." category = "dev" optional = false -python-versions = "*" +python-versions = ">=3.5" files = [ - {file = "mypy_extensions-0.4.3-py2.py3-none-any.whl", hash = "sha256:090fedd75945a69ae91ce1303b5824f428daf5a028d2f6ab8a299250a846f15d"}, - {file = "mypy_extensions-0.4.3.tar.gz", hash = "sha256:2d82818f5bb3e369420cb3c4060a7970edba416647068eb4c5343488a6c604a8"}, + {file = "mypy_extensions-1.0.0-py3-none-any.whl", hash = "sha256:4392f6c0eb8a5668a69e23d168ffa70f0be9ccfd32b5cc2d26a34ae5b844552d"}, + {file = "mypy_extensions-1.0.0.tar.gz", hash = "sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782"}, ] [[package]] @@ -422,19 +468,19 @@ files = [ [[package]] name = "platformdirs" -version = "2.6.2" +version = "3.0.0" description = "A small Python package for determining appropriate platform-specific dirs, e.g. a \"user data dir\"." category = "dev" optional = false python-versions = ">=3.7" files = [ - {file = "platformdirs-2.6.2-py3-none-any.whl", hash = "sha256:83c8f6d04389165de7c9b6f0c682439697887bca0aa2f1c87ef1826be3584490"}, - {file = "platformdirs-2.6.2.tar.gz", hash = "sha256:e1fea1fe471b9ff8332e229df3cb7de4f53eeea4998d3b6bfff542115e998bd2"}, + {file = "platformdirs-3.0.0-py3-none-any.whl", hash = "sha256:b1d5eb14f221506f50d6604a561f4c5786d9e80355219694a1b244bcd96f4567"}, + {file = "platformdirs-3.0.0.tar.gz", hash = "sha256:8a1228abb1ef82d788f74139988b137e78692984ec7b08eaa6c65f1723af28f9"}, ] [package.extras] -docs = ["furo (>=2022.12.7)", "proselint (>=0.13)", "sphinx (>=5.3)", "sphinx-autodoc-typehints (>=1.19.5)"] -test = ["appdirs (==1.4.4)", "covdefaults (>=2.2.2)", "pytest (>=7.2)", "pytest-cov (>=4)", "pytest-mock (>=3.10)"] +docs = ["furo (>=2022.12.7)", "proselint (>=0.13)", "sphinx (>=6.1.3)", "sphinx-autodoc-typehints (>=1.22,!=1.23.4)"] +test = ["appdirs (==1.4.4)", "covdefaults (>=2.2.2)", "pytest (>=7.2.1)", "pytest-cov (>=4)", "pytest-mock (>=3.10)"] [[package]] name = "pycodestyle" @@ -718,14 +764,14 @@ use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] [[package]] name = "setuptools" -version = "67.1.0" +version = "67.2.0" description = "Easily download, build, install, upgrade, and uninstall Python packages" category = "main" optional = false python-versions = ">=3.7" files = [ - {file = "setuptools-67.1.0-py3-none-any.whl", hash = "sha256:a7687c12b444eaac951ea87a9627c4f904ac757e7abdc5aac32833234af90378"}, - {file = "setuptools-67.1.0.tar.gz", hash = "sha256:e261cdf010c11a41cb5cb5f1bf3338a7433832029f559a6a7614bd42a967c300"}, + {file = "setuptools-67.2.0-py3-none-any.whl", hash = "sha256:16ccf598aab3b506593c17378473978908a2734d7336755a8769b480906bec1c"}, + {file = "setuptools-67.2.0.tar.gz", hash = "sha256:b440ee5f7e607bb8c9de15259dba2583dd41a38879a7abc1d43a71c59524da48"}, ] [package.extras] @@ -745,6 +791,81 @@ files = [ {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"}, ] +[[package]] +name = "types-google-cloud-ndb" +version = "2.1.0.1" +description = "Typing stubs for google-cloud-ndb" +category = "dev" +optional = false +python-versions = "*" +files = [ + {file = "types-google-cloud-ndb-2.1.0.1.tar.gz", hash = "sha256:6f9dfecd886551a544b182cd7d67d39db87eed4f5f5327c78b20086f52f56f2a"}, + {file = "types_google_cloud_ndb-2.1.0.1-py3-none-any.whl", hash = "sha256:bf224b00f7bed33637c222d97d881daa30493d55da64162b1e9309a730b5ec31"}, +] + +[[package]] +name = "types-python-dateutil" +version = 
"2.8.19.6" +description = "Typing stubs for python-dateutil" +category = "dev" +optional = false +python-versions = "*" +files = [ + {file = "types-python-dateutil-2.8.19.6.tar.gz", hash = "sha256:4a6f4cc19ce4ba1a08670871e297bf3802f55d4f129e6aa2443f540b6cf803d2"}, + {file = "types_python_dateutil-2.8.19.6-py3-none-any.whl", hash = "sha256:cfb7d31021c6bce6f3362c69af6e3abb48fe3e08854f02487e844ff910deec2a"}, +] + +[[package]] +name = "types-pyyaml" +version = "6.0.12.5" +description = "Typing stubs for PyYAML" +category = "dev" +optional = false +python-versions = "*" +files = [ + {file = "types-PyYAML-6.0.12.5.tar.gz", hash = "sha256:3b61b7a8111ce368eb366e4a13f3e94e568bc2ed6227e01520a50ee07993bf38"}, + {file = "types_PyYAML-6.0.12.5-py3-none-any.whl", hash = "sha256:dcaf87b65b839e7b641721346ef8b12a87f94071e15205a64ac93ca0e0afc77a"}, +] + +[[package]] +name = "types-regex" +version = "2022.10.31.4" +description = "Typing stubs for regex" +category = "dev" +optional = false +python-versions = "*" +files = [ + {file = "types-regex-2022.10.31.4.tar.gz", hash = "sha256:c18275a16061b816f4c50e098300a551e89bc3b77e5549de4f4f3f048f548473"}, + {file = "types_regex-2022.10.31.4-py3-none-any.whl", hash = "sha256:17ab5f85e47cb5bd65cb5e0d19d88301fae8c23941b9417300199164ee837ee6"}, +] + +[[package]] +name = "types-requests" +version = "2.28.11.12" +description = "Typing stubs for requests" +category = "dev" +optional = false +python-versions = "*" +files = [ + {file = "types-requests-2.28.11.12.tar.gz", hash = "sha256:fd530aab3fc4f05ee36406af168f0836e6f00f1ee51a0b96b7311f82cb675230"}, + {file = "types_requests-2.28.11.12-py3-none-any.whl", hash = "sha256:dbc2933635860e553ffc59f5e264264981358baffe6342b925e3eb8261f866ee"}, +] + +[package.dependencies] +types-urllib3 = "<1.27" + +[[package]] +name = "types-urllib3" +version = "1.26.25.5" +description = "Typing stubs for urllib3" +category = "dev" +optional = false +python-versions = "*" +files = [ + {file = "types-urllib3-1.26.25.5.tar.gz", hash = "sha256:5630e578246d170d91ebe3901788cd28d53c4e044dc2e2488e3b0d55fb6895d8"}, + {file = "types_urllib3-1.26.25.5-py3-none-any.whl", hash = "sha256:e8f25c8bb85cde658c72ee931e56e7abd28803c26032441eea9ff4a4df2b0c31"}, +] + [[package]] name = "typing-extensions" version = "4.4.0" @@ -777,4 +898,4 @@ socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"] [metadata] lock-version = "2.0" python-versions = "^3.11" -content-hash = "b6360b002a499a7b485f6a6066c80147df23c3128badf47ee871419e42517b3d" +content-hash = "d2f5dc7936483000eb10d6e7e0672d318905678d820fc96104438799928eaaf1" diff --git a/pyproject.toml b/pyproject.toml index 8f42cc8..668db4e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -13,9 +13,15 @@ pygit2 = "^1.11.1" influxdb-client = {extras = ["ciso"], version = "^1.35.0"} python-dateutil = "^2.8.2" -[tool.poetry.dev-dependencies] +[tool.poetry.group.dev.dependencies] +mypy = "^1.0.0" flake8 = "^6.0.0" black = "^23" +types-requests = "^2.28.11.12" +types-python-dateutil = "^2.8.19.6" +types-regex = "^2022.10.31.4" +types-pyyaml = "^6.0.12.5" +types-google-cloud-ndb = "^2.1.0.1" [build-system] requires = ["poetry-core>=1.0.0"] diff --git a/view-release-stats.py b/view-release-stats.py new file mode 100644 index 0000000..219d42f --- /dev/null +++ b/view-release-stats.py @@ -0,0 +1,74 @@ +#!/usr/bin/env python3 + +from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter +import copy +import logging +import os +import time + +# local imports +from github_stats.github_api import GithubAccess +from github_stats.util 
+
+SCRIPTDIR = os.path.dirname(os.path.realpath(__file__))
+
+"""
+Utility script to view release statistics (e.g. lead time).
+Possibly the basis for other stats collection in the future.
+Run like: poetry run python view-release-stats.py -c config.yml -w 30
+to see stats for releases in the last 30 days
+"""
+
+
+def cli_opts():
+    """
+    Process CLI options
+    """
+    parser = ArgumentParser(
+        description="Collect data about Github Releases",
+        formatter_class=ArgumentDefaultsHelpFormatter,
+    )
+    parser.add_argument(
+        "--debug", action="store_true", default=False, help="Show debug information"
+    )
+    parser.add_argument(
+        "-c",
+        "--config",
+        default=f"{SCRIPTDIR}/config.yml",
+        help="Config file location",
+    )
+    parser.add_argument(
+        "-w",
+        "--window",
+        default=30,
+        type=int,
+        help="Number of days worth of data to collect",
+    )
+    parser.add_argument(
+        "--timestamp",
+        default=time.time(),
+        type=float,
+        help="UTC timestamp to start looking at data from",
+    )
+    return parser.parse_args()
+
+
+def main():
+    args = cli_opts()
+    logger = logging.getLogger("github-stats")
+    logger.setLevel(logging.INFO)
+    logger.addHandler(logging.StreamHandler())
+    if args.debug:
+        logger.setLevel(logging.DEBUG)
+    config = load_config(args.config)
+    for repo in config["repos"]:
+        local_config = copy.deepcopy(config)
+        local_config.pop("repos", None)
+        local_config["repo"] = repo
+        # load_release_window_stats() expects a datetime, not a raw epoch float
+        end_date = datetime.fromtimestamp(args.timestamp)
+        gh = GithubAccess(local_config)
+        gh.load_release_window_stats(end_date, args.window)
+
+
+if __name__ == "__main__":
+    main()
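As a sanity check on the lead-time math introduced above, here is a self-contained sketch of the same computation with invented numbers (illustration only, not part of the patch):

#!/usr/bin/env python3
# Per-release lead time as computed by load_release_window_stats():
# minutes between each commit and the release that shipped it, then
# averaged/medianed and rounded to hours.
import statistics

release_time = 1_700_000_000  # epoch seconds of a hypothetical release
# three hypothetical commits landing 2, 5, and 11 hours before the release
commit_times = [release_time - h * 3600 for h in (2, 5, 11)]

deltas_in_minutes = [(release_time - t) / 60 for t in commit_times]
avg_hours = round(sum(deltas_in_minutes) / 60 / len(deltas_in_minutes))
median_hours = round(statistics.median(deltas_in_minutes) / 60)
print(avg_hours, median_hours)  # -> 6 5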