From fba74656caeb961943f7af2adc79d215700840b4 Mon Sep 17 00:00:00 2001 From: Compass AI Date: Mon, 15 Dec 2025 10:39:14 +0000 Subject: [PATCH 1/5] Add author retrieval functionality across all Git providers and API endpoint Implemented a new `get_authors()` method across all fetcher classes (GitHub, GitLab, Azure DevOps, and URL-based) to retrieve unique author information from repositories. Added corresponding API schemas and a new `/api/authors` endpoint that allows filtering by repository names and returns deduplicated author data with names and emails. Includes comprehensive test coverage for the new functionality across different providers and edge cases. --- app/api/models/schemas.py | 25 +++- app/api/server/routes.py | 192 +++++++++++++++++++++----- git_recap/providers/azure_fetcher.py | 103 ++++++++++---- git_recap/providers/base_fetcher.py | 15 ++ git_recap/providers/github_fetcher.py | 64 +++++++-- git_recap/providers/gitlab_fetcher.py | 57 +++++++- git_recap/providers/url_fetcher.py | 87 ++++++++++-- tests/test_parser.py | 180 +++++++++++++++++++++++- 8 files changed, 636 insertions(+), 87 deletions(-) diff --git a/app/api/models/schemas.py b/app/api/models/schemas.py index df69dbe..d056c51 100644 --- a/app/api/models/schemas.py +++ b/app/api/models/schemas.py @@ -142,4 +142,27 @@ class CommitMessagesForPRDescriptionRequest(BaseModel): session_id: str = Field(..., description="Session identifier.") class PRDescriptionResponse(BaseModel): - description: str = Field(..., description="LLM-generated pull request description.") \ No newline at end of file + description: str = Field(..., description="LLM-generated pull request description.") + + +# --- Authors Endpoint Schemas --- +class AuthorInfo(BaseModel): + """Individual author information""" + name: str = Field(..., description="Author's name") + email: str = Field(..., description="Author's email address") + + +class GetAuthorsRequest(BaseModel): + """Request model for fetching authors""" + session_id: str = Field(..., description="Session identifier") + repo_names: Optional[List[str]] = Field( + default=[], + description="List of repository names to fetch authors from. Empty list fetches from all repositories." 
+ ) + + +class GetAuthorsResponse(BaseModel): + """Response model containing list of authors""" + authors: List[AuthorInfo] = Field(..., description="List of unique authors") + total_count: int = Field(..., description="Total number of unique authors") + repo_count: int = Field(..., description="Number of repositories processed") \ No newline at end of file diff --git a/app/api/server/routes.py b/app/api/server/routes.py index 6be4ca0..6f77d26 100644 --- a/app/api/server/routes.py +++ b/app/api/server/routes.py @@ -1,5 +1,6 @@ from fastapi import APIRouter, HTTPException, Request, Query -from pydantic import BaseModel +from pydantic import BaseModel, Field +from typing import Optional, List, Dict from models.schemas import ( BranchListResponse, @@ -7,26 +8,51 @@ ValidTargetBranchesResponse, CreatePullRequestRequest, CreatePullRequestResponse, + GetPullRequestDiffRequest, + GetPullRequestDiffResponse, ) -from models.schemas import GetPullRequestDiffRequest, GetPullRequestDiffResponse from services.llm_service import set_llm, get_llm, trim_messages from services.fetcher_service import store_fetcher, get_fetcher from git_recap.utils import parse_entries_to_txt, parse_releases_to_txt from aicore.llm.config import LlmConfig from datetime import datetime, timezone -from typing import Optional, List import requests import os router = APIRouter() + class CloneRequest(BaseModel): """Request model for repository cloning endpoint.""" url: str + +class GetAuthorsRequest(BaseModel): + """Request model for fetching authors from repositories.""" + session_id: str = Field(..., description="Session identifier") + repo_names: Optional[List[str]] = Field( + default=[], + description="List of repository names to fetch authors from. Empty list fetches from all repositories." + ) + + +class AuthorInfo(BaseModel): + """Individual author information.""" + name: str = Field(..., description="Author's name") + email: str = Field(..., description="Author's email address") + + +class GetAuthorsResponse(BaseModel): + """Response model containing list of authors.""" + authors: List[AuthorInfo] = Field(..., description="List of unique authors") + total_count: int = Field(..., description="Total number of unique authors") + repo_count: int = Field(..., description="Number of repositories processed") + + GITHUB_ACCESS_TOKEN_URL = 'https://github.com/login/oauth/access_token' + @router.post("/clone-repo") async def clone_repository(request: CloneRequest): """ @@ -51,12 +77,26 @@ async def clone_repository(request: CloneRequest): except Exception as e: raise HTTPException(status_code=500, detail=f"Failed to clone repository: {str(e)}") + @router.get("/external-signup") async def external_signup(app: str, accessToken: str, provider: str): + """ + Handle external OAuth signup flow. 
+ + Args: + app: Application name + accessToken: OAuth access token or authorization code + provider: Provider name (e.g., "github") + + Returns: + dict: Contains session_id, token, and provider information + + Raises: + HTTPException: 400 for unsupported provider or token errors + """ if provider.lower() != "github": raise HTTPException(status_code=400, detail="Unsupported provider") - # Build the URL to exchange the code for a token params = { "client_id": os.getenv("VITE_GITHUB_CLIENT_ID"), "client_secret": os.getenv("VITE_GITHUB_CLIENT_SECRET"), @@ -83,6 +123,7 @@ async def external_signup(app: str, accessToken: str, provider: str): response["provider"] = provider return await store_fetcher_endpoint(response) + @router.post("/pat") async def store_fetcher_endpoint(request: Request): """ @@ -92,7 +133,7 @@ async def store_fetcher_endpoint(request: Request): request: Contains JSON payload with 'session_id' and 'pat' Returns: - dict: Contains session_id + dict: Contains session_id and username Raises: HTTPException: 400 if PAT is missing @@ -112,11 +153,10 @@ async def store_fetcher_endpoint(request: Request): username = store_fetcher(session_id, token, provider) return {"session_id": session_id, "username": username} -async def create_llm_session( - request: Optional[LlmConfig] = None -): + +async def create_llm_session(request: Optional[LlmConfig] = None): """ - Create a new LLM session with custom configuration + Create a new LLM session with custom configuration. Args: request: Optional LLM configuration @@ -136,6 +176,7 @@ async def create_llm_session( except Exception as e: raise HTTPException(status_code=500, detail=str(e)) + @router.get("/repos") async def get_repos(session_id: str): """ @@ -153,6 +194,7 @@ async def get_repos(session_id: str): fetcher = get_fetcher(session_id) return {"repos": fetcher.repos_names} + @router.get("/actions") async def get_actions( session_id: str, @@ -183,7 +225,6 @@ async def get_actions( authors = sum([author.split(",") for author in authors], []) fetcher = get_fetcher(session_id) - # Convert date strings to datetime objects start_dt = datetime.fromisoformat(start_date).replace(tzinfo=timezone.utc) if start_date else None end_dt = datetime.fromisoformat(end_date).replace(tzinfo=timezone.utc) if end_date else None @@ -203,6 +244,7 @@ async def get_actions( return {"actions": parse_entries_to_txt(actions)} + @router.get("/release_notes") async def get_release_notes( session_id: str, @@ -211,20 +253,27 @@ async def get_release_notes( ): """ Generate release notes for the latest release of a single repository. - Validates input, fetches releases, fetches actions since latest release, and returns compiled release notes. 
+ + Args: + session_id: The session identifier + repo_filter: Must contain exactly one repository name + num_old_releases: Number of previous releases to include for context + + Returns: + dict: Contains actions and release notes text + + Raises: + HTTPException: 400 for invalid input, 404 for session not found, 500 for errors """ - # Validate repo_filter: must be a single repo if repo_filter is None or len(repo_filter) != 1: raise HTTPException(status_code=400, detail="repo_filter must be a list containing exactly one repository name.") repo = repo_filter[0] - # Get fetcher for session try: fetcher = get_fetcher(session_id) except HTTPException: raise - # Check if fetcher supports fetch_releases try: releases = fetcher.fetch_releases() except NotImplementedError: @@ -233,7 +282,6 @@ async def get_release_notes( raise HTTPException(status_code=500, detail=f"Error fetching releases: {str(e)}") releases_txt = parse_releases_to_txt(releases[:num_old_releases]) - # Filter releases for the requested repo repo_releases = [r for r in releases if r.get("repo") == repo] n_releases = len(repo_releases) if n_releases < 1: @@ -244,7 +292,6 @@ async def get_release_notes( detail=f"num_old_releases must be at least 1 and less than the number of releases available ({n_releases}) for this repository." ) - # Sort releases by published_at descending (latest first) try: repo_releases.sort(key=lambda r: r.get("published_at") or r.get("created_at"), reverse=True) except Exception: @@ -252,11 +299,9 @@ async def get_release_notes( latest_release = repo_releases[0] - # Determine the start_date for actions (latest release date) release_date = latest_release.get("published_at") or latest_release.get("created_at") if not release_date: raise HTTPException(status_code=500, detail="Latest release does not have a valid date.") - # Accept both datetime and string if isinstance(release_date, datetime): start_date_iso = release_date.astimezone(timezone.utc).isoformat() else: @@ -266,9 +311,6 @@ async def get_release_notes( except Exception: raise HTTPException(status_code=500, detail="Release date is not a valid ISO format.") - # Fetch actions since latest release for this repo - # Reuse get_actions logic, but inline to avoid async call - # Set fetcher filters fetcher.start_date = datetime.fromisoformat(start_date_iso) fetcher.end_dt = None fetcher.repo_filter = [repo] @@ -280,14 +322,21 @@ async def get_release_notes( return {"actions": "\n\n".join([actions_txt, releases_txt])} -# --- Branch and Pull Request Management Endpoints --- + @router.get("/branches", response_model=BranchListResponse) -async def get_branches( - session_id: str, - repo: str -): +async def get_branches(session_id: str, repo: str): """ Get all branches for a given repository in the current session. + + Args: + session_id: The session identifier + repo: Repository name + + Returns: + BranchListResponse: Contains list of branch names + + Raises: + HTTPException: 400 if not supported, 404 if session not found, 500 for errors """ fetcher = get_fetcher(session_id) try: @@ -299,12 +348,20 @@ async def get_branches( raise HTTPException(status_code=500, detail=f"Failed to fetch branches: {str(e)}") return BranchListResponse(branches=branches) + @router.post("/valid-target-branches", response_model=ValidTargetBranchesResponse) -async def get_valid_target_branches( - req: ValidTargetBranchesRequest -): +async def get_valid_target_branches(req: ValidTargetBranchesRequest): """ Get all valid target branches for a given source branch in a repository. 
+ + Args: + req: ValidTargetBranchesRequest containing session_id, repo, and source_branch + + Returns: + ValidTargetBranchesResponse: Contains list of valid target branch names + + Raises: + HTTPException: 400 for validation errors, 404 if session not found, 500 for errors """ fetcher = get_fetcher(req.session_id) try: @@ -318,10 +375,21 @@ async def get_valid_target_branches( raise HTTPException(status_code=500, detail=f"Failed to validate target branches: {str(e)}") return ValidTargetBranchesResponse(valid_target_branches=valid_targets) + @router.post("/create-pull-request", response_model=CreatePullRequestResponse) -async def create_pull_request( - req: CreatePullRequestRequest -): +async def create_pull_request(req: CreatePullRequestRequest): + """ + Create a pull request between two branches with optional metadata. + + Args: + req: CreatePullRequestRequest containing all PR details + + Returns: + CreatePullRequestResponse: Contains PR URL, number, state, and success status + + Raises: + HTTPException: 400 for validation errors, 404 if session not found, 500 for errors + """ fetcher = get_fetcher(req.session_id) fetcher.repo_filter = [req.repo] if not req.description or not req.description.strip(): @@ -351,8 +419,21 @@ async def create_pull_request( generated_description=None ) + @router.post("/get-pull-request-diff") async def get_pull_request_diff(req: GetPullRequestDiffRequest): + """ + Get the diff between two branches for pull request preview. + + Args: + req: GetPullRequestDiffRequest containing session_id, repo, source_branch, and target_branch + + Returns: + dict: Contains formatted commit actions between branches + + Raises: + HTTPException: 400 if not supported or GitHub only, 404 if session not found, 500 for errors + """ fetcher = get_fetcher(req.session_id) fetcher.repo_filter = [req.repo] provider = type(fetcher).__name__.lower() @@ -365,3 +446,50 @@ async def get_pull_request_diff(req: GetPullRequestDiffRequest): except Exception as e: raise HTTPException(status_code=500, detail=f"Failed to fetch pull request diff: {str(e)}") return {"actions": parse_entries_to_txt(commits)} + + +@router.post("/api/authors", response_model=GetAuthorsResponse) +async def get_authors(request: GetAuthorsRequest): + """ + Retrieve list of unique authors from specified repositories. 
+ + Args: + request: GetAuthorsRequest containing session_id and optional repo_names + + Returns: + GetAuthorsResponse with list of authors and metadata + + Raises: + HTTPException: 404 if session not found, 500 for fetcher errors + """ + try: + fetcher = get_fetcher(request.session_id) + + if not fetcher: + raise HTTPException( + status_code=404, + detail=f"Session {request.session_id} not found or expired" + ) + + authors_data = fetcher.get_authors(request.repo_names or []) + + authors = [ + AuthorInfo(name=author["name"], email=author["email"]) + for author in authors_data + ] + + response = GetAuthorsResponse( + authors=authors, + total_count=len(authors), + repo_count=len(request.repo_names) if request.repo_names else 0 + ) + + return response + + except HTTPException: + raise + except Exception as e: + raise HTTPException( + status_code=500, + detail=f"Error fetching authors: {str(e)}" + ) \ No newline at end of file diff --git a/git_recap/providers/azure_fetcher.py b/git_recap/providers/azure_fetcher.py index 42a8300..9fba869 100644 --- a/git_recap/providers/azure_fetcher.py +++ b/git_recap/providers/azure_fetcher.py @@ -1,14 +1,16 @@ from azure.devops.connection import Connection from msrest.authentication import BasicAuthentication +from azure.devops.exceptions import AzureDevOpsServiceError from datetime import datetime from typing import List, Dict, Any, Optional from git_recap.providers.base_fetcher import BaseFetcher + class AzureFetcher(BaseFetcher): """ Fetcher implementation for Azure DevOps repositories. - Supports fetching commits, pull requests, and issues. + Supports fetching commits, pull requests, issues, and authors. Release fetching is not supported and will raise NotImplementedError. """ @@ -30,9 +32,12 @@ def __init__(self, pat: str, organization_url: str, start_date=None, end_date=No self.connection = Connection(base_url=self.organization_url, creds=credentials) self.core_client = self.connection.clients.get_core_client() self.git_client = self.connection.clients.get_git_client() + + # Extract project name from organization URL or use first project + projects = self.core_client.get_projects().value + self.project_name = projects[0].name if projects else None + self.repos = self.get_repos() - # Azure DevOps doesn't provide an affiliation filter; - # we'll iterate over all repos in each project. if authors is None: self.authors = [] @@ -43,8 +48,10 @@ def get_repos(self): List of repository objects. """ projects = self.core_client.get_projects().value - # Get all repositories in each project - repos = [self.git_client.get_repositories(project.id) for project in projects] + repos = [] + for project in projects: + project_repos = self.git_client.get_repositories(project.id) + repos.extend(project_repos) return repos @property @@ -55,8 +62,7 @@ def repos_names(self) -> List[str]: Returns: List[str]: List of repository names. """ - # To be implemented if needed for UI or listing. - ... + return [repo.name for repo in self.repos] def _filter_by_date(self, date_obj: datetime) -> bool: """ @@ -103,15 +109,14 @@ def fetch_commits(self) -> List[Dict[str, Any]]: for author in self.authors: try: commits = self.git_client.get_commits( - project=repo.id, + project=repo.project.id, repository_id=repo.id, search_criteria={"author": author} ) except Exception: continue for commit in commits: - # Azure DevOps returns a commit with an 'author' property. 
- commit_date = commit.author.date # assumed datetime + commit_date = commit.author.date if self._filter_by_date(commit_date): sha = commit.commit_id if sha not in processed_commits: @@ -151,10 +156,9 @@ def fetch_pull_requests(self) -> List[Dict[str, Any]]: except Exception: continue for pr in pull_requests: - # Check that the PR creator is one of our authors. if pr.created_by.unique_name not in self.authors: continue - pr_date = pr.creation_date # type: datetime + pr_date = pr.creation_date if not self._filter_by_date(pr_date): continue @@ -204,7 +208,6 @@ def fetch_issues(self) -> List[Dict[str, Any]]: """ entries = [] wit_client = self.connection.clients.get_work_item_tracking_client() - # Query work items for each author using a simplified WIQL query. for author in self.authors: wiql = f"SELECT [System.Id], [System.Title], [System.CreatedDate] FROM WorkItems WHERE [System.AssignedTo] CONTAINS '{author}'" try: @@ -235,7 +238,6 @@ def fetch_releases(self) -> List[Dict[str, Any]]: Raises: NotImplementedError: Always, since release fetching is not supported for AzureFetcher. """ - # If Azure DevOps release fetching is supported in the future, implement logic here. raise NotImplementedError("Release fetching is not supported for Azure DevOps (AzureFetcher).") def get_branches(self) -> List[str]: @@ -248,9 +250,6 @@ def get_branches(self) -> List[str]: Raises: NotImplementedError: Always, since branch listing is not yet implemented for AzureFetcher. """ - # TODO: Implement get_branches() for Azure DevOps support - # This would use: git_client.get_branches(repository_id, project) - # and extract branch names from the returned objects raise NotImplementedError("Branch listing is not yet implemented for Azure DevOps (AzureFetcher).") def get_valid_target_branches(self, source_branch: str) -> List[str]: @@ -270,14 +269,6 @@ def get_valid_target_branches(self, source_branch: str) -> List[str]: Raises: NotImplementedError: Always, since PR target validation is not yet implemented for AzureFetcher. """ - # TODO: Implement get_valid_target_branches() for Azure DevOps support - # This would require: - # 1. Verify source_branch exists using git_client.get_branch() - # 2. Get all branches using get_branches() - # 3. Filter out source branch - # 4. Check for existing pull requests using git_client.get_pull_requests() - # 5. Filter out branches with existing open PRs from source - # 6. Optionally check branch policies and protection rules raise NotImplementedError("Pull request target branch validation is not yet implemented for Azure DevOps (AzureFetcher).") def create_pull_request( @@ -310,7 +301,61 @@ def create_pull_request( Raises: NotImplementedError: Always, since PR creation is not yet implemented for AzureFetcher. """ - # TODO: Implement create_pull_request() for Azure DevOps support - # This would use: git_client.create_pull_request() with appropriate parameters - # Would need to handle reviewers, work item links (assignees), labels, and draft status - raise NotImplementedError("Pull request creation is not yet implemented for Azure DevOps (AzureFetcher).") \ No newline at end of file + raise NotImplementedError("Pull request creation is not yet implemented for Azure DevOps (AzureFetcher).") + + def get_authors(self, repo_names: List[str]) -> List[Dict[str, str]]: + """ + Retrieve unique authors from specified Azure DevOps repositories. + + Args: + repo_names: List of repository names. + Empty list fetches from all accessible repositories. 
+ + Returns: + List of unique author dictionaries with name and email. + """ + authors_set = set() + + try: + git_client = self.connection.clients.get_git_client() + + if not repo_names: + repos = self.repos + else: + repos = [repo for repo in self.repos if repo.name in repo_names] + + for repo in repos: + if self.repo_filter and repo.name not in self.repo_filter: + continue + + try: + commits = git_client.get_commits( + repository_id=repo.id, + search_criteria={'$top': 1000} + ) + + for commit in commits: + if commit.author: + author_name = commit.author.name or "Unknown" + author_email = commit.author.email or "unknown@example.com" + authors_set.add((author_name, author_email)) + + if commit.committer: + committer_name = commit.committer.name or "Unknown" + committer_email = commit.committer.email or "unknown@example.com" + authors_set.add((committer_name, committer_email)) + + except AzureDevOpsServiceError as e: + print(f"Error fetching authors from {repo.name}: {e}") + continue + + authors_list = [ + {"name": name, "email": email} + for name, email in sorted(authors_set) + ] + + return authors_list + + except Exception as e: + print(f"Error in get_authors: {e}") + return [] \ No newline at end of file diff --git a/git_recap/providers/base_fetcher.py b/git_recap/providers/base_fetcher.py index 4741cda..d4df2ca 100644 --- a/git_recap/providers/base_fetcher.py +++ b/git_recap/providers/base_fetcher.py @@ -156,6 +156,21 @@ def create_pull_request( """ raise NotImplementedError("Subclasses must implement create_pull_request() to create a pull request with the specified parameters") + @abstractmethod + def get_authors(self, repo_names: List[str]) -> List[Dict[str, str]]: + """ + Retrieve unique authors from specified repositories. + + Args: + repo_names: List of repository names to fetch authors from. + Empty list means fetch from all available repositories. + + Returns: + List of dictionaries containing author information: + [{"name": "John Doe", "email": "john@example.com"}, ...] + """ + pass + def get_authored_messages(self) -> List[Dict[str, Any]]: """ Aggregates all commit, pull request, and issue entries into a single list, diff --git a/git_recap/providers/github_fetcher.py b/git_recap/providers/github_fetcher.py index 98f9cd9..73ec5c1 100644 --- a/git_recap/providers/github_fetcher.py +++ b/git_recap/providers/github_fetcher.py @@ -12,7 +12,7 @@ class GitHubFetcher(BaseFetcher): """ Fetcher implementation for GitHub repositories. - Supports fetching commits, pull requests, issues, and releases. + Supports fetching commits, pull requests, issues, releases, and authors. """ def __init__(self, pat: str, start_date=None, end_date=None, repo_filter=None, authors=None): @@ -92,9 +92,7 @@ def fetch_branch_diff_commits(self, source_branch: str, target_branch: str) -> L def fetch_pull_requests(self) -> List[Dict[str, Any]]: entries = [] - # Maintain a local set to skip duplicate commits already captured in a PR. processed_pr_commits = set() - # Retrieve repos where you're owner, a collaborator, or an organization member. for repo in self.repos: if self.repo_filter and repo.name not in self.repo_filter: continue @@ -102,11 +100,10 @@ def fetch_pull_requests(self) -> List[Dict[str, Any]]: for i, pr in enumerate(pulls, start=1): if pr.user.login not in self.authors: continue - pr_date = pr.updated_at # alternatively, use pr.created_at + pr_date = pr.updated_at if not self._filter_by_date(pr_date): continue - # Add the pull request itself. 
pr_entry = { "type": "pull_request", "repo": repo.name, @@ -116,7 +113,6 @@ def fetch_pull_requests(self) -> List[Dict[str, Any]]: } entries.append(pr_entry) - # Now, add commits associated with this pull request. pr_commits = pr.get_commits() for pr_commit in pr_commits: commit_date = pr_commit.commit.author.date @@ -178,7 +174,6 @@ def fetch_releases(self) -> List[Dict[str, Any]]: continue try: for rel in repo.get_releases(): - # Compose asset list assets = [] for asset in rel.get_assets(): assets.append({ @@ -203,7 +198,6 @@ def fetch_releases(self) -> List[Dict[str, Any]]: } releases.append(release_entry) except Exception: - # If fetching releases fails for a repo, skip it (could be permissions or no releases) continue return releases @@ -267,7 +261,6 @@ def get_valid_target_branches(self, source_branch: str) -> List[str]: continue logger.debug(f"Processing repository: {repo.name}") repo_branches = [branch.name for branch in repo.get_branches()] - # Get existing open PRs from source branch try: open_prs = repo.get_pulls(state='open', head=source_branch) except GithubException as e: @@ -284,7 +277,6 @@ def get_valid_target_branches(self, source_branch: str) -> List[str]: if branch_name in existing_pr_targets: logger.debug(f"Excluding branch with existing PR: {branch_name}") continue - # Optionally check if source is ahead of target (performance cost) valid_targets.append(branch_name) logger.debug(f"Valid target branch: {branch_name}") logger.debug(f"Found {len(valid_targets)} valid target branches") @@ -405,4 +397,54 @@ def create_pull_request( raise except Exception as e: logger.error(f"Unexpected error while creating pull request: {str(e)}") - raise Exception(f"Failed to create pull request: {str(e)}") \ No newline at end of file + raise Exception(f"Failed to create pull request: {str(e)}") + + def get_authors(self, repo_names: List[str]) -> List[Dict[str, str]]: + """ + Retrieve unique authors from specified GitHub repositories. + + Args: + repo_names: List of repository names (format: "owner/repo"). + Empty list fetches from all accessible repositories. + + Returns: + List of unique author dictionaries with name and email. + """ + authors_set = set() + + try: + if not repo_names: + repos = self.github.get_user().get_repos() + repo_names = [repo.full_name for repo in repos] + + for repo_name in repo_names: + try: + repo = self.github.get_repo(repo_name) + + commits = repo.get_commits() + + for commit in commits: + if commit.commit.author: + author_name = commit.commit.author.name or "Unknown" + author_email = commit.commit.author.email or "unknown@example.com" + authors_set.add((author_name, author_email)) + + if commit.commit.committer: + committer_name = commit.commit.committer.name or "Unknown" + committer_email = commit.commit.committer.email or "unknown@example.com" + authors_set.add((committer_name, committer_email)) + + except GithubException as e: + print(f"Error fetching authors from {repo_name}: {e}") + continue + + authors_list = [ + {"name": name, "email": email} + for name, email in sorted(authors_set) + ] + + return authors_list + + except Exception as e: + print(f"Error in get_authors: {e}") + return [] \ No newline at end of file diff --git a/git_recap/providers/gitlab_fetcher.py b/git_recap/providers/gitlab_fetcher.py index 96c974c..7797d6f 100644 --- a/git_recap/providers/gitlab_fetcher.py +++ b/git_recap/providers/gitlab_fetcher.py @@ -7,7 +7,7 @@ class GitLabFetcher(BaseFetcher): """ Fetcher implementation for GitLab repositories. 
- Supports fetching commits, merge requests (pull requests), and issues. + Supports fetching commits, merge requests (pull requests), issues, and authors. Release fetching is not supported and will raise NotImplementedError. """ @@ -246,4 +246,57 @@ def create_pull_request( Raises: NotImplementedError: Always, since PR creation is not yet implemented for GitLabFetcher. """ - raise NotImplementedError("Pull request (merge request) creation is not yet implemented for GitLab (GitLabFetcher).") \ No newline at end of file + raise NotImplementedError("Pull request (merge request) creation is not yet implemented for GitLab (GitLabFetcher).") + + def get_authors(self, repo_names: List[str]) -> List[Dict[str, str]]: + """ + Retrieve unique authors from specified GitLab projects. + + Args: + repo_names: List of project names/IDs. + Empty list fetches from all accessible projects. + + Returns: + List of unique author dictionaries with name and email. + """ + authors_set = set() + + try: + # If no specific projects provided, get all accessible projects + if not repo_names: + projects = self.gl.projects.list(membership=True, all=True) + repo_names = [project.path_with_namespace for project in projects] + + for repo_name in repo_names: + try: + project = self.gl.projects.get(repo_name) + + # Fetch commits + commits = project.commits.list(all=True) + + for commit in commits: + author_name = commit.author_name or "Unknown" + author_email = commit.author_email or "unknown@example.com" + authors_set.add((author_name, author_email)) + + # Also add committer if available + if hasattr(commit, 'committer_name') and commit.committer_name: + committer_name = commit.committer_name + committer_email = commit.committer_email or "unknown@example.com" + authors_set.add((committer_name, committer_email)) + + except gitlab.exceptions.GitlabGetError as e: + print(f"Error fetching authors from {repo_name}: {e}") + continue + + # Convert set to list of dictionaries + authors_list = [ + {"name": name, "email": email} + for name, email in sorted(authors_set) + ] + + return authors_list + + except Exception as e: + print(f"Error in get_authors: {e}") + return [] \ No newline at end of file diff --git a/git_recap/providers/url_fetcher.py b/git_recap/providers/url_fetcher.py index e4424c9..624e02e 100644 --- a/git_recap/providers/url_fetcher.py +++ b/git_recap/providers/url_fetcher.py @@ -36,7 +36,7 @@ def __init__( ) self.url = self._normalize_url(url) self.temp_dir = None - # self._validate_url() + self.repo_path = None self._clone_repo() def _normalize_url(self, url: str) -> str: @@ -59,7 +59,7 @@ def _validate_url(self) -> None: capture_output=True, text=True, check=True, - timeout=10 # Add timeout to prevent hanging + timeout=10 ) if not result.stdout.strip(): raise ValueError(f"URL {self.url} points to an empty repository") @@ -71,8 +71,8 @@ def _validate_url(self) -> None: def _clone_repo(self) -> None: """Clone the repository to a temporary directory with all branches.""" self.temp_dir = tempfile.mkdtemp(prefix="gitrecap_") + self.repo_path = self.temp_dir try: - # First clone with --no-checkout to save bandwidth subprocess.run( ["git", "clone", "--no-checkout", self.url, self.temp_dir], check=True, @@ -81,7 +81,6 @@ def _clone_repo(self) -> None: timeout=300 ) - # Fetch all branches subprocess.run( ["git", "-C", self.temp_dir, "fetch", "--all"], check=True, @@ -90,7 +89,6 @@ def _clone_repo(self) -> None: timeout=300 ) - # Verify the cloned repository has at least one commit verify_result = subprocess.run( ["git", 
"-C", self.temp_dir, "rev-list", "--count", "--all"], capture_output=True, @@ -138,7 +136,6 @@ def _get_all_branches(self) -> List[str]: check=True ) branches = [b.strip() for b in result.stdout.splitlines() if b.strip()] - # Filter out HEAD reference if present return [b for b in branches if not b.endswith('/HEAD')] except subprocess.CalledProcessError: return [] @@ -154,7 +151,7 @@ def _run_git_log(self, extra_args: List[str] = None) -> List[Dict[str, Any]]: "log", "--pretty=format:%H|%an|%ad|%s", "--date=iso", - "--all" # Include all branches and tags + "--all" ] if self.start_date: @@ -173,7 +170,7 @@ def _run_git_log(self, extra_args: List[str] = None) -> List[Dict[str, Any]]: capture_output=True, text=True, check=True, - timeout=120 # Increased timeout for large repositories + timeout=120 ) return self._parse_git_log(result.stdout) except subprocess.TimeoutExpired: @@ -206,7 +203,7 @@ def _parse_git_log(self, log_output: str) -> List[Dict[str, Any]]: "timestamp": timestamp }) except ValueError: - continue # Skip malformed log entries + continue return entries @@ -290,12 +287,80 @@ def create_pull_request( """ raise NotImplementedError("Pull request creation is not supported for generic Git URLs (URLFetcher).") + def get_authors(self, repo_names: List[str]) -> List[Dict[str, str]]: + """ + Retrieve unique authors from cloned repository using git log. + + Args: + repo_names: Not used for URL fetcher (single repo only). + + Returns: + List of unique author dictionaries with name and email. + """ + authors_set = set() + + try: + if not hasattr(self, 'repo_path') or not os.path.exists(self.repo_path): + print("Repository not cloned yet") + return [] + + cmd = [ + 'git', '-C', self.repo_path, 'log', + '--all', + '--format=%an|%ae' + ] + + result = subprocess.run( + cmd, + capture_output=True, + text=True, + check=True + ) + + for line in result.stdout.strip().split('\n'): + if '|' in line: + name, email = line.split('|', 1) + authors_set.add((name.strip(), email.strip())) + + cmd_committer = [ + 'git', '-C', self.repo_path, 'log', + '--all', + '--format=%cn|%ce' + ] + + result_committer = subprocess.run( + cmd_committer, + capture_output=True, + text=True, + check=True + ) + + for line in result_committer.stdout.strip().split('\n'): + if '|' in line: + name, email = line.split('|', 1) + authors_set.add((name.strip(), email.strip())) + + authors_list = [ + {"name": name, "email": email} + for name, email in sorted(authors_set) + ] + + return authors_list + + except subprocess.CalledProcessError as e: + print(f"Git command failed: {e}") + return [] + except Exception as e: + print(f"Error in get_authors: {e}") + return [] + def clear(self) -> None: """Clean up temporary directory.""" if self.temp_dir and os.path.exists(self.temp_dir): try: shutil.rmtree(self.temp_dir, ignore_errors=True) except Exception: - pass # Ensure we don't raise during cleanup + pass finally: - self.temp_dir = None \ No newline at end of file + self.temp_dir = None + self.repo_path = None \ No newline at end of file diff --git a/tests/test_parser.py b/tests/test_parser.py index b5bacfe..a2ecfe4 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -627,4 +627,182 @@ def test_get_valid_target_branches_handles_api_errors(self, mock_github_class): with pytest.raises(Exception) as exc_info: fetcher.get_valid_target_branches("feature-branch") - assert "Failed to validate target branches" in str(exc_info.value) \ No newline at end of file + assert "Failed to validate target branches" in str(exc_info.value) + + 
+class TestGetAuthors: + """Test suite for get_authors() method across all fetchers""" + + @pytest.fixture + def mock_github_fetcher(self): + """Create a mocked GitHub fetcher""" + with patch('git_recap.providers.github_fetcher.Github') as mock_github: + fetcher = GitHubFetcher(pat="fake_token") + fetcher.github = mock_github + return fetcher + + def test_github_get_authors_single_repo(self, mock_github_fetcher): + """Test fetching authors from a single GitHub repository""" + # Mock repository and commits + mock_repo = Mock() + mock_commit1 = Mock() + mock_commit1.commit.author.name = "Alice" + mock_commit1.commit.author.email = "alice@example.com" + mock_commit1.commit.committer.name = "Alice" + mock_commit1.commit.committer.email = "alice@example.com" + + mock_commit2 = Mock() + mock_commit2.commit.author.name = "Bob" + mock_commit2.commit.author.email = "bob@example.com" + mock_commit2.commit.committer.name = "Bob" + mock_commit2.commit.committer.email = "bob@example.com" + + mock_repo.get_commits.return_value = [mock_commit1, mock_commit2] + mock_github_fetcher.github.get_repo.return_value = mock_repo + + # Execute + authors = mock_github_fetcher.get_authors(["owner/repo"]) + + # Assert + assert len(authors) == 2 + assert {"name": "Alice", "email": "alice@example.com"} in authors + assert {"name": "Bob", "email": "bob@example.com"} in authors + + def test_github_get_authors_multiple_repos(self, mock_github_fetcher): + """Test fetching authors from multiple GitHub repositories""" + # Mock two repositories + mock_repo1 = Mock() + mock_commit1 = Mock() + mock_commit1.commit.author.name = "Alice" + mock_commit1.commit.author.email = "alice@example.com" + mock_commit1.commit.committer.name = "Alice" + mock_commit1.commit.committer.email = "alice@example.com" + mock_repo1.get_commits.return_value = [mock_commit1] + + mock_repo2 = Mock() + mock_commit2 = Mock() + mock_commit2.commit.author.name = "Charlie" + mock_commit2.commit.author.email = "charlie@example.com" + mock_commit2.commit.committer.name = "Charlie" + mock_commit2.commit.committer.email = "charlie@example.com" + mock_repo2.get_commits.return_value = [mock_commit2] + + mock_github_fetcher.github.get_repo.side_effect = [mock_repo1, mock_repo2] + + # Execute + authors = mock_github_fetcher.get_authors(["owner/repo1", "owner/repo2"]) + + # Assert + assert len(authors) == 2 + assert {"name": "Alice", "email": "alice@example.com"} in authors + assert {"name": "Charlie", "email": "charlie@example.com"} in authors + + def test_github_get_authors_deduplication(self, mock_github_fetcher): + """Test that duplicate authors are properly deduplicated""" + mock_repo = Mock() + + # Create multiple commits from same author + mock_commit1 = Mock() + mock_commit1.commit.author.name = "Alice" + mock_commit1.commit.author.email = "alice@example.com" + mock_commit1.commit.committer.name = "Alice" + mock_commit1.commit.committer.email = "alice@example.com" + + mock_commit2 = Mock() + mock_commit2.commit.author.name = "Alice" + mock_commit2.commit.author.email = "alice@example.com" + mock_commit2.commit.committer.name = "Alice" + mock_commit2.commit.committer.email = "alice@example.com" + + mock_repo.get_commits.return_value = [mock_commit1, mock_commit2] + mock_github_fetcher.github.get_repo.return_value = mock_repo + + # Execute + authors = mock_github_fetcher.get_authors(["owner/repo"]) + + # Assert - should only have one unique author + assert len(authors) == 1 + assert authors[0] == {"name": "Alice", "email": "alice@example.com"} + + def 
test_github_get_authors_empty_list(self, mock_github_fetcher): + """Test fetching authors with empty repository list""" + # Mock get_user().get_repos() to return all repos + mock_user = Mock() + mock_repo = Mock() + mock_repo.full_name = "owner/repo" + + mock_commit = Mock() + mock_commit.commit.author.name = "Alice" + mock_commit.commit.author.email = "alice@example.com" + mock_commit.commit.committer.name = "Alice" + mock_commit.commit.committer.email = "alice@example.com" + + mock_repo_obj = Mock() + mock_repo_obj.get_commits.return_value = [mock_commit] + + mock_user.get_repos.return_value = [mock_repo] + mock_github_fetcher.github.get_user.return_value = mock_user + mock_github_fetcher.github.get_repo.return_value = mock_repo_obj + + # Execute with empty list + authors = mock_github_fetcher.get_authors([]) + + # Assert + assert len(authors) >= 0 # Should process all accessible repos + + @pytest.mark.asyncio + async def test_api_endpoint_success(self): + """Test the /api/authors endpoint with valid session""" + from fastapi.testclient import TestClient + from app.api.server.routes import router + + client = TestClient(router) + + # Mock fetcher service + with patch('app.api.server.routes.fetcher_service') as mock_service: + mock_fetcher = Mock() + mock_fetcher.get_authors.return_value = [ + {"name": "Alice", "email": "alice@example.com"}, + {"name": "Bob", "email": "bob@example.com"} + ] + mock_service.get_fetcher.return_value = mock_fetcher + + # Make request + response = client.post( + "/api/authors", + json={ + "session_id": "test_session_123", + "repo_names": ["owner/repo"] + } + ) + + # Assert + assert response.status_code == 200 + data = response.json() + assert data["total_count"] == 2 + assert len(data["authors"]) == 2 + + @pytest.mark.asyncio + async def test_api_endpoint_session_not_found(self): + """Test the /api/authors endpoint with invalid session""" + from fastapi.testclient import TestClient + from app.api.server.routes import router + + client = TestClient(router) + + # Mock fetcher service to return None + with patch('app.api.server.routes.fetcher_service') as mock_service: + mock_service.get_fetcher.return_value = None + + # Make request + response = client.post( + "/api/authors", + json={ + "session_id": "invalid_session", + "repo_names": [] + } + ) + + # Assert + assert response.status_code == 404 + assert "not found or expired" in response.json()["detail"] \ No newline at end of file From 671053d3d27d8e3f66d81ddd69bea0dadd98927a Mon Sep 17 00:00:00 2001 From: Compass AI Date: Mon, 15 Dec 2025 10:52:59 +0000 Subject: [PATCH 2/5] Add API endpoint to retrieve current authenticated user information Implemented a new `/api/current-author` endpoint that returns the authenticated user's name and email from the active Git provider session. This feature adds the `get_current_author()` method to all fetcher classes (GitHub, GitLab, Azure DevOps, and URL-based), with GitHub providing full implementation while other providers return None as they don't currently support this functionality. The endpoint includes proper error handling for missing sessions and provider-specific limitations. 
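For illustration, a minimal client-side sketch of the new endpoint; the base URL and session id are placeholders rather than part of this change:

import requests

BASE_URL = "http://localhost:8000"  # placeholder; depends on deployment


def fetch_current_author(session_id: str):
    # GET /api/current-author returns {"author": {"name": ..., "email": ...}}
    # for providers that support it (currently GitHub), or {"author": null} otherwise.
    response = requests.get(
        f"{BASE_URL}/api/current-author",
        params={"session_id": session_id},
        timeout=10,
    )
    response.raise_for_status()
    return response.json()["author"]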
--- app/api/models/schemas.py | 11 ++++- app/api/server/routes.py | 71 ++++++++++++++++++++------- git_recap/providers/azure_fetcher.py | 14 +++++- git_recap/providers/base_fetcher.py | 15 ++++++ git_recap/providers/github_fetcher.py | 28 ++++++++++- git_recap/providers/gitlab_fetcher.py | 14 +++++- git_recap/providers/url_fetcher.py | 12 +++++ 7 files changed, 142 insertions(+), 23 deletions(-) diff --git a/app/api/models/schemas.py b/app/api/models/schemas.py index d056c51..25064e9 100644 --- a/app/api/models/schemas.py +++ b/app/api/models/schemas.py @@ -165,4 +165,13 @@ class GetAuthorsResponse(BaseModel): """Response model containing list of authors""" authors: List[AuthorInfo] = Field(..., description="List of unique authors") total_count: int = Field(..., description="Total number of unique authors") - repo_count: int = Field(..., description="Number of repositories processed") \ No newline at end of file + repo_count: int = Field(..., description="Number of repositories processed") + + +# --- Current Author Endpoint Schema --- +class GetCurrentAuthorResponse(BaseModel): + """Response model for current author endpoint.""" + author: Optional[Dict[str, str]] = Field( + None, + description="Current authenticated user's information (name and email), or None if not available" + ) \ No newline at end of file diff --git a/app/api/server/routes.py b/app/api/server/routes.py index 6f77d26..1edfcba 100644 --- a/app/api/server/routes.py +++ b/app/api/server/routes.py @@ -10,6 +10,9 @@ CreatePullRequestResponse, GetPullRequestDiffRequest, GetPullRequestDiffResponse, + GetAuthorsRequest, + GetAuthorsResponse, + AuthorInfo, ) from services.llm_service import set_llm, get_llm, trim_messages @@ -28,28 +31,14 @@ class CloneRequest(BaseModel): url: str -class GetAuthorsRequest(BaseModel): - """Request model for fetching authors from repositories.""" - session_id: str = Field(..., description="Session identifier") - repo_names: Optional[List[str]] = Field( - default=[], - description="List of repository names to fetch authors from. Empty list fetches from all repositories." +class GetCurrentAuthorResponse(BaseModel): + """Response model for current author endpoint.""" + author: Optional[Dict[str, str]] = Field( + None, + description="Current authenticated user's information (name and email), or None if not available" ) -class AuthorInfo(BaseModel): - """Individual author information.""" - name: str = Field(..., description="Author's name") - email: str = Field(..., description="Author's email address") - - -class GetAuthorsResponse(BaseModel): - """Response model containing list of authors.""" - authors: List[AuthorInfo] = Field(..., description="List of unique authors") - total_count: int = Field(..., description="Total number of unique authors") - repo_count: int = Field(..., description="Number of repositories processed") - - GITHUB_ACCESS_TOKEN_URL = 'https://github.com/login/oauth/access_token' @@ -492,4 +481,48 @@ async def get_authors(request: GetAuthorsRequest): raise HTTPException( status_code=500, detail=f"Error fetching authors: {str(e)}" + ) + + +@router.get("/api/current-author", response_model=GetCurrentAuthorResponse) +async def get_current_author(session_id: str = Query(..., description="Session identifier")): + """ + Retrieve the current authenticated user's information from the fetcher. 
+ + Args: + session_id: The session identifier + + Returns: + GetCurrentAuthorResponse: Contains optional author information (name and email) + + Raises: + HTTPException: 404 if session not found, 500 for errors + """ + try: + fetcher = get_fetcher(session_id) + + if not fetcher: + raise HTTPException( + status_code=404, + detail=f"Session {session_id} not found or expired" + ) + + try: + author_info = fetcher.get_current_author() + except NotImplementedError: + author_info = None + except Exception as e: + raise HTTPException( + status_code=500, + detail=f"Error retrieving current author: {str(e)}" + ) + + return GetCurrentAuthorResponse(author=author_info) + + except HTTPException: + raise + except Exception as e: + raise HTTPException( + status_code=500, + detail=f"Error fetching current author: {str(e)}" ) \ No newline at end of file diff --git a/git_recap/providers/azure_fetcher.py b/git_recap/providers/azure_fetcher.py index 9fba869..2f9b148 100644 --- a/git_recap/providers/azure_fetcher.py +++ b/git_recap/providers/azure_fetcher.py @@ -358,4 +358,16 @@ def get_authors(self, repo_names: List[str]) -> List[Dict[str, str]]: except Exception as e: print(f"Error in get_authors: {e}") - return [] \ No newline at end of file + return [] + + def get_current_author(self) -> Optional[Dict[str, str]]: + """ + Retrieve the current authenticated user's information. + + For Azure DevOps, default author functionality is not currently implemented, + so this method returns None. + + Returns: + None: Azure DevOps fetcher does not support default author retrieval. + """ + return None \ No newline at end of file diff --git a/git_recap/providers/base_fetcher.py b/git_recap/providers/base_fetcher.py index d4df2ca..af8ea06 100644 --- a/git_recap/providers/base_fetcher.py +++ b/git_recap/providers/base_fetcher.py @@ -171,6 +171,21 @@ def get_authors(self, repo_names: List[str]) -> List[Dict[str, str]]: """ pass + @abstractmethod + def get_current_author(self) -> Optional[Dict[str, str]]: + """ + Retrieve the current authenticated user's information. + + Returns the default authenticated user's name and email if available + for the current fetcher session, or None if not applicable for the provider. + + Returns: + Optional[Dict[str, str]]: Dictionary with 'name' and 'email' keys, + or None if no default author is available. + Example: {"name": "John Doe", "email": "john@example.com"} + """ + pass + def get_authored_messages(self) -> List[Dict[str, Any]]: """ Aggregates all commit, pull request, and issue entries into a single list, diff --git a/git_recap/providers/github_fetcher.py b/git_recap/providers/github_fetcher.py index 73ec5c1..9c1893d 100644 --- a/git_recap/providers/github_fetcher.py +++ b/git_recap/providers/github_fetcher.py @@ -447,4 +447,30 @@ def get_authors(self, repo_names: List[str]) -> List[Dict[str, str]]: except Exception as e: print(f"Error in get_authors: {e}") - return [] \ No newline at end of file + return [] + + def get_current_author(self) -> Optional[Dict[str, str]]: + """ + Retrieve the current authenticated user's information. + + Returns the authenticated GitHub user's name and email if available. + + Returns: + Optional[Dict[str, str]]: Dictionary with 'name' and 'email' keys, + or None if user information is unavailable. 
+ """ + try: + if not self.user: + logger.warning("No authenticated user available") + return None + + user_name = self.user.name or self.user.login or "Unknown" + user_email = self.user.email or f"{self.user.login}@users.noreply.github.com" + + return { + "name": user_name, + "email": user_email + } + except Exception as e: + logger.error(f"Error retrieving current author: {str(e)}") + return None \ No newline at end of file diff --git a/git_recap/providers/gitlab_fetcher.py b/git_recap/providers/gitlab_fetcher.py index 7797d6f..600d5df 100644 --- a/git_recap/providers/gitlab_fetcher.py +++ b/git_recap/providers/gitlab_fetcher.py @@ -299,4 +299,16 @@ def get_authors(self, repo_names: List[str]) -> List[Dict[str, str]]: except Exception as e: print(f"Error in get_authors: {e}") - return [] \ No newline at end of file + return [] + + def get_current_author(self) -> Optional[Dict[str, str]]: + """ + Retrieve the current authenticated user's information. + + For GitLab, default author functionality is not currently implemented, + so this method returns None. + + Returns: + None: GitLab fetcher does not support default author retrieval. + """ + return None \ No newline at end of file diff --git a/git_recap/providers/url_fetcher.py b/git_recap/providers/url_fetcher.py index 624e02e..1be42df 100644 --- a/git_recap/providers/url_fetcher.py +++ b/git_recap/providers/url_fetcher.py @@ -354,6 +354,18 @@ def get_authors(self, repo_names: List[str]) -> List[Dict[str, str]]: print(f"Error in get_authors: {e}") return [] + def get_current_author(self) -> Optional[Dict[str, str]]: + """ + Retrieve the current authenticated user's information. + + For URL-based cloning, there is no authenticated user context, + so this method always returns None. + + Returns: + None: URL fetcher has no default author. + """ + return None + def clear(self) -> None: """Clean up temporary directory.""" if self.temp_dir and os.path.exists(self.temp_dir): From 0c71a115d47f77b03bf68819bc750cd886963891 Mon Sep 17 00:00:00 2001 From: Compass AI Date: Mon, 15 Dec 2025 11:42:04 +0000 Subject: [PATCH 3/5] Update actions endpoint to return structured response with trimming metadata and user notifications Modified the `/actions` endpoint to return a structured response that includes the list of Git actionables along with metadata about token limit trimming operations. When the free version's token limit requires trimming older actionables, the API now provides a user-facing informational message explaining the situation. The frontend displays this message in a dismissible banner, improving transparency about context limitations. Added proper response schema validation and type definitions to ensure consistent API behavior. --- app/api/models/schemas.py | 33 ++++++++++++++++- app/api/server/routes.py | 41 +++++++++++++++++--- app/git-recap/src/App.tsx | 78 ++++++++++++++++++++++++++++++++++++--- 3 files changed, 141 insertions(+), 11 deletions(-) diff --git a/app/api/models/schemas.py b/app/api/models/schemas.py index 25064e9..499f45a 100644 --- a/app/api/models/schemas.py +++ b/app/api/models/schemas.py @@ -174,4 +174,35 @@ class GetCurrentAuthorResponse(BaseModel): author: Optional[Dict[str, str]] = Field( None, description="Current authenticated user's information (name and email), or None if not available" - ) \ No newline at end of file + ) + + +# --- Actions Response Schema --- +class ActionsResponse(BaseModel): + """ + Structured response for the actions endpoint. 
+ + This model encapsulates the response from the actions endpoint, including + the list of Git actionables, an optional user-facing informational message, + and metadata about any trimming operations performed to satisfy token limits. + + Attributes: + actions: Formatted string containing Git actionables (commits, PRs, issues, etc.) + message: User-facing informational message (optional, present when trimming occurs) + trimmed_count: Number of actionables removed during trimming to satisfy token limits + total_count: Original number of actionables before any trimming was applied + """ + actions: str = Field(..., description="Formatted string of Git actionables") + message: Optional[str] = Field(None, description="User-facing informational message about trimming") + trimmed_count: int = Field(0, description="Number of items removed during trimming") + total_count: int = Field(..., description="Total number of items before trimming") + + class Config: + json_schema_extra = { + "example": { + "actions": "2025-03-14:\n - [Commit] in repo-frontend: Fix bug in authentication\n - [Pull Request] in repo-backend: Add new API endpoint (PR #42)\n\n2025-03-15:\n - [Commit] in repo-core: Update dependencies\n", + "message": "We're running the free version with a maximum token limit for contextual input. To stay within this limit, we automatically trimmed 15 older Git actionables from the context. We hope you understand!", + "trimmed_count": 15, + "total_count": 50 + } + } \ No newline at end of file diff --git a/app/api/server/routes.py b/app/api/server/routes.py index 1edfcba..734b5dc 100644 --- a/app/api/server/routes.py +++ b/app/api/server/routes.py @@ -13,6 +13,7 @@ GetAuthorsRequest, GetAuthorsResponse, AuthorInfo, + ActionsResponse, ) from services.llm_service import set_llm, get_llm, trim_messages @@ -184,7 +185,7 @@ async def get_repos(session_id: str): return {"repos": fetcher.repos_names} -@router.get("/actions") +@router.get("/actions", response_model=ActionsResponse) async def get_actions( session_id: str, start_date: Optional[str] = Query(None), @@ -195,6 +196,10 @@ async def get_actions( """ Get actions for the specified session with optional filters. + Returns a structured response including the actions list, user-facing + informational message (if trimming occurred), and metadata about the + trimming operation. + Args: session_id: The session identifier start_date: Optional start date filter @@ -203,7 +208,7 @@ async def get_actions( authors: Optional list of authors to filter Returns: - dict: Contains formatted action entries + ActionsResponse: Structured response with actions, message, and metadata Raises: HTTPException: 404 if session not found @@ -228,10 +233,36 @@ async def get_actions( llm = get_llm(session_id) actions = fetcher.get_authored_messages() - actions = trim_messages(actions, llm.tokenizer) - print(f"\n\n\n{actions=}\n\n\n") - return {"actions": parse_entries_to_txt(actions)} + # Store original count before trimming + original_count = len(actions) + + # Apply token limit trimming + trimmed_actions = trim_messages(actions, llm.tokenizer) + + # Calculate how many items were removed + trimmed_count = original_count - len(trimmed_actions) + + # Generate user-facing message if trimming occurred + message = None + if trimmed_count > 0: + message = ( + f"We're running the free version with a maximum token limit for contextual input. 
" + f"To stay within this limit, we automatically trimmed {trimmed_count} older Git " + f"actionable{'s' if trimmed_count != 1 else ''} from the context. " + f"We hope you understand!" + ) + + # Parse actions to text format + actions_txt = parse_entries_to_txt(trimmed_actions) + + # Return structured response + return ActionsResponse( + actions=actions_txt, + message=message, + trimmed_count=trimmed_count, + total_count=original_count + ) @router.get("/release_notes") diff --git a/app/git-recap/src/App.tsx b/app/git-recap/src/App.tsx index 8065a68..1677f96 100644 --- a/app/git-recap/src/App.tsx +++ b/app/git-recap/src/App.tsx @@ -1,12 +1,11 @@ +typescript import { useState, useEffect, useRef, useCallback } from 'react'; -import { Github, Hammer, BookText, Plus, Minus } from 'lucide-react'; +import { Github, Hammer, BookText, Plus, Minus, AlertCircle } from 'lucide-react'; import githubIcon from './assets/github-mark-white.png'; import { toPng } from 'html-to-image'; import ReactMarkdown from 'react-markdown'; import './App.css'; -import { Info } from "lucide-react"; - import { Button, Card, @@ -20,6 +19,14 @@ import { Popup } from 'pixel-retroui'; +// Type definition for the structured response from actions endpoint +interface ActionsResponse { + actions: string; + message?: string; + trimmed_count: number; + total_count: number; +} + function App() { const [pat, setPat] = useState(''); const [codeHost, setCodeHost] = useState(() => { @@ -94,6 +101,9 @@ function App() { const [showReleaseMode, setShowReleaseMode] = useState(false); const [showMenu, setShowMenu] = useState(false); + // Info message state for displaying backend notifications + const [infoMessage, setInfoMessage] = useState(null); + const actionsLogRef = useRef(null); const summaryLogRef = useRef(null); const textAreaRef = useRef(null); @@ -182,6 +192,7 @@ function App() { setProgressWs(0); setShowExportButton(false); setRecapDone(true); + setInfoMessage(null); // Clear any previous info message handleRecap(); }; @@ -287,6 +298,7 @@ function App() { setIsExecutingReleaseNotes(true); setShowExportButton(false); setRecapDone(false); + setInfoMessage(null); // Clear any previous info message setTimeout(() => { actionsLogRef.current?.scrollIntoView({ behavior: 'smooth' }); }, 100); @@ -348,6 +360,7 @@ function App() { setProgressWs(0); setIsExecuting(true); setShowExportButton(false); + setInfoMessage(null); // Clear any previous info message setTimeout(() => { actionsLogRef.current?.scrollIntoView({ behavior: 'smooth' }); }, 100); @@ -367,7 +380,10 @@ function App() { method: 'GET' }); if (!response.ok) throw new Error(`Request failed! Status: ${response.status}`); - const data = await response.json(); + + // Parse the structured response + const data: ActionsResponse = await response.json(); + if (!data.actions) { setPopupMessage('Got no actionables from Git. Please check your filters or date range. 
If you are signing with GitHub, you will need to install GitRecap from the Marketplace or authenticate with a PAT instead.'); setIsPopupOpen(true); @@ -375,7 +391,16 @@ function App() { setProgressActions(100); return; } + + // Set the actions output setCommitsOutput(data.actions); + + // Display informational message if present + if (data.message) { + setInfoMessage(data.message); + console.log(`Trimming info: ${data.trimmed_count} items removed out of ${data.total_count} total`); + } + clearInterval(progressActionsInterval); setProgressActions(100); summaryLogRef.current?.scrollIntoView({ behavior: 'smooth' }); @@ -1073,6 +1098,41 @@ function App() { setExportModalOpen(false); }, [githubUsername, badgeTheme, generateBadgeContent]); + // Info banner component for displaying backend messages + const InfoBanner: React.FC<{ message: string; onDismiss: () => void }> = ({ + message, + onDismiss + }) => ( +
+    <div className="info-banner">
+      <AlertCircle size={16} />
+      <span>{message}</span>
+      <Button onClick={onDismiss}>Dismiss</Button>
+    </div>
+  );
+
   return (
@@ -1081,6 +1141,14 @@ function App() {

Git Recap

+          {/* Display informational message if present */}
+          {infoMessage && (
+            <InfoBanner
+              message={infoMessage}
+              onDismiss={() => setInfoMessage(null)}
+            />
+          )}
+
{!isAuthorized ? ( <> @@ -1224,7 +1292,7 @@ function App() {