diff --git a/backend/code_review_backend/app/urls.py b/backend/code_review_backend/app/urls.py index f6f4cd8c8..b7862ab28 100644 --- a/backend/code_review_backend/app/urls.py +++ b/backend/code_review_backend/app/urls.py @@ -11,6 +11,7 @@ from rest_framework import permissions from code_review_backend.issues import api +from code_review_backend.issues.v2 import api as api_v2 # Build Swagger schema view schema_view = get_schema_view( @@ -28,6 +29,7 @@ urlpatterns = [ path("", lambda request: redirect("docs/", permanent=False)), path("v1/", include(api.urls)), + path("v2/", include(api_v2.urls)), path("admin/", admin.site.urls), path( r"docs\.json|\.yaml)", diff --git a/backend/code_review_backend/issues/v2/__init__.py b/backend/code_review_backend/issues/v2/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/backend/code_review_backend/issues/v2/api.py b/backend/code_review_backend/issues/v2/api.py new file mode 100644 index 000000000..8cee8f27c --- /dev/null +++ b/backend/code_review_backend/issues/v2/api.py @@ -0,0 +1,127 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +from django.core.exceptions import BadRequest +from django.db.models import Exists, OuterRef +from django.shortcuts import get_object_or_404 +from django.urls import path +from django.utils.functional import cached_property +from rest_framework.exceptions import ValidationError +from rest_framework.generics import ListAPIView + +from code_review_backend.issues.models import Diff, IssueLink +from code_review_backend.issues.v2.serializers import DiffIssuesSerializer + + +class IssueList(ListAPIView): + """ + Returns issues that are linked to a diff, and depending on the selected mode: + * known: Issues that have also being detected from the base repository (e.g. mozilla-central). + `previous_diff_id` is always returned as null when listing known issues. + * unresolved: Issues that were linked to the previous diff of the same parent revision, and are + still present on the current diff. Filters out issues that are known by the backend. + * closed: Issues that were listed on the previous diff of the same parent revision, but does + not exist on the current diff. + + This endpoint does not uses pagination. + """ + + allowed_modes = ["unresolved", "known", "closed"] + pagination_class = None + + def get_serializer(self, queryset, many=True): + return DiffIssuesSerializer( + { + "previous_diff_id": self.previous_diff and self.previous_diff.id, + "issues": queryset, + }, + context=self.get_serializer_context(), + ) + + @cached_property + def diff(self): + return get_object_or_404( + Diff.objects.select_related("revision"), id=self.kwargs["diff_id"] + ) + + @cached_property + def previous_diff(self): + if self.mode == "known": + # No need to search for a previous diff for known issues + return None + return ( + self.diff.revision.diffs.filter(created__lt=self.diff.created) + .order_by("created") + .last() + ) + + @cached_property + def mode(self): + if (mode := self.kwargs["mode"]) not in self.allowed_modes: + raise ValidationError( + detail=f"mode argument must be one of {self.allowed_modes}" + ) + return mode + + def get_queryset(self): + return getattr(self, f"{self.mode}_issues").filter(**{self.mode: True}) + + def distinct_issues(self, qs): + """ + Convert a list of issue links to unique couples of (issue_id, issue_hash) + """ + attributes = ("issue_id", "issue__hash") + return qs.order_by(*attributes).values(*attributes).distinct(*attributes) + + @property + def known_issues(self): + return self.distinct_issues( + self.diff.issue_links.annotate( + known=Exists( + IssueLink.objects.filter( + revision__head_repository_id=self.diff.revision.base_repository_id, + issue=OuterRef("issue"), + ) + ) + ) + ) + + @property + def unresolved_issues(self): + if not self.previous_diff: + raise BadRequest("No previous diff was found to compare issues") + return self.distinct_issues( + self.known_issues.filter(known=False).annotate( + unresolved=Exists( + IssueLink.objects.filter( + diff=self.previous_diff, + issue=OuterRef("issue"), + ) + ) + ) + ) + + @property + def closed_issues(self): + if not self.previous_diff: + raise BadRequest("No previous diff was found to compare issues") + return self.distinct_issues( + self.previous_diff.issue_links.annotate( + closed=~Exists( + IssueLink.objects.filter( + diff=self.diff, + issue=OuterRef("issue"), + ) + ) + ) + ) + + +urls = [ + path( + "diff//issues//", + IssueList.as_view(), + name="issue-list", + ), +] diff --git a/backend/code_review_backend/issues/v2/serializers.py b/backend/code_review_backend/issues/v2/serializers.py new file mode 100644 index 000000000..c384c4efa --- /dev/null +++ b/backend/code_review_backend/issues/v2/serializers.py @@ -0,0 +1,19 @@ +from rest_framework import serializers + +from code_review_backend.issues.models import IssueLink + + +class IssueLinkHashSerializer(serializers.ModelSerializer): + """Serialize an Issue hash from the IssueLink M2M""" + + id = serializers.UUIDField(source="issue_id", read_only=True) + hash = serializers.CharField(source="issue__hash", max_length=32, read_only=True) + + class Meta: + model = IssueLink + fields = ("id", "hash") + + +class DiffIssuesSerializer(serializers.Serializer): + previous_diff_id = serializers.UUIDField(allow_null=True, read_only=True) + issues = IssueLinkHashSerializer(many=True) diff --git a/backend/code_review_backend/issues/v2/tests/__init__.py b/backend/code_review_backend/issues/v2/tests/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/backend/code_review_backend/issues/v2/tests/test_list_issues.py b/backend/code_review_backend/issues/v2/tests/test_list_issues.py new file mode 100644 index 000000000..fc0939268 --- /dev/null +++ b/backend/code_review_backend/issues/v2/tests/test_list_issues.py @@ -0,0 +1,175 @@ +from rest_framework import status +from rest_framework.test import APITestCase + +from code_review_backend.issues.models import Issue, Repository + + +class ListIssuesTestCase(APITestCase): + def setUp(self): + # Create the main repository + self.repo_main = Repository.objects.create( + slug="mozilla-central", url="https://hg.mozilla.org/mozilla-central" + ) + self.repo_try = Repository.objects.create( + slug="myrepo-try", url="http://repo.test/try" + ) + + self.issues = Issue.objects.bulk_create( + [ + Issue(**attrs) + for attrs in [ + {"hash": "0" * 32}, + {"hash": "1" * 32}, + {"hash": "2" * 32}, + {"hash": "3" * 32}, + ] + ] + ) + + # Create historic revision with some issues + self.revision_main = self.repo_main.head_revisions.create( + phabricator_id=456, + phabricator_phid="PHID-REV-XXX", + title="Main revision", + base_repository=self.repo_main, + head_repository=self.repo_main, + ) + self.revision_main.issue_links.create(issue=self.issues[0]) + self.revision_main.issue_links.create(issue=self.issues[1]) + + # Create a new revions, with two successive diffs + self.revision_last = self.repo_try.head_revisions.create( + phabricator_id=1337, + phabricator_phid="PHID-REV-YYY", + title="Bug XXX - Yet another bug", + base_repository=self.repo_main, + head_repository=self.repo_try, + ) + self.diff1 = self.revision_last.diffs.create( + id=1, + repository=self.repo_try, + review_task_id="deadbeef123", + phid="PHID-DIFF-xxx", + ) + self.diff1.issue_links.create(revision=self.revision_last, issue=self.issues[0]) + self.diff1.issue_links.create(revision=self.revision_last, issue=self.issues[2]) + self.diff1.issue_links.create(revision=self.revision_last, issue=self.issues[3]) + + self.diff2 = self.revision_last.diffs.create( + id=2, + repository=self.repo_try, + review_task_id="coffee12345", + phid="PHID-DIFF-yyy", + ) + self.diff2.issue_links.create(revision=self.revision_last, issue=self.issues[0]) + self.diff2.issue_links.create(revision=self.revision_last, issue=self.issues[2]) + + def test_list_issues_invalid_mode(self): + with self.assertNumQueries(0): + response = self.client.get( + f"/v2/diff/{self.diff2.id}/issues/test/", format="json" + ) + self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) + self.assertListEqual( + response.json(), + ["mode argument must be one of ['unresolved', 'known', 'closed']"], + ) + + def test_list_issues_invalid_diff(self): + with self.assertNumQueries(1): + response = self.client.get("/v2/diff/424242/issues/known/") + self.assertEqual(response.status_code, status.HTTP_404_NOT_FOUND) + + def test_list_issues_known(self): + with self.assertNumQueries(2): + response = self.client.get(f"/v2/diff/{self.diff2.id}/issues/known/") + self.assertEqual(response.status_code, status.HTTP_200_OK) + self.maxDiff = None + self.assertDictEqual( + response.json(), + { + "previous_diff_id": None, + "issues": [ + {"id": str(self.issues[0].id), "hash": self.issues[0].hash}, + ], + }, + ) + + def test_list_issues_unresolved_first_diff(self): + """No issue can be marked as unresolved on a new diff""" + with self.assertNumQueries(2): + response = self.client.get( + f"/v2/diff/{self.diff1.id}/issues/unresolved/", format="json" + ) + self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) + + def test_list_issues_unresolved(self): + with self.assertNumQueries(3): + response = self.client.get(f"/v2/diff/{self.diff2.id}/issues/unresolved/") + self.assertEqual(response.status_code, status.HTTP_200_OK) + self.assertDictEqual( + response.json(), + { + "previous_diff_id": "1", + "issues": [ + {"id": str(self.issues[2].id), "hash": self.issues[2].hash}, + ], + }, + ) + + def test_list_issues_closed_first_diff(self): + """No issue can be marked as closed on a new diff""" + with self.assertNumQueries(2): + response = self.client.get(f"/v2/diff/{self.diff1.id}/issues/closed/") + self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) + + def test_list_issues_closed(self): + with self.assertNumQueries(3): + response = self.client.get(f"/v2/diff/{self.diff2.id}/issues/closed/") + self.assertEqual(response.status_code, status.HTTP_200_OK) + self.assertDictEqual( + response.json(), + { + "previous_diff_id": "1", + "issues": [{"id": str(self.issues[3].id), "hash": self.issues[3].hash}], + }, + ) + + def test_list_issues_new_diff_reopen(self): + """ + Open a new diff with 1 known issue and one that has been closed in the previous diff. + Both are listed as new, and the two issues of the previous diff listed as closed. + """ + diff = self.revision_last.diffs.create( + id=3, + repository=self.repo_try, + review_task_id="42", + phid="PHID-DIFF-42", + ) + new_issue = Issue.objects.create(hash="4" * 32) + diff.issue_links.create(revision=self.revision_last, issue=new_issue) + diff.issue_links.create(revision=self.revision_last, issue=self.issues[3]) + self.assertDictEqual( + self.client.get(f"/v2/diff/{diff.id}/issues/unresolved/").json(), + { + "previous_diff_id": "2", + "issues": [], + }, + ) + self.assertDictEqual( + self.client.get(f"/v2/diff/{diff.id}/issues/known/").json(), + { + "previous_diff_id": None, + "issues": [], + }, + ) + self.assertDictEqual( + self.client.get(f"/v2/diff/{diff.id}/issues/closed/").json(), + { + "previous_diff_id": "2", + "issues": [ + {"id": str(self.issues[0].id), "hash": self.issues[0].hash}, + {"id": str(self.issues[2].id), "hash": self.issues[2].hash}, + ], + }, + ) diff --git a/bot/code_review_bot/backend.py b/bot/code_review_bot/backend.py index 17019b5b3..86ef2e2aa 100644 --- a/bot/code_review_bot/backend.py +++ b/bot/code_review_bot/backend.py @@ -6,6 +6,7 @@ import requests import structlog +from requests.exceptions import HTTPError from code_review_bot import taskcluster from code_review_bot.config import GetAppUserAgent, settings @@ -187,6 +188,32 @@ def list_diff_issues(self, diff_id): """ return list(self.paginate(f"/v1/diff/{diff_id}/issues/")) + def list_diff_issues_v2(self, diff_id, mode): + """ + List issues for a given diff, applying specific filters. + Returns: + * The ID of the previous diff if it exists (always null for `known` mode). + * A list of issues (dict serializing ID and hash) corresponding to the filter. + """ + assert mode in ("known", "unresolved", "closed") + try: + data = self.get(f"/v2/diff/{diff_id}/issues/{mode}") + except HTTPError as e: + if e.response.status_code != 404: + logger.warning( + f"Could not list {mode} issues from the bot: {e}. Skipping" + ) + else: + logger.info(f"Diff not found in code review backend: {diff_id}") + return None, [] + return data["previous_diff_id"], data["issues"] + + def get(self, url): + auth = (self.username, self.password) + resp = requests.get(url, auth=auth, headers=GetAppUserAgent()) + resp.raise_for_status() + return resp.json() + def paginate(self, url_path): """ Yield results from a paginated API one by one diff --git a/bot/code_review_bot/report/phabricator.py b/bot/code_review_bot/report/phabricator.py index c46dc5486..3c1b2f8b1 100644 --- a/bot/code_review_bot/report/phabricator.py +++ b/bot/code_review_bot/report/phabricator.py @@ -2,7 +2,6 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this # file, You can obtain one at http://mozilla.org/MPL/2.0/. -from collections import defaultdict from typing import List from urllib.parse import urljoin @@ -30,7 +29,7 @@ IMPORTANT: Found {nb_errors} (error level) that must be fixed before landing. """ -COMMENT_DIFF_FOLLOWUP = """compared to the previous diff [{diff_id}]({phabricator_diff_url}). +COMMENT_DIFF_FOLLOWUP = """compared to the previous diff. """ COMMENT_RUN_ANALYZERS = """ @@ -110,53 +109,6 @@ def pluralize(word, nb): assert isinstance(nb, int) return "{} {}".format(nb, nb == 1 and word or word + "s") - def compare_issues(self, former_diff_id, issues): - """ - Compare new issues depending on their evolution from the - previous diff on the same revision. - Returns a tuple containing lists of: - * Unresolved issues, that are present on both diffs - * Closed issues, that were present in the previous diff and are now gone - """ - if not self.backend_api.enabled: - logger.warning( - "Backend API must be enabled to compare issues with previous diff {former_diff_id}." - ) - return [], [] - - # If this is the first diff, there's no need to compare issues with a - # previous diff since there is no previous diff. - if former_diff_id is None: - return [], [] - - # Retrieve issues related to the previous diff - try: - previous_issues = self.backend_api.list_diff_issues(former_diff_id) - except Exception as e: - logger.warning( - f"An error occurred listing issues on previous diff {former_diff_id}: {e}. " - "Each issue will be considered as a new case." - ) - previous_issues = [] - - # Multiple issues may share a similar hash in case they were - # produced by the same linter on the same lines - indexed_issues = defaultdict(list) - for issue in previous_issues: - indexed_issues[issue["hash"]].append(issue) - - # Compare current issues with the previous ones based on the hash - unresolved = [ - issue.on_backend["hash"] - for issue in issues - if issue.on_backend and issue.on_backend["hash"] in indexed_issues - ] - - # All previous issues that are not unresolved are closed - closed = [issue for issue in previous_issues if issue["hash"] not in unresolved] - - return unresolved, closed - def publish(self, issues, revision, task_failures, notices, reviewers): """ Publish issues on Phabricator: @@ -202,9 +154,9 @@ def publish(self, issues, revision, task_failures, notices, reviewers): if patch.analyzer.name not in self.analyzers_skipped ] - if publishable_issues: - # Publish detected patch's issues on Harbormaster, all at once, as lint issues - self.publish_harbormaster(revision, publishable_issues) + #if publishable_issues: + # # Publish detected patch's issues on Harbormaster, all at once, as lint issues + # self.publish_harbormaster(revision, publishable_issues) # Retrieve all diffs for the current revision rev_diffs = self.api.search_diffs(revision_phid=revision.phabricator_phid) @@ -215,18 +167,20 @@ def publish(self, issues, revision, task_failures, notices, reviewers): ) return publishable_issues, patches - # Compare issues that are not known on the repository to a previous diff - older_diff_ids = [ - diff["id"] for diff in rev_diffs if diff["id"] < revision.diff_id - ] - former_diff_id = sorted(older_diff_ids)[-1] if older_diff_ids else None - unresolved_issues, closed_issues = self.compare_issues( - former_diff_id, publishable_issues - ) + if not self.backend_api.enabled: + logger.warning( + "Backend API must be enabled to compare issues with the previous diff." + ) + unresolved, closed = [], [] + else: + unresolved = self.backend_api.list_diff_issues_v2( + revision.diff_id, "unresolved" + ) + closed = self.backend_api.list_diff_issues_v2(revision.diff_id, "closed") if ( - len(unresolved_issues) == len(publishable_issues) - and not closed_issues + len(unresolved) == len(publishable_issues) + and not closed and not task_failures and not notices ): @@ -234,7 +188,7 @@ def publish(self, issues, revision, task_failures, notices, reviewers): logger.info( "No new issues nor failures/notices were detected. " "Skipping comment publication (some issues are unresolved)", - unresolved_count=len(unresolved_issues), + unresolved_count=len(unresolved), ) return publishable_issues, patches @@ -245,9 +199,8 @@ def publish(self, issues, revision, task_failures, notices, reviewers): patches, task_failures, notices, - former_diff_id=former_diff_id, - unresolved_count=len(unresolved_issues), - closed_count=len(closed_issues), + unresolved_count=len(unresolved), + closed_count=len(closed), ) # Publish statistics @@ -263,19 +216,19 @@ def publish_harbormaster( Publish issues through HarborMaster either as lint results or unit tests results """ - assert lint_issues or unit_issues, "No issues to publish" - - self.api.update_build_target( - revision.build_target_phid, - state=BuildState.Work, - lint=[issue.as_phabricator_lint() for issue in lint_issues], - unit=[issue.as_phabricator_unitresult() for issue in unit_issues], - ) - logger.info( - "Updated Harbormaster build state with issues", - nb_lint=len(lint_issues), - nb_unit=len(unit_issues), - ) + #assert lint_issues or unit_issues, "No issues to publish" + + #self.api.update_build_target( + # revision.build_target_phid, + # state=BuildState.Work, + # lint=[issue.as_phabricator_lint() for issue in lint_issues], + # unit=[issue.as_phabricator_unitresult() for issue in unit_issues], + #) + #logger.info( + # "Updated Harbormaster build state with issues", + # nb_lint=len(lint_issues), + # nb_unit=len(unit_issues), + #) def publish_summary( self, @@ -284,28 +237,36 @@ def publish_summary( patches, task_failures, notices, - former_diff_id, unresolved_count, closed_count, ): """ Summarize publishable issues through Phabricator comment """ - self.api.comment( - revision.phabricator_id, - self.build_comment( - revision=revision, - issues=issues, - patches=patches, - bug_report_url=BUG_REPORT_URL, - task_failures=task_failures, - notices=notices, - former_diff_id=former_diff_id, - unresolved=unresolved_count, - closed=closed_count, - ), - ) - logger.info("Published phabricator summary") + raise Exception(self.build_comment( + revision=revision, + issues=issues, + patches=patches, + bug_report_url=BUG_REPORT_URL, + task_failures=task_failures, + notices=notices, + unresolved=unresolved_count, + closed=closed_count, + )) + #self.api.comment( + # revision.phabricator_id, + # self.build_comment( + # revision=revision, + # issues=issues, + # patches=patches, + # bug_report_url=BUG_REPORT_URL, + # task_failures=task_failures, + # notices=notices, + # unresolved=unresolved_count, + # closed=closed_count, + # ), + #) + #logger.info("Published phabricator summary") def build_comment( self, @@ -315,7 +276,6 @@ def build_comment( notices, patches=[], task_failures=[], - former_diff_id=None, unresolved=0, closed=0, ): @@ -367,13 +327,7 @@ def build_comment( followup_comment += "and " if closed: followup_comment += self.pluralize("defect", closed) + " closed " - followup_comment += COMMENT_DIFF_FOLLOWUP.format( - phabricator_diff_url=self.phabricator_diff_url.format( - diff_id=former_diff_id - ), - diff_id=former_diff_id, - ) - comment += followup_comment + comment += COMMENT_DIFF_FOLLOWUP # Add colored warning section total_warnings = sum(1 for i in issues if i.level == Level.Warning) diff --git a/bot/code_review_bot/workflow.py b/bot/code_review_bot/workflow.py index 48e19e4df..f953732bf 100644 --- a/bot/code_review_bot/workflow.py +++ b/bot/code_review_bot/workflow.py @@ -3,7 +3,6 @@ # file, You can obtain one at http://mozilla.org/MPL/2.0/. from datetime import datetime, timedelta -from itertools import groupby import structlog from libmozdata.phabricator import BuildState, PhabricatorAPI @@ -99,24 +98,8 @@ def run(self, revision): # Analyze issues in case the before/after feature is enabled if revision.before_after_feature: logger.info("Running the before/after feature") - # Search a base revision from the decision task - decision = self.queue_service.task(settings.try_group_id) - base_rev_changeset = ( - decision.get("payload", {}).get("env", {}).get("GECKO_BASE_REV") - ) - if not base_rev_changeset: - logger.warning( - "Base revision changeset could not be fetched from Phabricator, " - "looking for existing issues based on the current date", - task=settings.try_group_id, - ) - - # Clone local repo when required - # as find_previous_issues will build the hashes - self.clone_repository(revision) - # Mark know issues to avoid publishing them on this patch - self.find_previous_issues(issues, base_rev_changeset) + self.find_previous_issues(revision.diff_id, issues) new_issues_count = sum(issue.new_issue for issue in issues) logger.info( f"Found {new_issues_count} new issues (over {len(issues)} total detected issues)", @@ -284,12 +267,12 @@ def publish(self, revision, issues, task_failures, notices, reviewers): ) # Publish final HarborMaster state - self.update_status( - revision, - BuildState.Fail - if nb_publishable_errors > 0 or task_failures - else BuildState.Pass, - ) + #self.update_status( + # revision, + # BuildState.Fail + # if nb_publishable_errors > 0 or task_failures + # else BuildState.Pass, + #) def index(self, revision, **kwargs): """ @@ -348,39 +331,16 @@ def index(self, revision, **kwargs): }, ) - def find_previous_issues(self, issues, base_rev_changeset=None): - """ - Look for known issues in the backend matching the given list of issues - - If a base revision ID is provided, compare to issues detected on this revision - Otherwise, compare to issues detected on last ingested revision - """ + def find_previous_issues(self, diff_id, issues): + """Look for known issues in the backend""" assert ( self.backend_api.enabled ), "Backend storage is disabled, comparing issues is not possible" - current_date = datetime.now().strftime("%Y-%m-%d") - - # Group issues by path, so we only list know issues for the affected files - issues_groups = groupby( - sorted(issues, key=lambda i: i.path), - lambda i: i.path, - ) - logger.info( - "Checking for existing issues in the backend", - base_revision_changeset=base_rev_changeset, - ) - - for path, group_issues in issues_groups: - known_issues = self.backend_api.list_repo_issues( - "mozilla-central", - date=current_date, - revision_changeset=base_rev_changeset, - path=path, - ) - hashes = [issue["hash"] for issue in known_issues] - for issue in group_issues: - issue.new_issue = bool(issue.hash and issue.hash not in hashes) + known_issues = self.backend_api.list_diff_issues_v2(diff_id, "known") + known_hashes = [issue["hash"] for issue in known_issues] + for issue in issues: + issue.new_issue = bool(issue.hash and issue.hash not in known_hashes) def find_issues(self, revision, group_id): """ @@ -551,5 +511,5 @@ def update_status(self, revision, state): ) return - self.phabricator.update_build_target(revision.build_target_phid, state) - logger.info("Updated HarborMaster status", state=state, revision=revision) + #self.phabricator.update_build_target(revision.build_target_phid, state) + #logger.info("Updated HarborMaster status", state=state, revision=revision)