From e020730ad5dfdab91f4adacc308d22583a4fc8e6 Mon Sep 17 00:00:00 2001 From: Virginia Dooley Date: Thu, 27 Jun 2024 15:27:41 +0100 Subject: [PATCH 1/5] Validate Linkedin urls --- ynr/apps/people/forms/forms.py | 13 +++++++++++ ynr/apps/people/helpers.py | 23 ++++++++++++++++++- .../tests/test_person_form_identifier_crud.py | 21 +++++++++++++++++ ynr/apps/ynr_refactoring/settings.py | 2 +- 4 files changed, 57 insertions(+), 2 deletions(-) diff --git a/ynr/apps/people/forms/forms.py b/ynr/apps/people/forms/forms.py index 16dcca8037..2b156d0821 100644 --- a/ynr/apps/people/forms/forms.py +++ b/ynr/apps/people/forms/forms.py @@ -19,6 +19,7 @@ StrippedCharField, ) from people.helpers import ( + clean_linkedin_url, clean_mastodon_username, clean_twitter_username, clean_wikidata_id, @@ -128,6 +129,18 @@ def clean_twitter_username(self, username): except ValueError as e: raise ValidationError(e) + def clean_linkedin_url(self, url): + if self.instance.value != url: + self.instance.internal_identifier = None + + if self.instance.internal_identifier: + return url + + try: + return clean_linkedin_url(url) + except ValueError as e: + raise ValidationError(e) + def clean_mastodon_username(self, username): if self.instance.value != username: self.instance.internal_identifier = None diff --git a/ynr/apps/people/helpers.py b/ynr/apps/people/helpers.py index a8df99634c..7bfeca503f 100644 --- a/ynr/apps/people/helpers.py +++ b/ynr/apps/people/helpers.py @@ -1,6 +1,6 @@ import contextlib import re -from urllib.parse import urlparse +from urllib.parse import urlparse, urlunparse from candidates.mastodon_api import ( MastodonAPITokenMissing, @@ -120,6 +120,27 @@ def clean_twitter_username(username): return username +def clean_linkedin_url(url): + parsed_url = urlparse(url) + valid = True + if not re.match(r"([^.]+)?\.linkedin.com$", parsed_url.netloc): + valid = False + path = parsed_url.path + if path.startswith("/pub/"): + parts = path.split("/") + name = parts[2] + id_parts = parts[3:] + user_id = "".join(id_parts[::-1]) + path = f"/in/{name}-{user_id}/" + + if not path.startswith("/in/"): + valid = False + + if not valid: + raise ValueError("Please enter a valid LinkedIn URL.") + return urlunparse(parsed_url._replace(path=path)) + + def clean_wikidata_id(identifier): identifier = identifier.strip().lower() m = re.search(r"^.*wikidata.org/(wiki|entity)/(\w+)", identifier) diff --git a/ynr/apps/people/tests/test_person_form_identifier_crud.py b/ynr/apps/people/tests/test_person_form_identifier_crud.py index d081f0249d..bc3a5735e8 100644 --- a/ynr/apps/people/tests/test_person_form_identifier_crud.py +++ b/ynr/apps/people/tests/test_person_form_identifier_crud.py @@ -209,6 +209,27 @@ def test_mastodon_full_url(self): qs = PersonIdentifier.objects.all() self.assertEqual(qs[0].value, "joe") + def test_linkedin_urls(self): + urls_to_valid = ( + ("http://example.com/@blah", False), + ("https://www.linkedin.com/in/first-last-57338a4/", True), + ("https://uk.linkedin.com/in/first-last-57338a4/", True), + ("https://ie.linkedin.com/in/first-last-57338a4/", True), + ("https://ie.linkedin.com/in/first-last-57338a4", True), + ) + for url, valid in urls_to_valid: + with self.subTest(url=url, value=valid): + resp = self._submit_values(url, "linkedin_url") + if valid: + self.assertEqual(resp.status_code, 302) + else: + form = resp.context["identifiers_formset"] + self.assertFalse(form.is_valid()) + self.assertEqual( + form[0].non_field_errors(), + ["Please enter a valid LinkedIn URL."], + ) + def test_bad_email_address(self): resp = self._submit_values("whoops", "email") form = resp.context["identifiers_formset"] diff --git a/ynr/apps/ynr_refactoring/settings.py b/ynr/apps/ynr_refactoring/settings.py index 42f89e619d..308f01cdf4 100644 --- a/ynr/apps/ynr_refactoring/settings.py +++ b/ynr/apps/ynr_refactoring/settings.py @@ -12,7 +12,7 @@ class PersonIdentifierFields(Enum): facebook_personal_url = "Facebook Personal" homepage_url = "Homepage" blog_url = "Blog" - linkedin_url = "Linkedin" + linkedin_url = "LinkedIn URL" party_ppc_page_url = "Party Candidate Page" twitter_username = "Twitter" mastodon_username = "Mastodon" From 5c108525ecafb1c424318702353390dfe60f641b Mon Sep 17 00:00:00 2001 From: Virginia Dooley Date: Wed, 26 Jun 2024 16:04:52 +0100 Subject: [PATCH 2/5] Clean and standardise instagram entries - Audit all person identifiers and their validation - Check the domain and the username for valid entries --- ynr/apps/people/README.md | 22 +++++++++ ynr/apps/people/forms/forms.py | 12 +++++ ynr/apps/people/helpers.py | 25 ++++++++++ ynr/apps/people/models.py | 2 + .../tests/test_person_form_identifier_crud.py | 49 ++++++++++++++++++- .../people/tests/test_person_identifiers.py | 17 +++++++ ynr/apps/ynr_refactoring/settings.py | 2 +- 7 files changed, 126 insertions(+), 3 deletions(-) diff --git a/ynr/apps/people/README.md b/ynr/apps/people/README.md index 438de8daea..d830943358 100644 --- a/ynr/apps/people/README.md +++ b/ynr/apps/people/README.md @@ -10,3 +10,25 @@ standing in. Thing like their political party should be stored on the _candidacy_. This is currently represented as a `Membership` in the `popolo` app. +# Person Identifiers + +`PersonIdentifiers` are used to store contact information and online presence for a candidate. Users can add/edit these identifiers in the person update form. The following fields are available in the `PersonIdentifier` model with the corresponding form labels and accepted formats: +``` +email = "Email": Accepts a valid email address; raises an exception if the email is submitted with an invalid format. The email address must be unique. +facebook_page_url = "Facebook Page": Accepts a valid URL but does not currently validate the domain. +facebook_personal_url = "Facebook Personal": Same as above. +homepage_url = "Homepage": Accepts a valid URL +blog_url = "Blog": Accepts a valid URL +linkedin_url = "Linkedin": Accepts a valid URL; does not currently validate the domain or the format of the username. +party_ppc_page_url = "Party Candidate Page": Accepts a valid URL +twitter_username = "Twitter": Accepts a Twitter username and returns the full Twitter URL.No longer validates the username actually exists. +mastodon_username = "Mastodon": Accepts and returns a valid Mastodon URL and validates the domain and username. +wikipedia_url = "Wikipedia": Accepts a valid URL; does not currently validate the domain. +wikidata_id = "Wikidata": Accepts a valid Wikidata ID +youtube_profile = "YouTube Profile": Accepts a valid URL; does not currently validate the domain. +instagram_url = "Instagram Profile": Accepts and returns a valid Instagram URL and validates the domain and format of the username. +blue_sky_url = "Bluesky URL": Accepts and returns a valid URL +threads_url = "Threads URL": Accepts and returns a valid URL +tiktok_url = "TikTok URL": Accepts and returns a valid URL +other_url = "Other URL": Accepts and returns a valid URL +``` \ No newline at end of file diff --git a/ynr/apps/people/forms/forms.py b/ynr/apps/people/forms/forms.py index 2b156d0821..ff21a7f2e2 100644 --- a/ynr/apps/people/forms/forms.py +++ b/ynr/apps/people/forms/forms.py @@ -19,6 +19,7 @@ StrippedCharField, ) from people.helpers import ( + clean_instagram_url, clean_linkedin_url, clean_mastodon_username, clean_twitter_username, @@ -117,6 +118,17 @@ def clean(self): self.add_error(None, e) return self.cleaned_data + def clean_instagram_url(self, username): + if self.instance.value != username: + self.instance.internal_identifier = None + if self.instance.internal_identifier: + return username + try: + return clean_instagram_url(username) + except ValueError as e: + raise ValidationError(e) + return username + def clean_twitter_username(self, username): if self.instance.value != username: self.instance.internal_identifier = None diff --git a/ynr/apps/people/helpers.py b/ynr/apps/people/helpers.py index 7bfeca503f..50ed191b2d 100644 --- a/ynr/apps/people/helpers.py +++ b/ynr/apps/people/helpers.py @@ -141,6 +141,31 @@ def clean_linkedin_url(url): return urlunparse(parsed_url._replace(path=path)) +def clean_instagram_url(url): + parsed_username = urlparse(url) + if not parsed_username.scheme: + url = f"https://{url}" + parsed_username = urlparse(url) + + if parsed_username.netloc and parsed_username.netloc not in [ + "instagram.com", + "www.instagram.com", + "instagr.am", + "www.instagr.am", + ]: + raise ValueError( + "The Instagram URL must be from a valid Instagram domain." + ) + username = parsed_username.path.strip("/") + # RegEx thanks to https://blog.jstassen.com/2016/03/code-regex-for-instagram-username-and-hashtags/ + if not re.match( + r"(?:@?)([A-Za-z0-9_](?:(?:[A-Za-z0-9_]|(?:\.(?!\.))){0,28}(?:[A-Za-z0-9_]))?)$", + username, + ): + raise ValueError("This is not a valid Instagram URL. Please try again.") + return f"https://www.instagram.com/{username}/" + + def clean_wikidata_id(identifier): identifier = identifier.strip().lower() m = re.search(r"^.*wikidata.org/(wiki|entity)/(\w+)", identifier) diff --git a/ynr/apps/people/models.py b/ynr/apps/people/models.py index 8403001c9a..4bffc35a0f 100644 --- a/ynr/apps/people/models.py +++ b/ynr/apps/people/models.py @@ -169,6 +169,8 @@ def get_value_html(self): if self.value_type == "twitter_username": url = format_html("https://twitter.com/{}", self.value) + if self.value_type == "instagram_url": + url = self.value if self.value.startswith("http"): url = format_html("{}", self.value) diff --git a/ynr/apps/people/tests/test_person_form_identifier_crud.py b/ynr/apps/people/tests/test_person_form_identifier_crud.py index bc3a5735e8..e6aa43e4b0 100644 --- a/ynr/apps/people/tests/test_person_form_identifier_crud.py +++ b/ynr/apps/people/tests/test_person_form_identifier_crud.py @@ -150,7 +150,6 @@ def _submit_values(self, value, value_type="twitter_username"): form["source"] = "They changed their username" form["tmp_person_identifiers-0-value_type"] = value_type form["tmp_person_identifiers-0-value"] = value - form.submit() return form.submit() def _submit_mastodon_values(self, value, value_type="mastodon_username"): @@ -163,7 +162,18 @@ def _submit_mastodon_values(self, value, value_type="mastodon_username"): form["source"] = "They changed their username" form["tmp_person_identifiers-0-value_type"] = value_type form["tmp_person_identifiers-0-value"] = value - form.submit() + return form.submit() + + def _submit_instagram_values(self, value, value_type="instagram_url"): + resp = self.app.get( + reverse("person-update", kwargs={"person_id": self.person.pk}), + user=self.user, + ) + + form = resp.forms[1] + form["source"] = "They changed their username" + form["tmp_person_identifiers-0-value_type"] = value_type + form["tmp_person_identifiers-0-value"] = value return form.submit() def test_twitter_bad_url(self): @@ -185,6 +195,41 @@ def test_twitter_full_url(self): PersonIdentifier.objects.get().value, "madeuptwitteraccount" ) + def test_clean_instagram_url(self): + resp = self._submit_instagram_values( + "https://www.instagr.am/disco_dude" + ) + self.assertEqual(resp.status_code, 302) + instagram_url_qs = PersonIdentifier.objects.filter( + value_type="instagram_url" + ) + self.assertEqual(instagram_url_qs.count(), 1) + self.assertEqual( + instagram_url_qs[0].value, + "https://www.instagr.am/disco_dude", + ) + + def test_bad_instagram_domain(self): + resp = self._submit_instagram_values("www.instagl.am/blah") + form = resp.context["identifiers_formset"] + self.assertFalse(form.is_valid()) + self.assertEqual( + form[0].non_field_errors(), + ["The Instagram URL must be from a valid Instagram domain."], + ) + + def test_bad_instagram_username(self): + resp = self._submit_instagram_values( + "https://www.instagr.am/________blah" + ) + self.assertEqual(resp.status_code, 200) + form = resp.context["identifiers_formset"] + self.assertFalse(form.is_valid()) + self.assertEqual( + form[0].non_field_errors(), + ["This is not a valid Instagram username. Please try again."], + ) + def test_mastodon_bad_url(self): # submit a username missing the `@` symbol resp = self._submit_mastodon_values("https://mastodon.social/joe") diff --git a/ynr/apps/people/tests/test_person_identifiers.py b/ynr/apps/people/tests/test_person_identifiers.py index e6abd22959..9916bc24b1 100644 --- a/ynr/apps/people/tests/test_person_identifiers.py +++ b/ynr/apps/people/tests/test_person_identifiers.py @@ -33,6 +33,23 @@ def test_get_value_html_twitter(self): # Test the value type HTML self.assertEqual(pi.get_value_type_html, "Twitter") + def test_get_value_html_instagram(self): + pi = PersonIdentifier.objects.create( + person=self.person, + value="https://www.instagram.com/democlub", + value_type="instagram_url", + internal_identifier="2324", + ) + + # Test the value HTML + self.assertEqual( + pi.get_value_html, + """https://www.instagram.com/democlub""", + ) + + # Test the value type HTML + self.assertEqual(pi.get_value_type_html, "Instagram") + def test_get_value_html_mastodon(self): pi = PersonIdentifier.objects.create( person=self.person, diff --git a/ynr/apps/ynr_refactoring/settings.py b/ynr/apps/ynr_refactoring/settings.py index 308f01cdf4..1a51aa6d8e 100644 --- a/ynr/apps/ynr_refactoring/settings.py +++ b/ynr/apps/ynr_refactoring/settings.py @@ -19,7 +19,7 @@ class PersonIdentifierFields(Enum): wikipedia_url = "Wikipedia" wikidata_id = "Wikidata" youtube_profile = "YouTube Profile" - instagram_url = "Instagram Profile" + instagram_url = "Instagram URL" blue_sky_url = "Bluesky URL" threads_url = "Threads URL" tiktok_url = "TikTok URL" From 1468e678c923b8696d114446aa26146dd1b8a22c Mon Sep 17 00:00:00 2001 From: Virginia Dooley Date: Thu, 18 Jul 2024 17:04:05 +0100 Subject: [PATCH 3/5] Management command to check and update invalid links --- .../commands/reformat_person_identifiers.py | 86 +++++++++++++++++++ 1 file changed, 86 insertions(+) create mode 100644 ynr/apps/people/management/commands/reformat_person_identifiers.py diff --git a/ynr/apps/people/management/commands/reformat_person_identifiers.py b/ynr/apps/people/management/commands/reformat_person_identifiers.py new file mode 100644 index 0000000000..c8fe948e65 --- /dev/null +++ b/ynr/apps/people/management/commands/reformat_person_identifiers.py @@ -0,0 +1,86 @@ +from candidatebot.helpers import CandidateBot +from django.core.exceptions import ValidationError +from django.core.management.base import BaseCommand +from django.db import transaction +from people.helpers import ( + clean_instagram_url, + clean_linkedin_url, + clean_mastodon_username, + clean_twitter_username, + clean_wikidata_id, +) +from people.models import PersonIdentifier + +# Add to this list when we have dedicated clean_* functions for an value type +supported_identifiers = { + "instagram_url": clean_instagram_url, + "linkedin_url": clean_linkedin_url, + "twitter_username": clean_twitter_username, + "mastodon_username": clean_mastodon_username, + "wikidata_id": clean_wikidata_id, +} + + +class Command(BaseCommand): + help = f"""Checks and updates links in the PersonIdentifier field. + + Currently the fields supported are {", ".join(supported_identifiers)}. + """ + + def add_arguments(self, parser): + parser.add_argument( + "--update", + help="Write changes to IDs that have been cleaned", + action="store_true", + ) + parser.add_argument( + "--remove", + help="Remove invalid IDs that can't be cleaned", + action="store_true", + ) + + @transaction.atomic() + def handle(self, *args, **options): + """ + Iterate over all PersonIdentifier objects and reformat urls as needed. + """ + + if not options["update"] or not options["remove"]: + self.stdout.write("Not modifying data, just reporting on changes.") + + person_identifiers = PersonIdentifier.objects.filter( + value_type__in=supported_identifiers.keys() + ) + for identifier in person_identifiers: + initial_value = identifier.value + try: + cleaned_value = supported_identifiers[identifier.value_type]( + initial_value + ) + if initial_value != cleaned_value: + self.change_identifier( + identifier, cleaned_value, save=options["update"] + ) + + except (ValueError, ValidationError) as exception: + self.unclean_identifier( + identifier, exception, remove=options["remove"] + ) + + def change_identifier(self, identifier, cleaned_value, save=False): + self.stdout.write( + f"Changing person {identifier.person_id}: {identifier.value} to {cleaned_value}" + ) + if save: + bot = CandidateBot(identifier.person_id, update=True) + bot.edit_field(identifier.value_type, cleaned_value) + bot.save(source="Auto-cleaning URLs") + + def unclean_identifier(self, identifier, exception, remove=False): + self.stdout.write( + f"ERROR: {identifier.person_id}: {identifier.value}: {exception}" + ) + if remove: + bot = CandidateBot(identifier.person_id, update=True) + bot.delete_field(identifier.value_type) + bot.save(source="Auto removing invalid IDs") From 6f6d62102765ef962a12ecda7f2e2fded40509c1 Mon Sep 17 00:00:00 2001 From: Sym Roe Date: Wed, 7 Aug 2024 12:55:34 +0100 Subject: [PATCH 4/5] Teach CandidateBot to remove fields --- ynr/apps/candidatebot/helpers.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/ynr/apps/candidatebot/helpers.py b/ynr/apps/candidatebot/helpers.py index dcc31e81a5..543cfd8ba8 100644 --- a/ynr/apps/candidatebot/helpers.py +++ b/ynr/apps/candidatebot/helpers.py @@ -191,3 +191,13 @@ def add_theyworkforyou(self, twfy_id): value = f"https://www.theyworkforyou.com/mp/{twfy_id}/" internal_id = f"uk.org.publicwhip/person/{twfy_id}" self.edit_field("theyworkforyou", value, internal_id=internal_id) + + def delete_field(self, value_type): + """ + Remove a PersonIdentifier from a person + """ + deleted, _ = self.person.tmp_person_identifiers.filter( + value_type=value_type + ).delete() + if deleted: + self.edits_made = True From 6327f12b378b3ce0d671b4373a29fd691cf71652 Mon Sep 17 00:00:00 2001 From: Sym Roe Date: Wed, 7 Aug 2024 15:22:29 +0100 Subject: [PATCH 5/5] DRY up submitting values logic --- .../tests/test_person_form_identifier_crud.py | 44 +++++-------------- 1 file changed, 12 insertions(+), 32 deletions(-) diff --git a/ynr/apps/people/tests/test_person_form_identifier_crud.py b/ynr/apps/people/tests/test_person_form_identifier_crud.py index e6aa43e4b0..d2de912d05 100644 --- a/ynr/apps/people/tests/test_person_form_identifier_crud.py +++ b/ynr/apps/people/tests/test_person_form_identifier_crud.py @@ -152,30 +152,6 @@ def _submit_values(self, value, value_type="twitter_username"): form["tmp_person_identifiers-0-value"] = value return form.submit() - def _submit_mastodon_values(self, value, value_type="mastodon_username"): - resp = self.app.get( - reverse("person-update", kwargs={"person_id": self.person.pk}), - user=self.user, - ) - - form = resp.forms[1] - form["source"] = "They changed their username" - form["tmp_person_identifiers-0-value_type"] = value_type - form["tmp_person_identifiers-0-value"] = value - return form.submit() - - def _submit_instagram_values(self, value, value_type="instagram_url"): - resp = self.app.get( - reverse("person-update", kwargs={"person_id": self.person.pk}), - user=self.user, - ) - - form = resp.forms[1] - form["source"] = "They changed their username" - form["tmp_person_identifiers-0-value_type"] = value_type - form["tmp_person_identifiers-0-value"] = value - return form.submit() - def test_twitter_bad_url(self): resp = self._submit_values("http://example.com/blah") form = resp.context["identifiers_formset"] @@ -196,8 +172,8 @@ def test_twitter_full_url(self): ) def test_clean_instagram_url(self): - resp = self._submit_instagram_values( - "https://www.instagr.am/disco_dude" + resp = self._submit_values( + "https://www.instagr.am/disco_dude", value_type="instagram_url" ) self.assertEqual(resp.status_code, 302) instagram_url_qs = PersonIdentifier.objects.filter( @@ -206,11 +182,13 @@ def test_clean_instagram_url(self): self.assertEqual(instagram_url_qs.count(), 1) self.assertEqual( instagram_url_qs[0].value, - "https://www.instagr.am/disco_dude", + "https://www.instagram.com/disco_dude/", ) def test_bad_instagram_domain(self): - resp = self._submit_instagram_values("www.instagl.am/blah") + resp = self._submit_values( + "www.instbad.am/blah", value_type="instagram_url" + ) form = resp.context["identifiers_formset"] self.assertFalse(form.is_valid()) self.assertEqual( @@ -219,20 +197,22 @@ def test_bad_instagram_domain(self): ) def test_bad_instagram_username(self): - resp = self._submit_instagram_values( - "https://www.instagr.am/________blah" + resp = self._submit_values( + "https://www.instagr.am/___@_____blah", value_type="instagram_url" ) self.assertEqual(resp.status_code, 200) form = resp.context["identifiers_formset"] self.assertFalse(form.is_valid()) self.assertEqual( form[0].non_field_errors(), - ["This is not a valid Instagram username. Please try again."], + ["This is not a valid Instagram URL. Please try again."], ) def test_mastodon_bad_url(self): # submit a username missing the `@` symbol - resp = self._submit_mastodon_values("https://mastodon.social/joe") + resp = self._submit_values( + "https://mastodon.social/joe", value_type="mastodon_username" + ) form = resp.context["identifiers_formset"] self.assertFalse(form.is_valid()) self.assertEqual(