Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ classifiers = [
requires-python = ">=3.10"

dependencies = [
"hdx-python-api>= 6.5.2",
"hdx-python-api>= 6.5.3",
"hdx-python-country>= 3.9.8",
"hdx-python-utilities>= 3.9.5",
"mixpanel-utils",
Expand Down
22 changes: 11 additions & 11 deletions requirements-test.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ attrs==25.4.0
# jsonlines
# jsonschema
# referencing
certifi==2025.10.5
certifi==2025.11.12
# via
# -c requirements.txt
# requests
Expand All @@ -23,15 +23,15 @@ charset-normalizer==3.4.4
# via
# -c requirements.txt
# requests
ckanapi==4.8
ckanapi==4.9
# via
# -c requirements.txt
# hdx-python-api
click==8.3.0
click==8.3.1
# via
# -c requirements.txt
# typer
coverage==7.11.0
coverage==7.11.3
# via pytest-cov
cydifflib==1.2.0
# via hdx-analysis-scripts (pyproject.toml)
Expand All @@ -48,7 +48,7 @@ docopt==0.6.2
# -c requirements.txt
# ckanapi
# num2words
docutils==0.22.2
docutils==0.22.3
# via
# -c requirements.txt
# defopt
Expand All @@ -64,7 +64,7 @@ frictionless==5.18.1
# via
# -c requirements.txt
# hdx-python-utilities
hdx-python-api==6.5.2
hdx-python-api==6.5.3
# via
# -c requirements.txt
# hdx-analysis-scripts (pyproject.toml)
Expand Down Expand Up @@ -187,11 +187,11 @@ pockets==0.9.1
# via
# -c requirements.txt
# sphinxcontrib-napoleon
pydantic==2.12.3
pydantic==2.12.4
# via
# -c requirements.txt
# frictionless
pydantic-core==2.41.4
pydantic-core==2.41.5
# via
# -c requirements.txt
# pydantic
Expand All @@ -204,7 +204,7 @@ pyphonetics==0.5.3
# via
# -c requirements.txt
# hdx-python-utilities
pytest==8.4.2
pytest==9.0.1
# via
# hdx-analysis-scripts (pyproject.toml)
# pytest-check
Expand Down Expand Up @@ -266,7 +266,7 @@ rich==14.2.0
# via
# -c requirements.txt
# typer
rpds-py==0.28.0
rpds-py==0.29.0
# via
# -c requirements.txt
# jsonschema
Expand All @@ -275,7 +275,7 @@ ruamel-yaml==0.18.16
# via
# -c requirements.txt
# hdx-python-utilities
ruamel-yaml-clib==0.2.14
ruamel-yaml-clib==0.2.15
# via
# -c requirements.txt
# ruamel-yaml
Expand Down
18 changes: 9 additions & 9 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,15 @@ attrs==25.4.0
# jsonlines
# jsonschema
# referencing
certifi==2025.10.5
certifi==2025.11.12
# via requests
chardet==5.2.0
# via frictionless
charset-normalizer==3.4.4
# via requests
ckanapi==4.8
ckanapi==4.9
# via hdx-python-api
click==8.3.0
click==8.3.1
# via typer
defopt==7.0.0
# via hdx-python-api
Expand All @@ -26,15 +26,15 @@ docopt==0.6.2
# via
# ckanapi
# num2words
docutils==0.22.2
docutils==0.22.3
# via defopt
email-validator==2.3.0
# via hdx-python-api
et-xmlfile==2.0.0
# via openpyxl
frictionless==5.18.1
# via hdx-python-utilities
hdx-python-api==6.5.2
hdx-python-api==6.5.3
# via hdx-analysis-scripts (pyproject.toml)
hdx-python-country==3.9.8
# via
Expand Down Expand Up @@ -101,9 +101,9 @@ ply==3.11
# libhxl
pockets==0.9.1
# via sphinxcontrib-napoleon
pydantic==2.12.3
pydantic==2.12.4
# via frictionless
pydantic-core==2.41.4
pydantic-core==2.41.5
# via pydantic
pygments==2.19.2
# via rich
Expand Down Expand Up @@ -145,13 +145,13 @@ rfc3986==2.0.0
# via frictionless
rich==14.2.0
# via typer
rpds-py==0.28.0
rpds-py==0.29.0
# via
# jsonschema
# referencing
ruamel-yaml==0.18.16
# via hdx-python-utilities
ruamel-yaml-clib==0.2.14
ruamel-yaml-clib==0.2.15
# via ruamel-yaml
setuptools==80.9.0
# via ckanapi
Expand Down
22 changes: 22 additions & 0 deletions src/hdx/analysis_scripts/common/dataset_statistics.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ class DatasetStatistics(UserDict):

def __init__(
self,
organisations,
users,
today,
dataset_name_to_explorers,
dataset_id_to_requests,
Expand All @@ -26,6 +28,8 @@ def __init__(
dataset,
):
super().__init__(dataset.data)
self.organisations = organisations
self.users = users
self.today = today
self.last_3_months = today - relativedelta(months=3)
self.previous_quarter = get_previous_quarter(today)
Expand All @@ -48,6 +52,7 @@ def __init__(
self.get_last_modified_freshness()
self.get_end_date_freshness()
self.get_quickcharts()
self.get_maintainer()

def get_status(self):
self.public = "N" if self["private"] else "Y"
Expand Down Expand Up @@ -320,3 +325,20 @@ def get_quickcharts(self):
self.has_quickcharts = "Y"
else:
self.has_quickcharts = "N"

def get_maintainer(self):
    """Record whether the dataset's maintainer is valid.

    Sets ``self.valid_maintainer`` to "Y" when the maintainer id found in
    the dataset is present in ``self.users`` and that user is either a
    sysadmin or listed in the dataset's organisation with capacity
    "admin" or "editor". In every other case the attribute is "N".

    Missing data (no maintainer on the dataset, an organisation that is
    absent from ``self.organisations``, or user entries lacking the
    expected keys) is treated as "not valid" rather than raising, so one
    malformed dataset cannot abort the whole report.
    """
    self.valid_maintainer = "N"
    maintainer_id = self.get("maintainer")
    maintainer = self.users.get(maintainer_id)
    if not maintainer:
        return
    # Sysadmins are valid regardless of organisation membership.
    if maintainer.get("sysadmin"):
        self.valid_maintainer = "Y"
        return
    organisation_id = (self.get("organization") or {}).get("id")
    organisation = self.organisations.get(organisation_id)
    if not organisation:
        return
    for user in organisation.get("users") or []:
        if user.get("id") != maintainer_id:
            continue
        if user.get("capacity") in ("admin", "editor"):
            self.valid_maintainer = "Y"
            return
12 changes: 12 additions & 0 deletions src/hdx/analysis_scripts/common/downloads.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

from hdx.data.dataset import Dataset
from hdx.data.organization import Organization
from hdx.data.user import User
from hdx.utilities.downloader import Download
from hdx.utilities.loader import load_yaml
from hdx.utilities.saver import save_json
Expand Down Expand Up @@ -60,6 +61,7 @@ class Downloads:
packagelinks_file = "package_links.json"
hdxconnect_file = "hdxconnect.json"
organisations_file = "organisations.json"
users_file = "users.json"
aging_file = "aging.yaml"

def __init__(self, today, mixpanel_config_yaml, saved_dir=None):
Expand Down Expand Up @@ -169,3 +171,13 @@ def get_all_organisations(self):
if self.saved_dir:
save_json(organisations, join(self.saved_dir, self.organisations_file))
return organisations

def get_all_users(self):
    """Fetch all users and return their data keyed by user id.

    When ``self.saved_dir`` is set, the resulting mapping is also written
    as JSON to ``self.users_file`` inside that directory.
    """
    logger.info("Obtaining user data")
    users_by_id = {user["id"]: user.data for user in User.get_all_users()}
    if self.saved_dir:
        users_path = join(self.saved_dir, self.users_file)
        save_json(users_by_id, users_path)
    return users_by_id
6 changes: 6 additions & 0 deletions src/hdx/analysis_scripts/datasets/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@ def main(downloads, output_dir, **ignore):
last_modified_aging = get_aging(configuration["last_modified_aging"])
end_date_aging = get_aging(configuration["end_date_aging"])
dataset_downloads = downloads.get_mixpanel_downloads(60)
organisations = downloads.get_all_organisations()
users = downloads.get_all_users()
created_per_month = {}
metadata_updated_per_month = {}
data_updated_per_month = {}
Expand Down Expand Up @@ -70,10 +72,13 @@ def main(downloads, output_dir, **ignore):
"updated_by_script<<last_modified",
"last_modified<<updated_by_script",
"has quickcharts",
"valid maintainer",
)
]
for dataset in downloads.get_all_datasets():
datasetstats = DatasetStatistics(
organisations,
users,
downloads.today,
dataset_name_to_explorers,
dataset_id_to_requests,
Expand Down Expand Up @@ -138,6 +143,7 @@ def main(downloads, output_dir, **ignore):
datasetstats.old_updated_by_noncod_script,
datasetstats.outdated_lastmodified,
datasetstats.has_quickcharts,
datasetstats.valid_maintainer,
)
rows.append(row)
if rows:
Expand Down
9 changes: 8 additions & 1 deletion src/hdx/analysis_scripts/orgs/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,8 @@ def main(downloads, output_dir, **ignore):
end_date_aging = get_aging(configuration["end_date_aging"])
dataset_3m_downloads = downloads.get_mixpanel_downloads(3)
dataset_1y_downloads = downloads.get_mixpanel_downloads(12)
logger.info("Obtaining organisations data")
organisations = downloads.get_all_organisations()
users = downloads.get_all_users()
total_public = 0
total_public_internal = 0
total_public_external = 0
Expand Down Expand Up @@ -144,9 +144,12 @@ def main(downloads, output_dir, **ignore):
organisation["tags"] = set()
organisation["has crisis"] = "N"
organisation["has quickcharts"] = "N"
organisation["valid maintainers"] = "Y"
outdated_lastmodifieds = {}
for dataset in downloads.get_all_datasets():
datasetstats = DatasetStatistics(
organisations,
users,
downloads.today,
dataset_name_to_explorers,
dataset_id_to_requests,
Expand Down Expand Up @@ -246,6 +249,8 @@ def main(downloads, output_dir, **ignore):
organisation["has crisis"] = "Y"
if datasetstats.has_quickcharts == "Y":
organisation["has quickcharts"] = "Y"
if datasetstats.valid_maintainer == "N":
organisation["valid maintainers"] = "N"

headers = [
"Organisation name",
Expand Down Expand Up @@ -303,6 +308,7 @@ def main(downloads, output_dir, **ignore):
"Tags",
"Has crisis",
"Has quickcharts",
"Maintainers valid",
]

def get_number_percentage(organisation, key):
Expand Down Expand Up @@ -406,6 +412,7 @@ def get_number_percentage(organisation, key):
",".join(sorted(organisation["tags"])),
organisation["has crisis"],
organisation["has quickcharts"],
organisation["valid maintainers"],
]
rows.append(row)
if rows:
Expand Down
6 changes: 5 additions & 1 deletion tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ def input_folder(fixtures):
@pytest.fixture(scope="session")
def mock_downloads(input_folder):
class MockDownloads:
today = parse_date("2025-07-20 22:50:00")
today = parse_date("2025-11-16 22:50:00")

@classmethod
def set_api_key(cls, api_key):
Expand Down Expand Up @@ -97,6 +97,10 @@ def get_requests():
def get_all_organisations():
return load_json(join(input_folder, Downloads.organisations_file))

@staticmethod
def get_all_users():
    """Load the saved users fixture from the test input folder."""
    users_path = join(input_folder, Downloads.users_file)
    return load_json(users_path)

@staticmethod
def get_aging(url):
return load_yaml(join(input_folder, Downloads.aging_file))
Expand Down
Loading
Loading