From c6ff12d0850b1ab55fe3adc139dbfc1fe38e7933 Mon Sep 17 00:00:00 2001 From: Nick Youngblut Date: Tue, 27 May 2025 10:51:19 -0700 Subject: [PATCH 1/4] Add pyproject and update for NumPy 2 --- pyensembl/__init__.py | 10 ++++++ pyensembl/shell.py | 9 ++++-- pyproject.toml | 39 +++++++++++++++++++++++ requirements.txt | 7 ---- setup.py | 74 ------------------------------------------- 5 files changed, 56 insertions(+), 83 deletions(-) create mode 100644 pyproject.toml delete mode 100644 requirements.txt delete mode 100644 setup.py diff --git a/pyensembl/__init__.py b/pyensembl/__init__.py index e44e8f2..d1a5673 100644 --- a/pyensembl/__init__.py +++ b/pyensembl/__init__.py @@ -10,6 +10,16 @@ # See the License for the specific language governing permissions and # limitations under the License. +"""pyensembl public API and compatibility helpers.""" + +import numpy as np + +# ``numpy.typeDict`` was removed in NumPy 1.24. Some of ``pyensembl``'s +# dependencies still rely on this old attribute, so provide it when +# running under newer versions of NumPy. 
+if not hasattr(np, "typeDict"): + np.typeDict = np.sctypeDict + from .database import Database from .download_cache import DownloadCache from .ensembl_release import EnsemblRelease, cached_release diff --git a/pyensembl/shell.py b/pyensembl/shell.py index 4c878e5..2e195cc 100755 --- a/pyensembl/shell.py +++ b/pyensembl/shell.py @@ -40,7 +40,10 @@ import argparse import logging.config -import pkg_resources +try: + from importlib import resources as importlib_resources +except ImportError: # pragma: no cover - NOTE(review): not taken on Python 3.8, where importlib.resources imports but lacks files() (added in 3.9) + import importlib_resources # type: ignore import os from .ensembl_release import EnsemblRelease @@ -49,7 +52,9 @@ from .species import Species from .version import __version__ -logging.config.fileConfig(pkg_resources.resource_filename(__name__, "logging.conf")) +logging.config.fileConfig( + importlib_resources.files(__package__).joinpath("logging.conf") +) logger = logging.getLogger(__name__) diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..deb5c09 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,39 @@ +[build-system] +requires = ["setuptools>=64", "wheel"] +build-backend = "setuptools.build_meta" + +[project] +name = "pyensembl" +dynamic = ["version"] +description = "Python interface to Ensembl reference genome metadata" +readme = "README.md" +requires-python = ">=3.8" +license = {file = "LICENSE"} +authors = [{name = "Alex Rubinsteyn", email = "alex.rubinsteyn@unc.edu"}] +keywords = ["ensembl", "genomics", "bioinformatics"] + +dependencies = [ + "typechecks>=0.0.2,<1.0.0", + "datacache>=1.4.0,<2.0.0", + "memoized-property>=1.0.2", + "tinytimer>=0.0.0,<1.0.0", + "gtfparse>=2.5.0,<3.0.0", + "serializable>=0.2.1,<1.0.0", +] + +[project.urls] +Homepage = "https://github.com/openvax/pyensembl" +Source = "https://github.com/openvax/pyensembl" + +[project.scripts] +pyensembl = "pyensembl.shell:run" + +[tool.setuptools.packages.find] +where = ["."] +include = ["pyensembl*"] + +[tool.setuptools.package-data] 
+pyensembl = ["logging.conf"] + +[tool.setuptools.dynamic] +version = {attr = "pyensembl.version.__version__"} diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index d177827..0000000 --- a/requirements.txt +++ /dev/null @@ -1,7 +0,0 @@ -typechecks>=0.0.2,<1.0.0 -datacache>=1.4.0,<2.0.0 -memoized-property>=1.0.2 -tinytimer>=0.0.0,<1.0.0 -gtfparse>=2.5.0,<3.0.0 -serializable>=0.2.1,<1.0.0 -pylint>=2.17.2,<3.0.0 diff --git a/setup.py b/setup.py deleted file mode 100644 index 634e59d..0000000 --- a/setup.py +++ /dev/null @@ -1,74 +0,0 @@ -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from __future__ import print_function -import os -import re - -# TODO: replace setup.py with pyproject.toml -from setuptools import setup - -package_name = "pyensembl" -current_directory = os.path.dirname(__file__) -readme_filename = "README.md" -readme_path = os.path.join(current_directory, readme_filename) -github_url = "https://github.com/openvax/%s" % package_name - -try: - with open(readme_path, "r") as f: - readme_markdown = f.read() -except IOError as e: - print(e) - print("Failed to open %s" % readme_path) - readme_markdown = "" - - -with open("%s/version.py" % package_name, "r") as f: - version = re.search( - r'^__version__\s*=\s*[\'"]([^\'"]*)[\'"]', f.read(), re.MULTILINE - ).group(1) - -if not version: - raise RuntimeError("Cannot find version information") - -if __name__ == "__main__": - with open("requirements.txt") as f: - requirements = [l.strip() for l in f] - - setup( - name=package_name, - version=version, - description="Python interface to Ensembl reference genome metadata", - author="Alex Rubinsteyn", - author_email="alex.rubinsteyn@unc.edu", - url=github_url, - license="http://www.apache.org/licenses/LICENSE-2.0.html", - entry_points={ - "console_scripts": ["pyensembl = %s.shell:run" % package_name], - }, - classifiers=[ - "Development Status :: 4 - Beta", - "Environment :: Console", - "Operating System :: OS Independent", - "Intended Audience :: Science/Research", - "License :: OSI Approved :: Apache Software License", - "Programming Language :: Python", - "Topic :: Scientific/Engineering :: Bio-Informatics", - ], - install_requires=requirements, - long_description=readme_markdown, - long_description_content_type="text/markdown", - packages=[package_name], - package_data={ - package_name: ["logging.conf", "../requirements.txt"], - }, - ) From ec1699163ba99f4458712913578cafb2df7767b7 Mon Sep 17 00:00:00 2001 From: Nick Youngblut Date: Tue, 27 May 2025 11:31:41 -0700 Subject: [PATCH 2/4] Add development dependencies and update README for 
testing instructions - Added optional development dependencies in `pyproject.toml`: `pytest`, `pytest-cov`, `flake8`, and `coveralls`. - Updated `README.md` to include installation instructions for development and detailed testing commands. - Removed `test.sh` script in favor of direct `pytest` commands in the workflow. - Updated GitHub Actions workflow to install dependencies in editable mode and run tests directly. --- .github/workflows/tests.yml | 6 ++-- .gitignore | 3 ++ README.md | 56 ++++++++++++++++++++++++++++++++++++- pyproject.toml | 8 ++++++ test.sh | 1 - 5 files changed, 68 insertions(+), 6 deletions(-) delete mode 100755 test.sh diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 8596cd1..4b1e1d4 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -28,9 +28,7 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip - python -m pip install flake8 pytest pytest-cov coveralls - pip install -r requirements.txt - pip install . + pip install -e ".[dev]" - name: Lint with flake8 run: | # stop the build if there are Python syntax errors or undefined names @@ -49,6 +47,6 @@ jobs: pyensembl install --release 93 --species mouse --custom-mirror https://github.com/openvax/ensembl-data/releases/download/GRCm38.93/ - name: Run unit tests run: | - ./test.sh + pytest --cov=pyensembl/ --cov-report=term-missing tests/ - name: Publish coverage to Coveralls uses: coverallsapp/github-action@v2.2.3 diff --git a/.gitignore b/.gitignore index 51cbe85..ee11c92 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,6 @@ +# cursor +.cursor/ + # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] diff --git a/README.md b/README.md index 8d5f23c..ced739e 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,8 @@ PyPI -# PyEnsembl +PyEnsembl +========= PyEnsembl is a Python interface to [Ensembl](http://www.ensembl.org) reference genome metadata such as exons and transcripts. 
PyEnsembl downloads [GTF](https://en.wikipedia.org/wiki/Gene_transfer_format) and [FASTA](https://en.wikipedia.org/wiki/FASTA_format) files from the [Ensembl FTP server](ftp://ftp.ensembl.org) and loads them into a local database. PyEnsembl can also work with custom reference data specified using user-supplied GTF and FASTA files. @@ -138,6 +139,59 @@ data.index() gene_names = data.gene_names_at_locus(contig=6, position=29945884) ``` +# Development and Testing + +## Installation for Development + +To install PyEnsembl for development with testing dependencies: + +```sh +pip install -e ".[dev]" +``` + +This installs PyEnsembl in editable mode along with development dependencies including `pytest`, `pytest-cov`, `flake8`, and `coveralls`. + +## Running Tests + +PyEnsembl uses pytest for testing. Before running tests, you'll need to install some Ensembl data: + +```sh +# Install required Ensembl releases for testing +pyensembl install --release 75 --species human +pyensembl install --release 77 --species human +pyensembl install --release 93 --species human +``` + +### Run all tests: + +```sh +pytest tests/ +``` + +### Run tests with coverage: + +```sh +pytest --cov=pyensembl/ --cov-report=term-missing tests/ +``` + +### Run a specific test file: + +```sh +pytest tests/test_gene_names.py +``` + +### Run a specific test function: + +```sh +pytest tests/test_gene_names.py::test_all_gene_names +``` + +### Run tests with verbose output: + +```sh +pytest -v tests/ +``` + # API The `EnsemblRelease` object has methods to let you access all possible diff --git a/pyproject.toml b/pyproject.toml index deb5c09..891ebc4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -21,6 +21,14 @@ dependencies = [ "serializable>=0.2.1,<1.0.0", ] +[project.optional-dependencies] +dev = [ + "pytest>=6.0", + "pytest-cov>=2.0", + "flake8>=3.8", + "coveralls>=3.0", +] + [project.urls] Homepage = "https://github.com/openvax/pyensembl" Source = "https://github.com/openvax/pyensembl" diff --git 
a/test.sh b/test.sh deleted file mode 100755 index 38164a0..0000000 --- a/test.sh +++ /dev/null @@ -1 +0,0 @@ -pytest --cov=pyensembl/ --cov-report=term-missing tests From 051272cb5c082099f30002fb6b7305ae242c5662 Mon Sep 17 00:00:00 2001 From: Nick Youngblut Date: Tue, 27 May 2025 11:40:19 -0700 Subject: [PATCH 3/4] Update dependencies in pyproject.toml - Removed upper version constraint for `gtfparse`. - Added `pyarrow` with a minimum version of 16.0.0. - Added `pandas` with a minimum version of 2.0.0. --- pyproject.toml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 891ebc4..4e3c8e7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -17,8 +17,10 @@ dependencies = [ "datacache>=1.4.0,<2.0.0", "memoized-property>=1.0.2", "tinytimer>=0.0.0,<1.0.0", - "gtfparse>=2.5.0,<3.0.0", + "gtfparse>=2.5.0", "serializable>=0.2.1,<1.0.0", + "pyarrow>=16.0.0", + "pandas>=2.0.0", ] [project.optional-dependencies] From 22a87604c8122d943bcdaca41a5b6b5c13ae682e Mon Sep 17 00:00:00 2001 From: nick-youngblut Date: Tue, 27 May 2025 16:02:23 -0700 Subject: [PATCH 4/4] Switch gtfparse to the pyarrow_update fork branch and add release 111 to README test setup --- README.md | 7 ++++--- pyproject.toml | 3 ++- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index ced739e..9f2fee2 100644 --- a/README.md +++ b/README.md @@ -157,9 +157,10 @@ PyEnsembl uses pytest for testing. 
Before running tests, you'll need to install ```sh # Install required Ensembl releases for testing -pyensembl install --release 75 --species human -pyensembl install --release 77 --species human -pyensembl install --release 93 --species human +pyensembl install --release 75 --species human \ + && pyensembl install --release 77 --species human \ + && pyensembl install --release 93 --species human \ + && pyensembl install --release 111 --species homo_sapiens ``` ### Run all tests: diff --git a/pyproject.toml b/pyproject.toml index 4e3c8e7..f71697a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -17,7 +17,8 @@ dependencies = [ "datacache>=1.4.0,<2.0.0", "memoized-property>=1.0.2", "tinytimer>=0.0.0,<1.0.0", - "gtfparse>=2.5.0", + #"gtfparse>=2.5.0", + "gtfparse @ git+https://github.com/nick-youngblut/gtfparse.git@pyarrow_update", "serializable>=0.2.1,<1.0.0", "pyarrow>=16.0.0", "pandas>=2.0.0",