From 4ff752342e4a96f53239c13a6e20111f0eb987a4 Mon Sep 17 00:00:00 2001 From: Ryan Dale <115406+daler@users.noreply.github.com> Date: Sat, 17 Jan 2026 18:55:23 -0500 Subject: [PATCH 1/9] update cli to address #224 --- gffutils/scripts/gffutils-cli | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/gffutils/scripts/gffutils-cli b/gffutils/scripts/gffutils-cli index 051b76d5..70a882b5 100755 --- a/gffutils/scripts/gffutils-cli +++ b/gffutils/scripts/gffutils-cli @@ -76,7 +76,7 @@ def fetch(db, ids): (like grep -v)''') @arg('--exclude-self', help='''Use this to suppress reporting the IDs you've provided.''') -def children(db, ids, limit=None, exclude=None, exclude_self=False): +def children(db, ids, *, limit=None, exclude=None, exclude_self=False): """ Fetch children from the database according to ID. """ @@ -110,7 +110,7 @@ def children(db, ids, limit=None, exclude=None, exclude_self=False): (like grep -v)''') @arg('--exclude-self', help='''Use this to suppress reporting the IDs you've provided.''') -def parents(db, ids, limit=None, exclude=None, exclude_self=False): +def parents(db, ids, *, limit=None, exclude=None, exclude_self=False): """ Fetch parents from the database according to ID. """ @@ -167,7 +167,7 @@ def common(db): @arg('--disable-infer-transcripts', help='''Disable inferring of transcript extents for GTF files. Use this if your GTF file already has "transcript" featuretypes''') -def create(filename, output=None, force=False, quiet=False, merge="merge", +def create(filename, *, output=None, force=False, quiet=False, merge="merge", disable_infer_genes=False, disable_infer_transcripts=False): """ Create a database. @@ -198,7 +198,7 @@ def clean(filename): @arg('--in-place', help='''Sanitize file in-place: overwrites current file with sanitized version.''') -def sanitize(filename, +def sanitize(filename, *, in_memory=True, in_place=False): """ @@ -225,7 +225,7 @@ def sanitize(filename, @arg('filename', help='''GFF or GTF file to use.''') @arg('--in-place', help='''Remove duplicates in place (overwrite current file.)''') -def rmdups(filename, in_place=False): +def rmdups(filename, *, in_place=False): """ Remove duplicates from a GFF file. """ @@ -278,7 +278,7 @@ def convert(filename): @arg('--featuretype', help='''Restrict to a particular featuretype. This can be faster than doing a grep on the output, since it restricts the search space in the database''') -def search(db, text, featuretype=None): +def search(db, text, *, featuretype=None): """ Search the attributes. """ From aa1d2680b3c631f8c3a3a44b4918a0baa7fbce04 Mon Sep 17 00:00:00 2001 From: Ryan Dale <115406+daler@users.noreply.github.com> Date: Sat, 17 Jan 2026 16:46:20 -0500 Subject: [PATCH 2/9] skip biopython integration test if it's not installed fixes #233 --- gffutils/test/test_biopython_integration.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/gffutils/test/test_biopython_integration.py b/gffutils/test/test_biopython_integration.py index 58c5866a..e9f8e81d 100644 --- a/gffutils/test/test_biopython_integration.py +++ b/gffutils/test/test_biopython_integration.py @@ -1,6 +1,10 @@ from gffutils import example_filename import gffutils import gffutils.biopython_integration as bp +import pytest + +# Skip tests entirely if BioPython not available +pytest.importorskip('Bio') def test_roundtrip(): From 5fffdba4a5203372e6fc9a435842db9c4601f8b0 Mon Sep 17 00:00:00 2001 From: Ryan Dale <115406+daler@users.noreply.github.com> Date: Sat, 17 Jan 2026 16:48:29 -0500 Subject: [PATCH 3/9] bump version of artifact action --- .github/workflows/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index d3716aa6..87c6e9f2 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -102,7 +102,7 @@ jobs: - name: push artifact if: ${{ (matrix.python-version == 3.9) }} - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: doc path: /tmp/docs From eb9e2cd48b847537fa623ad5942a63f89c0c4cff Mon Sep 17 00:00:00 2001 From: Ryan Dale <115406+daler@users.noreply.github.com> Date: Sat, 17 Jan 2026 16:50:15 -0500 Subject: [PATCH 4/9] mambaforge -> miniforge for CI --- .github/workflows/main.yml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 87c6e9f2..d1ca11f8 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -18,9 +18,8 @@ jobs: - name: conda env run: | - wget -O Mambaforge.sh "https://github.com/conda-forge/miniforge/releases/latest/download/Mambaforge-$(uname)-$(uname -m).sh" - curl -L -O "https://github.com/conda-forge/miniforge/releases/latest/download/Mambaforge-$(uname)-$(uname -m).sh" - bash Mambaforge.sh -b -p "${HOME}/conda" + wget -O Miniforge3.sh "https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-$(uname)-$(uname -m).sh" + bash Miniforge3.sh -b -p "${HOME}/conda" source "${HOME}/conda/etc/profile.d/conda.sh" source "${HOME}/conda/etc/profile.d/mamba.sh" which conda From 8daeffb82016250504bb34ebaaeca3c340ef2280 Mon Sep 17 00:00:00 2001 From: Ryan Dale <115406+daler@users.noreply.github.com> Date: Sat, 17 Jan 2026 16:55:54 -0500 Subject: [PATCH 5/9] parametrize test (rather than yield) --- gffutils/test/test_1.py | 91 +++++++++++++++++++---------------------- 1 file changed, 42 insertions(+), 49 deletions(-) diff --git a/gffutils/test/test_1.py b/gffutils/test/test_1.py index 2b88cc04..d793aa49 100644 --- a/gffutils/test/test_1.py +++ b/gffutils/test/test_1.py @@ -482,58 +482,51 @@ def test_sanitize_gff(): print("Sanitized GFF successfully.") -def test_region(): - +@pytest.mark.parametrize("kwargs,expected", [ + # previously failed, see issue #45 + (dict(seqid="chr2L", start=1, end=2e9, completely_within=True), 27), + (dict(region="chr2L", start=0), ValueError), + (dict(region="chr2L", end=0), ValueError), + (dict(region="chr2L", seqid=0), ValueError), + # these coords should catch everything + (dict(region="chr2L:7529-12500"), 27), + # stranded versions: + (dict(region="chr2L:7529-12500", strand="."), 0), + (dict(region="chr2L:7529-12500", strand="+"), 21), + (dict(region="chr2L:7529-12500", strand="-"), 6), + # different ways of selecting only that last exon in the last gene: + (dict(seqid="chr2L", start=11500, featuretype="exon"), 1), + (dict(seqid="chr2L", start=9500, featuretype="exon", strand="+"), 1), + # alternative method + (dict(seqid="chr2L", start=7529, end=12500), 27), + # since default completely_within=False, this catches anything that + # falls after 7680. So it only excludes the 5'UTR, which ends at 7679. + (dict(seqid="chr2L", start=7680), 26), + # but completely_within=True will exclude the gene and mRNAs, first + # exon and the 5'UTR + (dict(seqid="chr2L", start=7680, completely_within=True), 22), + # similarly, this will *exclude* anything before 7680 + (dict(seqid="chr2L", end=7680), 5), + # and also similarly, this will only get us the 5'UTR which is the only + # feature falling completely before 7680 + (dict(seqid="chr2L", end=7680, completely_within=True), 1), + # and there's only features from chr2L in this file, so this catches + # everything too + (dict(region="chr2L"), 27), + # using seqid should work similarly to `region` with only chromosome + (dict(seqid="chr2L"), 27), + # nonexistent + (dict(region="nowhere"), 0), +]) +def test_region(kwargs, expected): db_fname = gffutils.example_filename("FBgn0031208.gff") db = gffutils.create_db(db_fname, ":memory:", keep_order=True) - def _check(item): - kwargs, expected = item - try: - obs = list(db.region(**kwargs)) - assert len(obs) == expected, "expected %s got %s" % (expected, len(obs)) - except expected: - pass - - regions = [ - # previously failed, see issue #45 - (dict(seqid="chr2L", start=1, end=2e9, completely_within=True), 27), - (dict(region="chr2L", start=0), ValueError), - (dict(region="chr2L", end=0), ValueError), - (dict(region="chr2L", seqid=0), ValueError), - # these coords should catch everything - (dict(region="chr2L:7529-12500"), 27), - # stranded versions: - (dict(region="chr2L:7529-12500", strand="."), 0), - (dict(region="chr2L:7529-12500", strand="+"), 21), - (dict(region="chr2L:7529-12500", strand="-"), 6), - # different ways of selecting only that last exon in the last gene: - (dict(seqid="chr2L", start=11500, featuretype="exon"), 1), - (dict(seqid="chr2L", start=9500, featuretype="exon", strand="+"), 1), - # alternative method - (dict(seqid="chr2L", start=7529, end=12500), 27), - # since default completely_within=False, this catches anything that - # falls after 7680. So it only excludes the 5'UTR, which ends at 7679. - (dict(seqid="chr2L", start=7680), 26), - # but completely_within=True will exclude the gene and mRNAs, first - # exon and the 5'UTR - (dict(seqid="chr2L", start=7680, completely_within=True), 22), - # similarly, this will *exclude* anything before 7680 - (dict(seqid="chr2L", end=7680), 5), - # and also similarly, this will only get us the 5'UTR which is the only - # feature falling completely before 7680 - (dict(seqid="chr2L", end=7680, completely_within=True), 1), - # and there's only features from chr2L in this file, so this catches - # everything too - (dict(region="chr2L"), 27), - # using seqid should work similarly to `region` with only chromosome - (dict(seqid="chr2L"), 27), - # nonexistent - (dict(region="nowhere"), 0), - ] - - for item in regions: - yield _check, item + try: + obs = list(db.region(**kwargs)) + assert len(obs) == expected, "expected %s got %s" % (expected, len(obs)) + except expected: + pass def test_nonascii(): From b1e6d4a9a18c96a2bb25d003b35a961e7de70a7a Mon Sep 17 00:00:00 2001 From: Ryan Dale <115406+daler@users.noreply.github.com> Date: Sat, 17 Jan 2026 17:03:07 -0500 Subject: [PATCH 6/9] fix pybedtools_integration doctests tsses() -> gffutils.pybedtools_integration.tsses() --- gffutils/pybedtools_integration.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/gffutils/pybedtools_integration.py b/gffutils/pybedtools_integration.py index 5c5c2b90..e01e4911 100644 --- a/gffutils/pybedtools_integration.py +++ b/gffutils/pybedtools_integration.py @@ -113,7 +113,7 @@ def tsses( if they overlap (as in the first two): - >>> print(tsses(db)) # doctest: +NORMALIZE_WHITESPACE + >>> print(gffutils.pybedtools_integration.tsses(db)) # doctest: +NORMALIZE_WHITESPACE chr2L gffutils_derived transcript_TSS 7529 7529 . + . gene_id "FBgn0031208"; transcript_id "FBtr0300689"; chr2L gffutils_derived transcript_TSS 7529 7529 . + . gene_id "FBgn0031208"; transcript_id "FBtr0300690"; chr2L gffutils_derived transcript_TSS 11000 11000 . - . gene_id "Fk_gene_1"; transcript_id "transcript_Fk_gene_1"; @@ -124,7 +124,7 @@ def tsses( Default merging, showing the first two TSSes merged and reported as a single unique TSS for the gene. Note the conversion to BED: - >>> x = tsses(db, merge_overlapping=True) + >>> x = gffutils.pybedtools_integration.tsses(db, merge_overlapping=True) >>> print(x) # doctest: +NORMALIZE_WHITESPACE chr2L 7528 7529 FBgn0031208 . + chr2L 10999 11000 Fk_gene_1 . - @@ -135,7 +135,7 @@ def tsses( be easier to parse than the original GTF or GFF file. With no merging specified, we must add `as_bed6=True` to see the names in BED format. - >>> x = tsses(db, attrs=['gene_id', 'transcript_id'], as_bed6=True) + >>> x = gffutils.pybedtools_integration.tsses(db, attrs=['gene_id', 'transcript_id'], as_bed6=True) >>> print(x) # doctest: +NORMALIZE_WHITESPACE chr2L 7528 7529 FBgn0031208:FBtr0300689 . + chr2L 7528 7529 FBgn0031208:FBtr0300690 . + @@ -145,7 +145,7 @@ def tsses( Use a 3kb merge distance so the last 2 features are merged together: - >>> x = tsses(db, merge_overlapping=True, merge_kwargs=dict(d=3000)) + >>> x = gffutils.pybedtools_integration.tsses(db, merge_overlapping=True, merge_kwargs=dict(d=3000)) >>> print(x) # doctest: +NORMALIZE_WHITESPACE chr2L 7528 7529 FBgn0031208 . + chr2L 10999 12500 Fk_gene_1,Fk_gene_2 . - @@ -154,7 +154,7 @@ def tsses( The set of unique TSSes for each gene, +1kb upstream and 500bp downstream: - >>> x = tsses(db, merge_overlapping=True) + >>> x = gffutils.pybedtools_integration.tsses(db, merge_overlapping=True) >>> x = x.slop(l=1000, r=500, s=True, genome='dm3') >>> print(x) # doctest: +NORMALIZE_WHITESPACE chr2L 6528 8029 FBgn0031208 . + From 23b702bf63f4368eef5bbd935869fad979ca0dd2 Mon Sep 17 00:00:00 2001 From: Ryan Dale <115406+daler@users.noreply.github.com> Date: Sat, 17 Jan 2026 17:20:38 -0500 Subject: [PATCH 7/9] don't need module name on api docs autosummary --- doc/source/api.rst | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/doc/source/api.rst b/doc/source/api.rst index 2f9adefe..309a689a 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -117,10 +117,10 @@ Integration with other tools :toctree: autodocs :nosignatures: - gffutils.biopython_integration.to_seqfeature - gffutils.biopython_integration.from_seqfeature - gffutils.pybedtools_integration.tsses - gffutils.pybedtools_integration.to_bedtool + biopython_integration.to_seqfeature + biopython_integration.from_seqfeature + pybedtools_integration.tsses + pybedtools_integration.to_bedtool @@ -131,10 +131,10 @@ Utilities :toctree: autodocs :nosignatures: - gffutils.helpers.asinterval - gffutils.helpers.merge_attributes - gffutils.helpers.sanitize_gff_db - gffutils.helpers.annotate_gff_db - gffutils.helpers.infer_dialect - gffutils.helpers.example_filename - gffutils.inspect.inspect + helpers.asinterval + helpers.merge_attributes + helpers.sanitize_gff_db + helpers.annotate_gff_db + helpers.infer_dialect + helpers.example_filename + inspect.inspect From 0b93485f5d64c1ca42968e59b0ee0ef1c9c98c24 Mon Sep 17 00:00:00 2001 From: Ryan Dale <115406+daler@users.noreply.github.com> Date: Sat, 17 Jan 2026 17:21:23 -0500 Subject: [PATCH 8/9] don't use now-deprecated doc theme options --- doc/source/conf.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/doc/source/conf.py b/doc/source/conf.py index c65c4a28..2b85647c 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -53,5 +53,3 @@ templates_path = ['_templates'] exclude_patterns = [] html_theme = 'sphinx_rtd_theme' -html_static_path = ['_static'] -html_theme_path = [sphinx_rtd_theme.get_html_theme_path()] From 662f92a17d3c814557cd9b0663c905268604501a Mon Sep 17 00:00:00 2001 From: Ryan Dale <115406+daler@users.noreply.github.com> Date: Sat, 17 Jan 2026 17:22:07 -0500 Subject: [PATCH 9/9] escape backslash in docstring --- gffutils/interface.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gffutils/interface.py b/gffutils/interface.py index 9216cd39..ee9ec743 100644 --- a/gffutils/interface.py +++ b/gffutils/interface.py @@ -1285,7 +1285,7 @@ def create_introns( with open('tmp.gtf', 'w') as fout: for intron in db.create_introns(**intron_kwargs): - fout.write(str(intron) + "\n") + fout.write(str(intron) + "\\n") db.update(gffutils.DataIterator('tmp.gtf'), **create_kwargs) """