diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index d3716aa6..d1ca11f8 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -18,9 +18,8 @@ jobs: - name: conda env run: | - wget -O Mambaforge.sh "https://github.com/conda-forge/miniforge/releases/latest/download/Mambaforge-$(uname)-$(uname -m).sh" - curl -L -O "https://github.com/conda-forge/miniforge/releases/latest/download/Mambaforge-$(uname)-$(uname -m).sh" - bash Mambaforge.sh -b -p "${HOME}/conda" + wget -O Miniforge3.sh "https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-$(uname)-$(uname -m).sh" + bash Miniforge3.sh -b -p "${HOME}/conda" source "${HOME}/conda/etc/profile.d/conda.sh" source "${HOME}/conda/etc/profile.d/mamba.sh" which conda @@ -102,7 +101,7 @@ jobs: - name: push artifact if: ${{ (matrix.python-version == 3.9) }} - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: doc path: /tmp/docs diff --git a/doc/source/api.rst b/doc/source/api.rst index 2f9adefe..309a689a 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -117,10 +117,10 @@ Integration with other tools :toctree: autodocs :nosignatures: - gffutils.biopython_integration.to_seqfeature - gffutils.biopython_integration.from_seqfeature - gffutils.pybedtools_integration.tsses - gffutils.pybedtools_integration.to_bedtool + biopython_integration.to_seqfeature + biopython_integration.from_seqfeature + pybedtools_integration.tsses + pybedtools_integration.to_bedtool @@ -131,10 +131,10 @@ Utilities :toctree: autodocs :nosignatures: - gffutils.helpers.asinterval - gffutils.helpers.merge_attributes - gffutils.helpers.sanitize_gff_db - gffutils.helpers.annotate_gff_db - gffutils.helpers.infer_dialect - gffutils.helpers.example_filename - gffutils.inspect.inspect + helpers.asinterval + helpers.merge_attributes + helpers.sanitize_gff_db + helpers.annotate_gff_db + helpers.infer_dialect + helpers.example_filename + inspect.inspect diff --git a/doc/source/conf.py b/doc/source/conf.py index c65c4a28..2b85647c 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -53,5 +53,3 @@ templates_path = ['_templates'] exclude_patterns = [] html_theme = 'sphinx_rtd_theme' -html_static_path = ['_static'] -html_theme_path = [sphinx_rtd_theme.get_html_theme_path()] diff --git a/gffutils/interface.py b/gffutils/interface.py index 9216cd39..ee9ec743 100644 --- a/gffutils/interface.py +++ b/gffutils/interface.py @@ -1285,7 +1285,7 @@ def create_introns( with open('tmp.gtf', 'w') as fout: for intron in db.create_introns(**intron_kwargs): - fout.write(str(intron) + "\n") + fout.write(str(intron) + "\\n") db.update(gffutils.DataIterator('tmp.gtf'), **create_kwargs) """ diff --git a/gffutils/pybedtools_integration.py b/gffutils/pybedtools_integration.py index 5c5c2b90..e01e4911 100644 --- a/gffutils/pybedtools_integration.py +++ b/gffutils/pybedtools_integration.py @@ -113,7 +113,7 @@ def tsses( if they overlap (as in the first two): - >>> print(tsses(db)) # doctest: +NORMALIZE_WHITESPACE + >>> print(gffutils.pybedtools_integration.tsses(db)) # doctest: +NORMALIZE_WHITESPACE chr2L gffutils_derived transcript_TSS 7529 7529 . + . gene_id "FBgn0031208"; transcript_id "FBtr0300689"; chr2L gffutils_derived transcript_TSS 7529 7529 . + . gene_id "FBgn0031208"; transcript_id "FBtr0300690"; chr2L gffutils_derived transcript_TSS 11000 11000 . - . gene_id "Fk_gene_1"; transcript_id "transcript_Fk_gene_1"; @@ -124,7 +124,7 @@ def tsses( Default merging, showing the first two TSSes merged and reported as a single unique TSS for the gene. Note the conversion to BED: - >>> x = tsses(db, merge_overlapping=True) + >>> x = gffutils.pybedtools_integration.tsses(db, merge_overlapping=True) >>> print(x) # doctest: +NORMALIZE_WHITESPACE chr2L 7528 7529 FBgn0031208 . + chr2L 10999 11000 Fk_gene_1 . - @@ -135,7 +135,7 @@ def tsses( be easier to parse than the original GTF or GFF file. With no merging specified, we must add `as_bed6=True` to see the names in BED format. - >>> x = tsses(db, attrs=['gene_id', 'transcript_id'], as_bed6=True) + >>> x = gffutils.pybedtools_integration.tsses(db, attrs=['gene_id', 'transcript_id'], as_bed6=True) >>> print(x) # doctest: +NORMALIZE_WHITESPACE chr2L 7528 7529 FBgn0031208:FBtr0300689 . + chr2L 7528 7529 FBgn0031208:FBtr0300690 . + @@ -145,7 +145,7 @@ def tsses( Use a 3kb merge distance so the last 2 features are merged together: - >>> x = tsses(db, merge_overlapping=True, merge_kwargs=dict(d=3000)) + >>> x = gffutils.pybedtools_integration.tsses(db, merge_overlapping=True, merge_kwargs=dict(d=3000)) >>> print(x) # doctest: +NORMALIZE_WHITESPACE chr2L 7528 7529 FBgn0031208 . + chr2L 10999 12500 Fk_gene_1,Fk_gene_2 . - @@ -154,7 +154,7 @@ def tsses( The set of unique TSSes for each gene, +1kb upstream and 500bp downstream: - >>> x = tsses(db, merge_overlapping=True) + >>> x = gffutils.pybedtools_integration.tsses(db, merge_overlapping=True) >>> x = x.slop(l=1000, r=500, s=True, genome='dm3') >>> print(x) # doctest: +NORMALIZE_WHITESPACE chr2L 6528 8029 FBgn0031208 . + diff --git a/gffutils/test/test_1.py b/gffutils/test/test_1.py index 2b88cc04..d793aa49 100644 --- a/gffutils/test/test_1.py +++ b/gffutils/test/test_1.py @@ -482,58 +482,51 @@ def test_sanitize_gff(): print("Sanitized GFF successfully.") -def test_region(): - +@pytest.mark.parametrize("kwargs,expected", [ + # previously failed, see issue #45 + (dict(seqid="chr2L", start=1, end=2e9, completely_within=True), 27), + (dict(region="chr2L", start=0), ValueError), + (dict(region="chr2L", end=0), ValueError), + (dict(region="chr2L", seqid=0), ValueError), + # these coords should catch everything + (dict(region="chr2L:7529-12500"), 27), + # stranded versions: + (dict(region="chr2L:7529-12500", strand="."), 0), + (dict(region="chr2L:7529-12500", strand="+"), 21), + (dict(region="chr2L:7529-12500", strand="-"), 6), + # different ways of selecting only that last exon in the last gene: + (dict(seqid="chr2L", start=11500, featuretype="exon"), 1), + (dict(seqid="chr2L", start=9500, featuretype="exon", strand="+"), 1), + # alternative method + (dict(seqid="chr2L", start=7529, end=12500), 27), + # since default completely_within=False, this catches anything that + # falls after 7680. So it only excludes the 5'UTR, which ends at 7679. + (dict(seqid="chr2L", start=7680), 26), + # but completely_within=True will exclude the gene and mRNAs, first + # exon and the 5'UTR + (dict(seqid="chr2L", start=7680, completely_within=True), 22), + # similarly, this will *exclude* anything before 7680 + (dict(seqid="chr2L", end=7680), 5), + # and also similarly, this will only get us the 5'UTR which is the only + # feature falling completely before 7680 + (dict(seqid="chr2L", end=7680, completely_within=True), 1), + # and there's only features from chr2L in this file, so this catches + # everything too + (dict(region="chr2L"), 27), + # using seqid should work similarly to `region` with only chromosome + (dict(seqid="chr2L"), 27), + # nonexistent + (dict(region="nowhere"), 0), +]) +def test_region(kwargs, expected): db_fname = gffutils.example_filename("FBgn0031208.gff") db = gffutils.create_db(db_fname, ":memory:", keep_order=True) - def _check(item): - kwargs, expected = item - try: - obs = list(db.region(**kwargs)) - assert len(obs) == expected, "expected %s got %s" % (expected, len(obs)) - except expected: - pass - - regions = [ - # previously failed, see issue #45 - (dict(seqid="chr2L", start=1, end=2e9, completely_within=True), 27), - (dict(region="chr2L", start=0), ValueError), - (dict(region="chr2L", end=0), ValueError), - (dict(region="chr2L", seqid=0), ValueError), - # these coords should catch everything - (dict(region="chr2L:7529-12500"), 27), - # stranded versions: - (dict(region="chr2L:7529-12500", strand="."), 0), - (dict(region="chr2L:7529-12500", strand="+"), 21), - (dict(region="chr2L:7529-12500", strand="-"), 6), - # different ways of selecting only that last exon in the last gene: - (dict(seqid="chr2L", start=11500, featuretype="exon"), 1), - (dict(seqid="chr2L", start=9500, featuretype="exon", strand="+"), 1), - # alternative method - (dict(seqid="chr2L", start=7529, end=12500), 27), - # since default completely_within=False, this catches anything that - # falls after 7680. So it only excludes the 5'UTR, which ends at 7679. - (dict(seqid="chr2L", start=7680), 26), - # but completely_within=True will exclude the gene and mRNAs, first - # exon and the 5'UTR - (dict(seqid="chr2L", start=7680, completely_within=True), 22), - # similarly, this will *exclude* anything before 7680 - (dict(seqid="chr2L", end=7680), 5), - # and also similarly, this will only get us the 5'UTR which is the only - # feature falling completely before 7680 - (dict(seqid="chr2L", end=7680, completely_within=True), 1), - # and there's only features from chr2L in this file, so this catches - # everything too - (dict(region="chr2L"), 27), - # using seqid should work similarly to `region` with only chromosome - (dict(seqid="chr2L"), 27), - # nonexistent - (dict(region="nowhere"), 0), - ] - - for item in regions: - yield _check, item + try: + obs = list(db.region(**kwargs)) + assert len(obs) == expected, "expected %s got %s" % (expected, len(obs)) + except expected: + pass def test_nonascii(): diff --git a/gffutils/test/test_biopython_integration.py b/gffutils/test/test_biopython_integration.py index 58c5866a..e9f8e81d 100644 --- a/gffutils/test/test_biopython_integration.py +++ b/gffutils/test/test_biopython_integration.py @@ -1,6 +1,10 @@ from gffutils import example_filename import gffutils import gffutils.biopython_integration as bp +import pytest + +# Skip tests entirely if BioPython not available +pytest.importorskip('Bio') def test_roundtrip():