Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 3 additions & 4 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,8 @@ jobs:

- name: conda env
run: |
wget -O Mambaforge.sh "https://github.com/conda-forge/miniforge/releases/latest/download/Mambaforge-$(uname)-$(uname -m).sh"
curl -L -O "https://github.com/conda-forge/miniforge/releases/latest/download/Mambaforge-$(uname)-$(uname -m).sh"
bash Mambaforge.sh -b -p "${HOME}/conda"
wget -O Miniforge3.sh "https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-$(uname)-$(uname -m).sh"
bash Miniforge3.sh -b -p "${HOME}/conda"
source "${HOME}/conda/etc/profile.d/conda.sh"
source "${HOME}/conda/etc/profile.d/mamba.sh"
which conda
Expand Down Expand Up @@ -102,7 +101,7 @@ jobs:

- name: push artifact
if: ${{ (matrix.python-version == 3.9) }}
uses: actions/upload-artifact@v3
uses: actions/upload-artifact@v4
with:
name: doc
path: /tmp/docs
Expand Down
22 changes: 11 additions & 11 deletions doc/source/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -117,10 +117,10 @@ Integration with other tools
:toctree: autodocs
:nosignatures:

gffutils.biopython_integration.to_seqfeature
gffutils.biopython_integration.from_seqfeature
gffutils.pybedtools_integration.tsses
gffutils.pybedtools_integration.to_bedtool
biopython_integration.to_seqfeature
biopython_integration.from_seqfeature
pybedtools_integration.tsses
pybedtools_integration.to_bedtool



Expand All @@ -131,10 +131,10 @@ Utilities
:toctree: autodocs
:nosignatures:

gffutils.helpers.asinterval
gffutils.helpers.merge_attributes
gffutils.helpers.sanitize_gff_db
gffutils.helpers.annotate_gff_db
gffutils.helpers.infer_dialect
gffutils.helpers.example_filename
gffutils.inspect.inspect
helpers.asinterval
helpers.merge_attributes
helpers.sanitize_gff_db
helpers.annotate_gff_db
helpers.infer_dialect
helpers.example_filename
inspect.inspect
2 changes: 0 additions & 2 deletions doc/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,5 +53,3 @@
templates_path = ['_templates']
exclude_patterns = []
html_theme = 'sphinx_rtd_theme'
html_static_path = ['_static']
html_theme_path = [sphinx_rtd_theme.get_html_theme_path()]
2 changes: 1 addition & 1 deletion gffutils/interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -1285,7 +1285,7 @@ def create_introns(

with open('tmp.gtf', 'w') as fout:
for intron in db.create_introns(**intron_kwargs):
fout.write(str(intron) + "\n")
fout.write(str(intron) + "\\n")
db.update(gffutils.DataIterator('tmp.gtf'), **create_kwargs)

"""
Expand Down
10 changes: 5 additions & 5 deletions gffutils/pybedtools_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ def tsses(
if they overlap (as in the first two):


>>> print(tsses(db)) # doctest: +NORMALIZE_WHITESPACE
>>> print(gffutils.pybedtools_integration.tsses(db)) # doctest: +NORMALIZE_WHITESPACE
chr2L gffutils_derived transcript_TSS 7529 7529 . + . gene_id "FBgn0031208"; transcript_id "FBtr0300689";
chr2L gffutils_derived transcript_TSS 7529 7529 . + . gene_id "FBgn0031208"; transcript_id "FBtr0300690";
chr2L gffutils_derived transcript_TSS 11000 11000 . - . gene_id "Fk_gene_1"; transcript_id "transcript_Fk_gene_1";
Expand All @@ -124,7 +124,7 @@ def tsses(
Default merging, showing the first two TSSes merged and reported as
a single unique TSS for the gene. Note the conversion to BED:

>>> x = tsses(db, merge_overlapping=True)
>>> x = gffutils.pybedtools_integration.tsses(db, merge_overlapping=True)
>>> print(x) # doctest: +NORMALIZE_WHITESPACE
chr2L 7528 7529 FBgn0031208 . +
chr2L 10999 11000 Fk_gene_1 . -
Expand All @@ -135,7 +135,7 @@ def tsses(
be easier to parse than the original GTF or GFF file. With no merging
specified, we must add `as_bed6=True` to see the names in BED format.

>>> x = tsses(db, attrs=['gene_id', 'transcript_id'], as_bed6=True)
>>> x = gffutils.pybedtools_integration.tsses(db, attrs=['gene_id', 'transcript_id'], as_bed6=True)
>>> print(x) # doctest: +NORMALIZE_WHITESPACE
chr2L 7528 7529 FBgn0031208:FBtr0300689 . +
chr2L 7528 7529 FBgn0031208:FBtr0300690 . +
Expand All @@ -145,7 +145,7 @@ def tsses(

Use a 3kb merge distance so the last 2 features are merged together:

>>> x = tsses(db, merge_overlapping=True, merge_kwargs=dict(d=3000))
>>> x = gffutils.pybedtools_integration.tsses(db, merge_overlapping=True, merge_kwargs=dict(d=3000))
>>> print(x) # doctest: +NORMALIZE_WHITESPACE
chr2L 7528 7529 FBgn0031208 . +
chr2L 10999 12500 Fk_gene_1,Fk_gene_2 . -
Expand All @@ -154,7 +154,7 @@ def tsses(

The set of unique TSSes for each gene, +1kb upstream and 500bp downstream:

>>> x = tsses(db, merge_overlapping=True)
>>> x = gffutils.pybedtools_integration.tsses(db, merge_overlapping=True)
>>> x = x.slop(l=1000, r=500, s=True, genome='dm3')
>>> print(x) # doctest: +NORMALIZE_WHITESPACE
chr2L 6528 8029 FBgn0031208 . +
Expand Down
12 changes: 6 additions & 6 deletions gffutils/scripts/gffutils-cli
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ def fetch(db, ids):
(like grep -v)''')
@arg('--exclude-self', help='''Use this to suppress reporting the IDs you've
provided.''')
def children(db, ids, limit=None, exclude=None, exclude_self=False):
def children(db, ids, *, limit=None, exclude=None, exclude_self=False):
"""
Fetch children from the database according to ID.
"""
Expand Down Expand Up @@ -110,7 +110,7 @@ def children(db, ids, limit=None, exclude=None, exclude_self=False):
(like grep -v)''')
@arg('--exclude-self', help='''Use this to suppress reporting the IDs you've
provided.''')
def parents(db, ids, limit=None, exclude=None, exclude_self=False):
def parents(db, ids, *, limit=None, exclude=None, exclude_self=False):
"""
Fetch parents from the database according to ID.
"""
Expand Down Expand Up @@ -167,7 +167,7 @@ def common(db):
@arg('--disable-infer-transcripts', help='''Disable inferring of transcript
extents for GTF files. Use this if your GTF file already has "transcript"
featuretypes''')
def create(filename, output=None, force=False, quiet=False, merge="merge",
def create(filename, *, output=None, force=False, quiet=False, merge="merge",
disable_infer_genes=False, disable_infer_transcripts=False):
"""
Create a database.
Expand Down Expand Up @@ -198,7 +198,7 @@ def clean(filename):
@arg('--in-place',
help='''Sanitize file in-place: overwrites current file with sanitized
version.''')
def sanitize(filename,
def sanitize(filename, *,
in_memory=True,
in_place=False):
"""
Expand All @@ -225,7 +225,7 @@ def sanitize(filename,
@arg('filename', help='''GFF or GTF file to use.''')
@arg('--in-place', help='''Remove duplicates in place (overwrite current
file.)''')
def rmdups(filename, in_place=False):
def rmdups(filename, *, in_place=False):
"""
Remove duplicates from a GFF file.
"""
Expand Down Expand Up @@ -278,7 +278,7 @@ def convert(filename):
@arg('--featuretype', help='''Restrict to a particular featuretype. This can
be faster than doing a grep on the output, since it restricts the search
space in the database''')
def search(db, text, featuretype=None):
def search(db, text, *, featuretype=None):
"""
Search the attributes.
"""
Expand Down
91 changes: 42 additions & 49 deletions gffutils/test/test_1.py
Original file line number Diff line number Diff line change
Expand Up @@ -482,58 +482,51 @@ def test_sanitize_gff():
print("Sanitized GFF successfully.")


def test_region():

@pytest.mark.parametrize("kwargs,expected", [
# previously failed, see issue #45
(dict(seqid="chr2L", start=1, end=2e9, completely_within=True), 27),
(dict(region="chr2L", start=0), ValueError),
(dict(region="chr2L", end=0), ValueError),
(dict(region="chr2L", seqid=0), ValueError),
# these coords should catch everything
(dict(region="chr2L:7529-12500"), 27),
# stranded versions:
(dict(region="chr2L:7529-12500", strand="."), 0),
(dict(region="chr2L:7529-12500", strand="+"), 21),
(dict(region="chr2L:7529-12500", strand="-"), 6),
# different ways of selecting only that last exon in the last gene:
(dict(seqid="chr2L", start=11500, featuretype="exon"), 1),
(dict(seqid="chr2L", start=9500, featuretype="exon", strand="+"), 1),
# alternative method
(dict(seqid="chr2L", start=7529, end=12500), 27),
# since default completely_within=False, this catches anything that
# falls after 7680. So it only excludes the 5'UTR, which ends at 7679.
(dict(seqid="chr2L", start=7680), 26),
# but completely_within=True will exclude the gene and mRNAs, first
# exon and the 5'UTR
(dict(seqid="chr2L", start=7680, completely_within=True), 22),
# similarly, this will *exclude* anything that starts after 7680
(dict(seqid="chr2L", end=7680), 5),
# and also similarly, this will only get us the 5'UTR which is the only
# feature falling completely before 7680
(dict(seqid="chr2L", end=7680, completely_within=True), 1),
# and there's only features from chr2L in this file, so this catches
# everything too
(dict(region="chr2L"), 27),
# using seqid should work similarly to `region` with only chromosome
(dict(seqid="chr2L"), 27),
# nonexistent
(dict(region="nowhere"), 0),
])
def test_region(kwargs, expected):
db_fname = gffutils.example_filename("FBgn0031208.gff")
db = gffutils.create_db(db_fname, ":memory:", keep_order=True)

def _check(item):
kwargs, expected = item
try:
obs = list(db.region(**kwargs))
assert len(obs) == expected, "expected %s got %s" % (expected, len(obs))
except expected:
pass

regions = [
# previously failed, see issue #45
(dict(seqid="chr2L", start=1, end=2e9, completely_within=True), 27),
(dict(region="chr2L", start=0), ValueError),
(dict(region="chr2L", end=0), ValueError),
(dict(region="chr2L", seqid=0), ValueError),
# these coords should catch everything
(dict(region="chr2L:7529-12500"), 27),
# stranded versions:
(dict(region="chr2L:7529-12500", strand="."), 0),
(dict(region="chr2L:7529-12500", strand="+"), 21),
(dict(region="chr2L:7529-12500", strand="-"), 6),
# different ways of selecting only that last exon in the last gene:
(dict(seqid="chr2L", start=11500, featuretype="exon"), 1),
(dict(seqid="chr2L", start=9500, featuretype="exon", strand="+"), 1),
# alternative method
(dict(seqid="chr2L", start=7529, end=12500), 27),
# since default completely_within=False, this catches anything that
# falls after 7680. So it only excludes the 5'UTR, which ends at 7679.
(dict(seqid="chr2L", start=7680), 26),
# but completely_within=True will exclude the gene and mRNAs, first
# exon and the 5'UTR
(dict(seqid="chr2L", start=7680, completely_within=True), 22),
# similarly, this will *exclude* anything that starts after 7680
(dict(seqid="chr2L", end=7680), 5),
# and also similarly, this will only get us the 5'UTR which is the only
# feature falling completely before 7680
(dict(seqid="chr2L", end=7680, completely_within=True), 1),
# and there's only features from chr2L in this file, so this catches
# everything too
(dict(region="chr2L"), 27),
# using seqid should work similarly to `region` with only chromosome
(dict(seqid="chr2L"), 27),
# nonexistent
(dict(region="nowhere"), 0),
]

for item in regions:
yield _check, item
try:
obs = list(db.region(**kwargs))
assert len(obs) == expected, "expected %s got %s" % (expected, len(obs))
except expected:
pass


def test_nonascii():
Expand Down
4 changes: 4 additions & 0 deletions gffutils/test/test_biopython_integration.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
from gffutils import example_filename
import gffutils
import gffutils.biopython_integration as bp
import pytest

# Skip tests entirely if BioPython not available
pytest.importorskip('Bio')


def test_roundtrip():
Expand Down