diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml deleted file mode 100644 index 0798850..0000000 --- a/.github/workflows/lint.yml +++ /dev/null @@ -1,21 +0,0 @@ -name: Lint - -on: [push, pull_request] - -jobs: - lint: - runs-on: ubuntu-latest - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - steps: - - uses: actions/checkout@v4 - - uses: psf/black@stable - - uses: actions/setup-python@v5 - with: - python-version: '3.11' - - name: Install flake8 - run: pip install flake8 - - name: Run flake8 - uses: suo/flake8-github-action@releases/v1 - with: - checkName: 'lint' diff --git a/.github/workflows/pythonpackage.yml b/.github/workflows/pythonpackage.yml index 1e35018..b0e6567 100644 --- a/.github/workflows/pythonpackage.yml +++ b/.github/workflows/pythonpackage.yml @@ -7,8 +7,16 @@ on: pull_request: [] jobs: + lint: + runs-on: ubuntu-latest + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + steps: + - uses: actions/checkout@v4 + - uses: astral-sh/ruff-action@v3 + - run: ruff format build: - + needs: lint runs-on: ubuntu-latest strategy: max-parallel: 4 @@ -18,7 +26,6 @@ jobs: - "3.11" - "3.12" - "3.13" - steps: - name: Install apt packages run: | @@ -46,15 +53,6 @@ jobs: echo $poetryv - name: Install package for testing run: poetry install -v --all-extras - - name: Lint with flake8 - run: | - pipx install flake8 - # stop the build if there are Python syntax errors or undefined names - flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics - # exit-zero treats all errors as warnings - # The GitHub editor is 127 chars wide - flake8 . 
--count --ignore=E302 --exit-zero --max-complexity=10 \ - --max-line-length=127 --statistics - name: Test with pytest run: poetry run pytest --script-launch-mode=subprocess - name: Reinstall package for production diff --git a/pyproject.toml b/pyproject.toml index bd614cd..aad37f6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -46,7 +46,6 @@ tact_add_config = "tact.cli_add_toml:main" tact_build_taxonomic_tree = "tact.cli_taxonomy:main" tact_check_results = "tact.cli_check_trees:main" - [tool.poetry] include = ["examples"] classifiers = [ @@ -56,9 +55,36 @@ classifiers = [ "Topic :: Scientific/Engineering :: Bio-Informatics" ] -[tool.black] +[tool.ruff] line-length = 118 -target-version = ['py38', 'py39', 'py310', 'py311'] + +[tool.ruff.lint] +fixable = ["ALL"] +select = [ + "NPY", # numpy warnings + "E", # pycodestyle errors + "W", # pycodestyle warnings + "F", # pyflakes + "D", # pydocstyle + "DOC", # pydoclint + "I", # isort + "RUF", # ruff + "B", # flake8-bugbear + "UP", # pyupgrade + "ICN", # flake8-import-conventions + "FA", # flake8-future-annotations + "C4", # flake8-comprehensions + "A" # flake8-builtins +] + +[tool.ruff.lint.per-file-ignores] +# Ignore all directories named `tests` and CLI modules. +"tests/**" = ["D", "DOC"] +"tact/cli_*" = ["D", "DOC"] +"__init__.py" = ["D", "DOC"] + +[tool.ruff.lint.pydocstyle] +convention = "google" [build-system] requires = ["poetry_core>=2.0"] diff --git a/tact/cli_add_taxa.py b/tact/cli_add_taxa.py index a8112a6..b18caf8 100755 --- a/tact/cli_add_taxa.py +++ b/tact/cli_add_taxa.py @@ -1,11 +1,8 @@ #!/usr/bin/env python -# -*- coding: utf-8 -*- # Try to assign tips to a pre-existing tree based on a taxonomy # Jonathan Chang, May 13, 2016 -from __future__ import division -from __future__ import print_function import csv import logging @@ -18,21 +15,20 @@ import dendropy from . 
import fastmrca -from .lib import crown_capture_probability -from .lib import get_new_times -from .tree_util import get_ages -from .tree_util import get_birth_death_rates -from .tree_util import get_min_age -from .tree_util import get_short_branches -from .tree_util import get_tip_labels -from .tree_util import graft_node -from .tree_util import is_binary -from .tree_util import is_fully_locked -from .tree_util import lock_clade -from .tree_util import update_tree_view -from .validation import validate_outgroups -from .validation import validate_taxonomy_tree -from .validation import BackboneCommand +from .lib import crown_capture_probability, get_new_times +from .tree_util import ( + get_ages, + get_birth_death_rates, + get_min_age, + get_short_branches, + get_tip_labels, + graft_node, + is_binary, + is_fully_locked, + lock_clade, + update_tree_view, +) +from .validation import BackboneCommand, validate_outgroups, validate_taxonomy_tree logger = logging.getLogger(__name__) # Speed up logging for PyPy @@ -87,9 +83,7 @@ def search_ancestors_for_valid_backbone_node(taxonomy_node, backbone_tips, ccp): def get_new_branching_times(backbone_node, taxonomy_node, told=None, tyoung=0, min_ccp=0.8, num_new_times=None): - """ - Get `n_total` new branching times for a `node`. 
- """ + """Get `n_total` new branching times for a `node`.""" global mrca_rates taxon = taxonomy_node.label birth, death, ccp, _ = mrca_rates[taxon] @@ -131,7 +125,7 @@ def get_new_branching_times(backbone_node, taxonomy_node, told=None, tyoung=0, m def fill_new_taxa(namespace, node, new_taxa, times, stem=False): - for new_species, new_age in zip(new_taxa, times): + for new_species, new_age in zip(new_taxa, times, strict=True): new_node = dendropy.Node() new_node.annotations.add_new("creation_method", "fill_new_taxa") new_node.age = new_age @@ -184,7 +178,7 @@ def create_clade(namespace, species, ages): # Lock the child of the seed node so that things can still attach to the stem of this new clade lock_clade(tree.seed_node.child_nodes()[0]) if list(get_short_branches(tree.seed_node)): - logger.info("{} short branches detected".format(len(list(get_short_branches(tree.seed_node))))) + logger.info(f"{len(list(get_short_branches(tree.seed_node)))} short branches detected") return tree @@ -315,9 +309,7 @@ def run_precalcs(taxonomy_tree, backbone_tree, min_ccp=0.8, yule=False): ) @click.option("-v", "--verbose", help="emit extra information (can be repeated)", count=True) def main(taxonomy, backbone, outgroups, output, min_ccp, verbose, yule, ultrametricity_precision): - """ - Add tips onto a BACKBONE phylogeny using a TAXONOMY phylogeny. 
- """ + """Add tips onto a BACKBONE phylogeny using a TAXONOMY phylogeny.""" logger.addHandler(logging.FileHandler(output + ".log.txt")) if verbose >= 2: logger.setLevel(logging.DEBUG) diff --git a/tact/cli_add_toml.py b/tact/cli_add_toml.py index cbeac2b..1cdd5df 100755 --- a/tact/cli_add_toml.py +++ b/tact/cli_add_toml.py @@ -1,35 +1,34 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -# Try to assign tips to a pre-existing tree based on a TOML configuration file -# Jonathan Chang, Aug 14, 2021 +"""Command line interface to assign tips to a pre-existing tree based on a TOML configuration file.""" from __future__ import annotations -from concurrent.futures import ProcessPoolExecutor, as_completed -from dataclasses import dataclass, field, InitVar -from collections import defaultdict import copy import logging +import operator import os import re import sys -import typing +from collections import defaultdict +from concurrent.futures import ProcessPoolExecutor, as_completed +from dataclasses import InitVar, dataclass, field +from functools import reduce import click import dendropy import toml from .lib import get_new_times -from .tree_util import get_ages -from .tree_util import get_birth_death_rates -from .tree_util import get_min_age -from .tree_util import get_tip_labels -from .tree_util import graft_node -from .tree_util import is_binary -from .tree_util import lock_clade -from .tree_util import unlock_clade -from .tree_util import update_tree_view +from .tree_util import ( + get_ages, + get_birth_death_rates, + get_min_age, + get_tip_labels, + graft_node, + is_binary, + lock_clade, + unlock_clade, + update_tree_view, +) from .validation import BackboneCommand logger = logging.getLogger(__name__) @@ -42,9 +41,9 @@ @dataclass class TactConstraint: - """Class for keeping track of a constraint in TACT (positive or negative)""" + """Class for keeping track of a constraint in TACT (positive or negative).""" - mrca: typing.List[str] = field(default_factory=list) 
+ mrca: list[str] = field(default_factory=list) stem: bool = False def __post_init__(self): @@ -91,6 +90,7 @@ def __post_init__(self, include, exclude): def ensure_mrca(tree, tips, node=None): + """Perform initial checks to ensure we can do MRCA calculations.""" try: node = node if node else tree.seed_node return tree.mrca(taxon_labels=tips, start_node=node) @@ -112,7 +112,7 @@ def ensure_mrca(tree, tips, node=None): def do_tact(tree, item): # First, get the MRCA of _all_ `include` leafs. This is the basis of our rate computation, # and how we actually implement polyphyletic groups. - included_tips = sum([x.mrca for x in item.include], []) + included_tips = reduce(operator.iadd, [x.mrca for x in item.include], []) mrca_node = ensure_mrca(tree, included_tips) # Compute the rates on that (possibly expansive) MRCA node. @@ -143,7 +143,7 @@ def do_tact(tree, item): genera_map[genus].add(tip) if len(genera_map) > 1: - for genus, species in genera_map.items(): + for species in genera_map.values(): node = tree.mrca(taxon_labels=species, start_node=inner_mrca_node) if node and species == get_tip_labels(node): if len(species) == 1: @@ -178,6 +178,7 @@ def do_tact(tree, item): def do_replicate(backbone, to_tact, label): + """Perform a replicate of a TACT analysis.""" logger.info(f"<<< Replicate {label} >>>") tree = copy.deepcopy(backbone) for item in to_tact: @@ -208,9 +209,7 @@ def do_replicate(backbone, to_tact, label): default=os.cpu_count() or 1, ) def main(config, backbone, output, verbose, ultrametricity_precision, replicates, cores): - """ - Add tips onto a BACKBONE phylogeny using a CONFIG file - """ + """Add tips onto a BACKBONE phylogeny using a CONFIG file.""" logger.addHandler(logging.FileHandler(output + ".log.txt")) if verbose >= 2: logger.setLevel(logging.DEBUG) @@ -226,7 +225,7 @@ def main(config, backbone, output, verbose, ultrametricity_precision, replicates to_tact = [TactItem(**x) for x in config["tact"]] # Ensure the proper ordering of TACT items based 
on divergence time of implied MRCA nodes - to_tact.sort(key=lambda item: ensure_mrca(backbone, sum([x.mrca for x in item.include], [])).age) + to_tact.sort(key=lambda ii: ensure_mrca(backbone, reduce(operator.iadd, [x.mrca for x in ii.include], [])).age) # Compute global birth/death rates. Not currently used (but could be?) backbone_tips = len(backbone.leaf_nodes()) diff --git a/tact/cli_check_trees.py b/tact/cli_check_trees.py index a4ba259..6cd9b87 100755 --- a/tact/cli_check_trees.py +++ b/tact/cli_check_trees.py @@ -1,7 +1,4 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -from __future__ import division +"""Command-line interface module to check TACT results.""" import csv import functools @@ -12,13 +9,11 @@ import click import dendropy -from .tree_util import get_birth_death_rates -from .tree_util import get_monophyletic_node -from .tree_util import get_tip_labels -from .tree_util import get_tree +from .tree_util import get_birth_death_rates, get_monophyletic_node, get_tip_labels, get_tree def analyze_taxon(bb_tips, st_tips, backbone, simtaxed, taxon_node): + """Perform various checks for a given taxon.""" taxon = taxon_node.label if not taxon: return None @@ -89,14 +84,13 @@ def analyze_taxon(bb_tips, st_tips, backbone, simtaxed, taxon_node): ) @click.option("--chunksize", help="number of tree nodes to allocate to each core", type=click.IntRange(1)) def main(simulated, backbone, taxonomy, output, cores, chunksize): - """ - Check a SIMULATED phylogeny for consistency with its backbone source tree and a taxonomy. + """Check a SIMULATED phylogeny for consistency with its backbone source tree and a taxonomy. The SIMULATED phylogeny should have been generated by the tact_add_taxa script. All phylogenies should be in Newick format. 
""" pool = multiprocessing.Pool(processes=cores) - click.echo("Using %d parallel cores" % cores, err=True) + click.echo(f"Using {cores} parallel cores", err=True) taxonomy = dendropy.Tree.get_from_path(taxonomy, schema="newick") tn = taxonomy.taxon_namespace click.echo("Taxonomy OK", err=True) diff --git a/tact/cli_taxonomy.py b/tact/cli_taxonomy.py index d32fed3..4c8fdd0 100755 --- a/tact/cli_taxonomy.py +++ b/tact/cli_taxonomy.py @@ -1,3 +1,5 @@ +"""Command-line interface module to construct taxonomic phylogenies.""" + import collections import csv @@ -8,12 +10,11 @@ def fix_file(filename): + """Slurps a file, and does various checks and fixes. + + These checks will sort the file and ensures column names are unique. """ - Slurps a file, and does various checks and fixes: - * Sorts the file - * Ensures column names are unique - """ - with open(filename, "r", encoding="utf-8") as rfile: + with open(filename, encoding="utf-8") as rfile: lines = rfile.readlines() heads = collections.defaultdict(int) @@ -32,7 +33,7 @@ def fix_file(filename): def ensure(st, ctx=""): - "Ensures that a cell is not empty." + """Ensures that a cell is not empty.""" if len(st) == 0: if len(ctx) > 0: text = f" Offending line:\n{','.join(ctx)}" @@ -40,6 +41,10 @@ def ensure(st, ctx=""): def mangle_rank(row, names): + """Mangles a name with the rank associated with that name. + + This ensures that when constructing the taxonomic tree, each node has a unique name. + """ seen = set() new = [] for idx, item in enumerate(row): @@ -54,10 +59,9 @@ def mangle_rank(row, names): def build_taxonomic_tree(filename): - """ - Builds a taxonomic tree given a filename. Last column is assumed to - be a species name. All ranks must nest completely within the next - highest rank. + """Builds a taxonomic tree given a filename referring to a CSV. + + The last column is assumed to be a species name. All ranks must nest completely within the next highest rank. 
""" lines = fix_file(filename) reader = csv.reader(lines) @@ -75,7 +79,7 @@ def build_taxonomic_tree(filename): mangled_ranks = set() row = ["__TAXONOMIC_ROOT__", *next(reader)] mangled_row = mangle_rank(row, rank_names) - for orig, new in zip(row, mangled_row): + for orig, new in zip(row, mangled_row, strict=True): if orig != new: mangled_ranks.add((orig, new)) row = mangled_row @@ -93,13 +97,13 @@ def build_taxonomic_tree(filename): # Uniquify row names row = ["__TAXONOMIC_ROOT__", *row] mangled_row = mangle_rank(row, rank_names) - for orig, new in zip(row, mangled_row): + for orig, new in zip(row, mangled_row, strict=True): if orig != new: mangled_ranks.add((orig, new)) row = mangled_row prev = None - for prev, cur in zip(reversed(stack), reversed(row)): + for prev, cur in zip(reversed(stack), reversed(row), strict=True): ensure(cur, ctx=row) if prev == cur: break @@ -133,7 +137,7 @@ def build_taxonomic_tree(filename): type=click.Choice(["newick", "nexus", "nexml"]), ) def main(taxonomy, output, schema): - """Generates a taxonomic tree from TAXONOMY. + r"""Generates a taxonomic tree from TAXONOMY. TAXONOMY is a comma-separated values file with several requirements. diff --git a/tact/exceptions.py b/tact/exceptions.py index 52ff127..0c1ab92 100644 --- a/tact/exceptions.py +++ b/tact/exceptions.py @@ -1,3 +1,6 @@ +"""Exceptions used by TACT.""" + + class TactError(Exception): """Base class for errors raised by TACT.""" diff --git a/tact/fastmrca.py b/tact/fastmrca.py index 3e412a3..7d0e0fb 100644 --- a/tact/fastmrca.py +++ b/tact/fastmrca.py @@ -1,24 +1,18 @@ """Singleton object that helps speed up MRCA lookups.""" -from __future__ import division - from .tree_util import get_tip_labels global tree def initialize(phy): - """ - Initialize the fastmrca singleton with a tree. - """ + """Initialize the fastmrca singleton with a tree.""" global tree tree = phy def bitmask(labels): - """ - Gets a bitmask for the taxa in `labels`, potentially in parallel. 
- """ + """Gets a bitmask for the taxa in `labels`, potentially in parallel.""" global tree tn = tree.taxon_namespace return tn.taxa_bitmask(labels=labels) diff --git a/tact/generic_monophyly.py b/tact/generic_monophyly.py index 144704b..f297434 100644 --- a/tact/generic_monophyly.py +++ b/tact/generic_monophyly.py @@ -1,17 +1,15 @@ -# -*- coding: utf-8 -*- - """Module that provides a generic monophyly singleton data object.""" import re from collections import defaultdict -from .tree_util import get_tip_labels -from .tree_util import get_monophyletic_node +from .tree_util import get_monophyletic_node, get_tip_labels _valid_monophyly = {} def init(tree): + """Initialize a monophyly singleton object.""" if len(_valid_monophyly): return _valid_monophyly diff --git a/tact/lib.py b/tact/lib.py index 5f961ad..fd6891f 100644 --- a/tact/lib.py +++ b/tact/lib.py @@ -1,25 +1,19 @@ -# -*- coding: utf-8 -*- - """Functions to handle various numerical operations, including optimization.""" -from __future__ import division - import random import sys from decimal import Decimal as D -from math import exp -from math import log +from math import exp, log import numpy as np -from scipy.optimize import minimize, minimize_scalar, dual_annealing +from scipy.optimize import dual_annealing, minimize, minimize_scalar # Raise on overflow np.seterr(all="raise") def get_bd(r, a): - """ - Converts turnover and relative extinction to birth and death rates. + """Converts turnover and relative extinction to birth and death rates. Args: r (float): turnover or net diversification (birth - death) @@ -32,8 +26,7 @@ def get_bd(r, a): def get_ra(b, d): - """ - Converts birth and death to turnover and relative extinction rates. + """Converts birth and death to turnover and relative extinction rates. Args: b (float): birth rate @@ -46,8 +39,7 @@ def get_ra(b, d): def wrapped_lik_constant(x, sampling, ages): - """ - Wrapper for birth-death likelihood to make optimizing more convenient. 
+ """Wrapper for birth-death likelihood to make optimizing more convenient. Args: x (float, float): turnover, relative extinction @@ -61,8 +53,7 @@ def wrapped_lik_constant(x, sampling, ages): def wrapped_lik_constant_yule(x, sampling, ages): - """ - Wrapper for Yule likelihood to make optimizing more convenient. + """Wrapper for Yule likelihood to make optimizing more convenient. Args: x (float): birth rate @@ -76,9 +67,9 @@ def wrapped_lik_constant_yule(x, sampling, ages): def two_step_optim(func, x0, bounds, args): - """ - Conduct a two-step function optimization, first by using the fast L-BFGS-B method, - and if that fails, use simulated annealing. + """Conduct a two-step function optimization. + + First, use the fast L-BFGS-B method, and if that fails, use simulated annealing. Args: func (callable): function to optimize @@ -104,8 +95,7 @@ def two_step_optim(func, x0, bounds, args): def optim_bd(ages, sampling, min_bound=1e-9): - """ - Optimizes birth and death parameters given a vector of splitting times and sampling fraction. + """Optimizes birth and death parameters given a vector of splitting times and sampling fraction. Args: ages (list): vector of node ages @@ -128,8 +118,7 @@ def optim_bd(ages, sampling, min_bound=1e-9): def optim_yule(ages, sampling, min_bound=1e-9): - """ - Optimizes birth parameter under a Yule model, given a vector of splitting times and sampling fraction. + """Optimizes birth parameter under a Yule model, given a vector of splitting times and sampling fraction. Args: ages (list): vector of node ages @@ -149,7 +138,7 @@ def optim_yule(ages, sampling, min_bound=1e-9): def p0_exact(t, l, m, rho): # noqa: E741 - "Exact version of `p0` using Decimal math." + """Exact version of `p0` using Decimal math.""" t = D(t) l = D(l) # noqa: E741 m = D(m) @@ -158,6 +147,23 @@ def p0_exact(t, l, m, rho): # noqa: E741 def p0(t, l, m, rho): # noqa: E741 + """Compute the probability of no sampled descendants. 
+ + Specifically, this is the probability that an individual alive at time `t` before today has no + sampled extinct or extant descendants, and assumes that there is no sampling in the past. This + can alternatively be interpreted as the probability of sampling zero extant individuals and + potentially infinite extinct individuals. + + This equation is described as remark 3.2 in: + + Stadler, T. (2010). Sampling-through-time in birth-death trees. + Journal of Theoretical Biology, 267(3), 396-404. + + It was originally implemented as `TreePar:::p0`, whose original description was in: + + Stadler, T. (2011). Mammalian phylogeny reveals recent diversification rate shifts. + Proceedings of the National Academy of Sciences, 108(15), 6187-6192. + """ try: return 1 - rho * (l - m) / (rho * l + (l * (1 - rho) - m) * exp(-(l - m) * t)) except FloatingPointError: @@ -189,9 +195,25 @@ def p1_orig(t, l, m, rho): # noqa: E741 def p1(t, l, m, rho): # noqa: E741 - """ - Optimized version of `p1_orig` using common subexpression elimination and strength reduction - from exponentiation to multiplication. + """Compute the probability of exactly one sampled descendant. + + Specifically, the probability that an individual alive at time `t` before today has precisely one sampled + extant descendant and no sampled extinct descendant, and assumes that there is no sampling in the past. + This can alternatively be interpreted as the probability of sampling exactly one extant individual and + potentially infinite extinct individuals. + + This implementation is an optimized version of `p1_orig`, using common subexpression elimination + and strength reduction from exponentiation to multiplication. + + This equation is described as remark 3.2 in: + + Stadler, T. (2010). Sampling-through-time in birth-death trees. + Journal of Theoretical Biology, 267(3), 396-404. + + It was originally implemented as `TreePar:::p1`, whose original description was in: + + Stadler, T. (2011). 
Mammalian phylogeny reveals recent diversification rate shifts. + Proceedings of the National Academy of Sciences, 108(15), 6187-6192. """ try: ert = np.exp(-(l - m) * t, dtype=np.float64) @@ -216,6 +238,16 @@ def intp1_exact(t, l, m): # noqa: E741 def intp1(t, l, m): # noqa: E741 + """Computes a constant necessary to sample the time of a missing speciation event. + + This constant is not named, but was used in eqn A.2 and called c_2, described in: + + N. Cusimano, T. Stadler, S. Renner. A new method for handling missing + species in diversification analysis applicable to randomly or + non-randomly sampled phylogenies. Syst. Biol., 61(5): 785-792, 2012. + + This function was originally implemented as `TreeSim:::intp1`. + """ try: return (1 - exp(-(l - m) * t)) / (l - m * exp(-(l - m) * t)) except OverflowError: @@ -223,11 +255,10 @@ def intp1(t, l, m): # noqa: E741 def lik_constant(vec, rho, t, root=1, survival=1, p1=p1): - """ - Calculates the likelihood of a constant-rate birth-death process, conditioned - on the waiting times of a phylogenetic tree and degree of incomplete sampling. + """Calculates the likelihood of a constant-rate birth-death process. - Based off of the R function `TreePar::LikConstant` written by Tanja Stadler. + This likelihood function is conditioned on the waiting times of a phylogenetic tree and + degree of incomplete sampling. Based off of the R function `TreePar::LikConstant` written by Tanja Stadler. T. Stadler. On incomplete sampling under birth-death models and connections to the sampling-based coalescent. Jour. Theo. Biol. 261: 58-66, 2009. @@ -238,6 +269,7 @@ def lik_constant(vec, rho, t, root=1, survival=1, p1=p1): t (list): vector of waiting times root (bool): include the root or not? (default: 1) survival (bool): assume survival of the process? (default: 1) + p1 (func): the `p1` function used to compute this likelihood. Returns: (float): likelihood of the birth-death process. 
@@ -254,9 +286,10 @@ def lik_constant(vec, rho, t, root=1, survival=1, p1=p1): def crown_capture_probability(n, k): - """ - Calculate the probability that a sample of `k` taxa from a clade - of `n` total taxa includes a root node, under a Yule process. + """Calculate the probability of observing the crown node of an incompletely sampled node. + + That is, the probability that a sample of `k` taxa from a clade of `n` total taxa + includes the root (crown) node of the clade, under a Yule process. This equation is taken from: @@ -279,11 +312,10 @@ def crown_capture_probability(n, k): # TODO: This could probably be optimized def get_new_times(ages, birth, death, missing, told=None, tyoung=None): - """ - Simulates new speciation events in an incomplete phylogeny assuming a - constant-rate birth-death process. + """Simulates new speciation events in an incomplete phylogeny. - Adapted from the R function `TreeSim::corsim` written by Tanja Stadler. + Assumes a constant-rate birth-death process. Adapted from the R function `TreeSim::corsim`, + written by Tanja Stadler. N. Cusimano, T. Stadler, S. Renner. 
A new method for handling missing species in diversification analysis applicable to randomly or @@ -310,7 +342,7 @@ def get_new_times(ages, birth, death, missing, told=None, tyoung=None): ages.sort(reverse=True) times = [x for x in ages if told >= x >= tyoung] - times = [told] + times + [tyoung] + times = [told, *times, tyoung] ranks = range(0, len(times)) only_new = [] while missing > 0: diff --git a/tact/tree_util.py b/tact/tree_util.py index 5f2df78..2387978 100644 --- a/tact/tree_util.py +++ b/tact/tree_util.py @@ -1,5 +1,3 @@ -# -*- coding: utf-8 -*- - """Functions specifically to handle DendroPy tree objects.""" import math @@ -8,16 +6,16 @@ import dendropy import portion -from .lib import optim_bd -from .lib import optim_yule from .exceptions import DisjointConstraintError +from .lib import optim_bd, optim_yule def get_birth_death_rates(node, sampfrac, yule=False, include_root=False): - """ - Estimates the birth and death rates for the subtree descending from - `node` with sampling fraction `sampfrac`. Optionally restrict to a - Yule pure-birth model. + """Estimate birth-death rates from a subtree. + + These birth and death rates descend from `node` with sampling fraction `sampfrac`. + + Optionally restrict to a Yule pure-birth model. """ if yule: return optim_yule(get_ages(node, include_root), sampfrac) @@ -35,7 +33,8 @@ def get_monophyletic_node(tree, species): def get_ages(node, include_root=False): - """ + """Get list of ages under a node. + Returns the list of ages of the children of a given `node`, optionally including the `node`'s age if `include_root` is True. """ @@ -54,8 +53,8 @@ def get_tip_labels(tree_or_node): def edge_iter(node, filter_fn=None): - """ - Iterates over the child edge of `node` and all its descendants. + """Iterates over the child edge of `node` and all its descendants. + Can optionally be filtered by `filter_fn`. 
""" stack = list(node.child_edge_iter()) @@ -67,18 +66,16 @@ def edge_iter(node, filter_fn=None): def get_tree(path, namespace=None): - """ - Gets a DendroPy tree from a path and precalculate its node ages and bipartition bitmask. - """ + """Gets a DendroPy tree from a path and precalculate its node ages and bipartition bitmask.""" tree = dendropy.Tree.get_from_path(path, schema="newick", taxon_namespace=namespace, rooting="default-rooted") update_tree_view(tree) return tree def update_tree_view(tree): - """ - Mutates a DendroPy tree object with updated node ages and bipartition bitmask. We also - correct for minor ultrametricity errors. + """Perform an in-place update of a DendroPy tree object with node ages and bipartition bitmask. + + We also correct for minor ultrametricity errors. Returns a list of tip labels. """ @@ -129,13 +126,11 @@ def compute_node_depths(tree): def graft_node(graft_recipient, graft, stem=False): - """ - Grafts a node `graft` randomly in the subtree below node - `graft_recipient`. The attribute `graft.age` must be set so - we know where is the best place to graft the node. The node - `graft` can optionally have child nodes, in this case the - `edge.length` attribute should be set on all child nodes if - the tree is to remain ultrametric. + """Grafts a node `graft` randomly in the subtree below node `graft_recipient`. + + The attribute `graft.age` must be set so we know where the best place is to graft the node. + The node `graft` can optionally have child nodes, in this case the `edge.length` attribute + should be set on all child nodes if the tree is to remain ultrametric. We graft things "below" a node by picking one of the children of that node and forcing it to be sister to the grafted node @@ -192,9 +187,7 @@ def filter_fn(x): def lock_clade(node, stem=False): - """ - Locks a clade descending from `node` so future grafts will avoid locked edges. 
- """ + """Locks a clade descending from `node` so future grafts will avoid locked edges.""" for edge in edge_iter(node): edge.label = "locked" if stem: @@ -202,9 +195,7 @@ def lock_clade(node, stem=False): def unlock_clade(node, stem=False): - """ - Unlocks a clade descending from `node` so new tips can be grafted to its edges. - """ + """Unlocks a clade descending from `node` so new tips can be grafted to its edges.""" for edge in edge_iter(node): edge.label = "" if stem: @@ -217,16 +208,14 @@ def count_locked(node): def is_fully_locked(node): - """ - Are all the edges below `node` locked? - """ + """Are all the edges below `node` locked?""" return all(x.label == "locked" for x in edge_iter(node)) def get_min_age(node): - """ - Gets the minimum possible age that could be generated in a clade under `node`, - assuming that grafts to locked edges are restricted. + """Gets the minimum possible age that could be generated in a clade under `node`. + + This assumes that grafts to locked edges are restricted. """ interval = get_age_intervals(node) @@ -240,9 +229,9 @@ def get_min_age(node): def get_age_intervals(node): - """ - Gets the (possibly disjoint) interval that could be generated in the - clade under `node`, assuming that grafts to locked edges are restricted. + """Gets the (possibly disjoint) interval that could be generated in the clade under `node`. + + This assumes that grafts to locked edges are restricted. 
""" acc = portion.empty() for edge in edge_iter(node, lambda x: x.label != "locked"): diff --git a/tact/validation.py b/tact/validation.py index ec59ecc..2312291 100644 --- a/tact/validation.py +++ b/tact/validation.py @@ -1,5 +1,3 @@ -# -*- coding: utf-8 -*- - """Various validation functions for `click` classes and parameters.""" import collections @@ -7,10 +5,7 @@ import click import dendropy -from .tree_util import compute_node_depths -from .tree_util import is_binary -from .tree_util import is_ultrametric -from .tree_util import update_tree_view +from .tree_util import compute_node_depths, is_binary, is_ultrametric, update_tree_view def validate_outgroups(ctx, param, value): @@ -51,8 +46,7 @@ def validate_taxonomy_tree(ctx, param, value): class BackboneCommand(click.Command): - """ - Helper class to validate a Click Command that contains a backbone tree. + """Helper class to validate a Click Command that contains a backbone tree. At a minimum, the Command must contain a `backbone` parameter, which is validated by `validate_newick` and checked to ensure it is a binary tree. @@ -67,10 +61,11 @@ class BackboneCommand(click.Command): """ def validate_backbone_variables(self, ctx, params): + """Validates variables related to the backbone and taxonomy files.""" if "taxonomy" in params: tn = params["taxonomy"].taxon_namespace tn.is_mutable = True - if "outgroups" in params and params["outgroups"]: + if params.get("outgroups"): tn.new_taxa(params["outgroups"]) tn.is_mutable = False try: @@ -82,7 +77,7 @@ def validate_backbone_variables(self, ctx, params): This usually indicates your backbone has species that are not present in your taxonomy. Outgroups not in the taxonomy can be excluded with the --outgroups argument. 
""" - raise click.BadParameter(msg) + raise click.BadParameter(msg) from None else: backbone = validate_newick(ctx, params, params["backbone"]) @@ -99,12 +94,13 @@ def validate_backbone_variables(self, ctx, params): Increase `--ultrametricity-precision` or use phytools::force.ultrametric in R """ - raise click.BadParameter(msg) + raise click.BadParameter(msg) from None params["backbone"] = backbone return params def make_context(self, *args, **kwargs): - ctx = super(BackboneCommand, self).make_context(*args, **kwargs) + """Set up the proper Click context for a command handler.""" + ctx = super().make_context(*args, **kwargs) ctx.params = self.validate_backbone_variables(ctx, ctx.params) return ctx diff --git a/tests/conftest.py b/tests/conftest.py index 2d98b36..3090141 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,6 +1,5 @@ -from __future__ import division - import os + import pytest diff --git a/tests/test_add_taxa_integration.py b/tests/test_add_taxa_integration.py index 5703777..e6d5d59 100644 --- a/tests/test_add_taxa_integration.py +++ b/tests/test_add_taxa_integration.py @@ -1,7 +1,7 @@ -import pytest -import sys import os +import sys +import pytest from dendropy import Tree execution_number = range(2) @@ -76,17 +76,17 @@ def test_yule(script_runner, execution_number, datadir): @pytest.mark.parametrize("stem", ["weirdness", "intrusion", "short_branch", "stem"]) def test_monophyly(script_runner, execution_number, datadir, stem): tacted, taxed, bbone = run_tact(script_runner, datadir, stem) - extant = set([x.taxon.label for x in bbone.leaf_nodes()]) + extant = {x.taxon.label for x in bbone.leaf_nodes()} for node in taxed.postorder_internal_node_iter(exclude_seed_node=True): - expected = set([x.taxon.label for x in node.leaf_nodes()]) + expected = {x.taxon.label for x in node.leaf_nodes()} our_extant = extant & expected if len(our_extant) > 0: bbone_node = bbone.mrca(taxon_labels=our_extant) - bbone_tips = set([x.taxon.label for x in 
bbone_node.leaf_nodes()]) + bbone_tips = {x.taxon.label for x in bbone_node.leaf_nodes()} if bbone_tips != our_extant: continue mrca = tacted.mrca(taxon_labels=expected) - actual = set([x.taxon.label for x in mrca.leaf_nodes()]) + actual = {x.taxon.label for x in mrca.leaf_nodes()} assert expected == actual diff --git a/tests/test_add_toml_integration.py b/tests/test_add_toml_integration.py index 433b050..c27c9b2 100644 --- a/tests/test_add_toml_integration.py +++ b/tests/test_add_toml_integration.py @@ -1,12 +1,14 @@ -import pytest import sys +import pytest from dendropy import Tree execution_number = range(2) -def run_tact(script_runner, tmp_path, config, backbone, args=[]): +def run_tact(script_runner, tmp_path, config, backbone, args=None): + if args is None: + args = [] config_path = tmp_path / "conf.toml" backbone_path = tmp_path / "backbone.tre" config_path.write_text(config) @@ -48,7 +50,7 @@ def test_lone_singleton(script_runner, execution_number, tmp_path, focal_clade): backbone = "((A:1,B:1):1,C:2);" res = run_tact(script_runner, tmp_path, config, backbone) new_tips = [f"{focal_clade} tact {x}" for x in range(10)] - all_tips = set([focal_clade] + new_tips) + all_tips = {focal_clade, *new_tips} mrca_node = res.mrca(taxon_labels=all_tips) - mrca_tips = set([x.taxon.label for x in mrca_node.leaf_iter()]) + mrca_tips = {x.taxon.label for x in mrca_node.leaf_iter()} assert all_tips == mrca_tips diff --git a/tests/test_bd_optim.py b/tests/test_bd_optim.py index 0be00dd..be68ac0 100644 --- a/tests/test_bd_optim.py +++ b/tests/test_bd_optim.py @@ -1,9 +1,7 @@ -from __future__ import division - import math -from hypothesis import given, settings, example import hypothesis.strategies as st +from hypothesis import example, given, settings from tact.lib import optim_bd diff --git a/tests/test_convert.py b/tests/test_convert.py index 5581ae4..aeaaef1 100644 --- a/tests/test_convert.py +++ b/tests/test_convert.py @@ -1,8 +1,6 @@ -from __future__ import division - 
-import pytest -from hypothesis import given, assume import hypothesis.strategies as st +import pytest +from hypothesis import assume, given from tact.lib import get_bd, get_ra diff --git a/tests/test_create_clade.py b/tests/test_create_clade.py index c96659c..a5b120a 100644 --- a/tests/test_create_clade.py +++ b/tests/test_create_clade.py @@ -1,9 +1,6 @@ -from __future__ import division - -from hypothesis import given, assume import hypothesis.strategies as st - from dendropy import TaxonNamespace +from hypothesis import assume, given from tact.cli_add_taxa import create_clade from tact.tree_util import edge_iter diff --git a/tests/test_lik_constant.py b/tests/test_lik_constant.py index 9312088..62984aa 100644 --- a/tests/test_lik_constant.py +++ b/tests/test_lik_constant.py @@ -1,9 +1,8 @@ -from __future__ import division -import pytest -from hypothesis import given, assume import hypothesis.strategies as st +import pytest +from hypothesis import assume, given -from tact.lib import p1, p1_orig, p1_exact, lik_constant +from tact.lib import lik_constant, p1, p1_exact, p1_orig def test_lik_constant_exact(benchmark, birth, death, sampling, ages): diff --git a/tests/test_yule.py b/tests/test_yule.py index c5165d1..d10f4df 100644 --- a/tests/test_yule.py +++ b/tests/test_yule.py @@ -1,7 +1,5 @@ -from __future__ import division - -from hypothesis import given, example import hypothesis.strategies as st +from hypothesis import example, given from tact.lib import optim_yule