Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "grumpy"
version = "1.0.1"
version = "1.1.0"
edition = "2021"
description = "Genetic analysis in Rust."
license-file = "LICENSE"
Expand Down
223,881 changes: 223,881 additions & 0 deletions reference/NZ_CP085945.1.gbk

Large diffs are not rendered by default.

12 changes: 2 additions & 10 deletions src/common.rs
Original file line number Diff line number Diff line change
Expand Up @@ -135,12 +135,8 @@ pub struct GeneDef {
pub reverse_complement: bool,

#[pyo3(get, set)]
/// Genome index of the gene start
pub start: i64,

#[pyo3(get, set)]
/// Genome index of the gene end
pub end: i64,
/// Genome indicies for the start/end of each range of the gene
pub ranges: Vec<(i64, i64)>,

#[pyo3(get, set)]
/// Genome index of the gene promoter start
Expand All @@ -149,10 +145,6 @@ pub struct GeneDef {
#[pyo3(get, set)]
/// Number of bases in the promoter
pub promoter_size: i64,

#[pyo3(get, set)]
/// Vec of duplicated positions due to ribosomal shifts
pub ribosomal_shifts: Vec<i64>,
}

#[pyclass(eq)]
Expand Down
67 changes: 31 additions & 36 deletions src/gene.rs
Original file line number Diff line number Diff line change
Expand Up @@ -117,10 +117,6 @@ pub struct Gene {
/// Sequence of amino acid numbers coded for by the gene. Empty if gene is non-coding
pub amino_acid_number: Vec<i64>,

#[pyo3(get, set)]
/// Positions of genome indicies duplicated by ribosomal shifts
pub ribosomal_shifts: Vec<i64>,

#[pyo3(get, set)]
/// Codons of the gene. Empty if gene is non-coding
pub codons: Vec<String>,
Expand Down Expand Up @@ -162,30 +158,6 @@ impl Gene {
// Ensure we pick up deletions upstream or downstream of the gene
Gene::adjust_dels(&mut genome_positions, &gene_def);

for pos in gene_def.ribosomal_shifts.iter() {
// Figure out the index of the vectors to insert the ribosomal shift
let mut idx: i64 = -1;
for (i, nc_idx) in nucleotide_index.iter().enumerate() {
if nc_idx == pos {
idx = i as i64;
break;
}
}
if idx == -1 {
panic!(
"Ribosomal shift position {} not found in gene {}",
pos, gene_def.name
);
}
// Duplicate those indices
nucleotide_sequence.insert(
idx as usize,
nucleotide_sequence.chars().nth(idx as usize).unwrap(),
);
nucleotide_index.insert(idx as usize, nucleotide_index[idx as usize]);
genome_positions.insert(idx as usize, genome_positions[idx as usize].clone());
}

if gene_def.reverse_complement {
// Reverse complement the sequence
nucleotide_sequence = nc_sequence
Expand Down Expand Up @@ -228,7 +200,7 @@ impl Gene {
// Insertion
if *pos == 0 {
// Warn the user about a missing insertion, and skip it
println!(
eprintln!(
"Insertion at start of gene {} is revcomp and cannot be adjusted. Skipping!",
gene_def.name
);
Expand Down Expand Up @@ -334,10 +306,20 @@ impl Gene {
}
let prom_end = nucleotide_number.len();
// Now non-promoter
let mut total_length = gene_def
.ranges
.iter()
.map(|(start, end)| (start - end).abs())
.sum::<i64>()
+ (gene_def.ranges.len() as i64 - 1);
if gene_def.ranges.len() > 1 {
total_length += gene_def.ranges.len() as i64 - 1;
} else {
total_length += 1;
}

let mut nc_idx = prom_end;
for i in
1..(gene_def.start - gene_def.end).abs() + gene_def.ribosomal_shifts.len() as i64 + 1
{
for i in 1..total_length {
nucleotide_number.push(i);
if !gene_def.coding {
// No adjustment needed for non-coding as gene pos == nucleotide num
Expand All @@ -364,9 +346,18 @@ impl Gene {
let mut codon_idx = 1;
for (nc_num, i) in (prom_end..nucleotide_sequence.len()).enumerate() {
codon.push(nucleotide_sequence.chars().nth(i).unwrap());
genome_idx_map.insert(nucleotide_index[i], (codon_idx, Some((nc_num % 3) as i64)));
if codon.len() == 3 {
// Codon is complete
genome_idx_map.insert(
nucleotide_index[i - 2],
(codon_idx, Some(((nc_num - 2) % 3) as i64)),
);
genome_idx_map.insert(
nucleotide_index[i - 1],
(codon_idx, Some(((nc_num - 1) % 3) as i64)),
);
genome_idx_map
.insert(nucleotide_index[i], (codon_idx, Some((nc_num % 3) as i64)));
amino_acid_sequence.push(codon_to_aa(codon.clone()));
codons.push(codon.clone());
gene_number.push(codon_idx);
Expand Down Expand Up @@ -407,8 +398,13 @@ impl Gene {
codon = "".to_string();
}
}
if !codon.is_empty() {
panic!("Incomplete codon at end of gene {}", gene_def.name);
if !codon.is_empty() && !gene_def.name.starts_with("INCOMPLETE_") {
// This theoretically should not happen as incomplete genes should be marked,
// but catch these and complain loudly if it does
panic!(
"Incomplete codon at end of gene {}: {:?}",
gene_def.name, gene_def
);
}
}

Expand All @@ -424,7 +420,6 @@ impl Gene {
nucleotide_number,
amino_acid_sequence,
amino_acid_number,
ribosomal_shifts: gene_def.ribosomal_shifts,
codons,
genome_idx_map,
}
Expand Down
Loading