From ff6de6c79730ca896f059b170ef52a0c0e9009bc Mon Sep 17 00:00:00 2001 From: Mossa Date: Mon, 2 Jun 2025 12:23:25 +0200 Subject: [PATCH 1/2] README: added syntax highlighting, fixed lints from `markdownlint`, and formatted the code examples. Plus headers. --- README.md | 377 ++++++++++++++++++++++------------------- heatmap/README.md | 14 +- microBioRust/README.md | 185 ++++++++++---------- seqmetrics/README.md | 2 + 4 files changed, 313 insertions(+), 265 deletions(-) diff --git a/README.md b/README.md index 23dea08..8381102 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,7 @@ # microBioRust + ## A Rust bioinformatics crate aimed at Microbial genomics
+ The aim of this crate is to provide Microbiology friendly Rust functions for bioinformatics.
Very much under construction!
@@ -14,189 +16,211 @@ Questions and comments - please join the Discord server :) [here](https://discor Currently there is functionality for:
- # 1. A Genbank to GFF parser - # 2. An Embl to GFF and GBK parser - # 3. A Heatmap plot with wasm and d3.js +# 1. A Genbank to GFF parser + +# 2. An Embl to GFF and GBK parser + +# 3. A Heatmap plot with wasm and d3.js To use a specific workspace (at the moment microSeqIO or heatmap) clone the project, cd into the specific directory required and build the project from there -for more background please see https://LCrossman.github.io/microBioRust_details +for more background please see <https://LCrossman.github.io/microBioRust_details> In microBioRust: - You can parse genbank files and save as a GFF (gff3) format as well as extracting DNA sequences, gene DNA sequences (ffn) and protein fasta sequences (faa) - Super simple way: -``` +You can parse genbank files and save as a GFF (gff3) format as well as extracting DNA sequences, gene DNA sequences (ffn) and protein fasta sequences (faa) +Super simple way: + +```rust pub fn genbank_to_faa() -> Result<(), anyhow::Error> { - let args = Arguments::parse(); - let records = genbank!(&args.filename); - for record in records.iter() { - for (k, v) in &record.cds.attributes { - if let Some(seq) = record.seq_features.get_sequence_faa(k) { - println!(">{}|{}\n{}", &record.id, &k, seq); - } - } + let args = Arguments::parse(); + let records = genbank!(&args.filename); + for record in records.iter() { + for (k, v) in &record.cds.attributes { + if let Some(seq) = record.seq_features.get_sequence_faa(k) { + println!(">{}|{}\n{}", &record.id, &k, seq); } - return Ok(()); + } + } + return Ok(()); } + ``` - Better for Debugging: +Better for Debugging: + +```rust +pub fn genbank_to_faa() -> Result<(), anyhow::Error> { + let args: Vec<String> = env::args().collect(); + let config = Config::new(&args).unwrap_or_else(|err| { + println!("Problem with parsing file arguments: {}", err); + process::exit(1); + }); + let file_gbk = fs::File::open(config.filename)?; + let mut reader = Reader::new(file_gbk); + let mut records = reader.records(); + let mut cds_counter: u32 = 0; + loop { + //collect from 
each record advancing on a next record basis, count cds records + match records.next() { + Some(Ok(mut record)) => { + for (k, v) in &record.cds.attributes { + match record.seq_features.get_sequence_faa(&k) { + Some(value) => { + let seq_faa = value.to_string(); + println!(">{}|{}\n{}", &record.id, &k, seq_faa); + } + _ => (), + }; + } + cds_counter += 1; + } + Some(Err(e)) => { + println!("Error encountered - an err {:?}", e); + } + None => { + println!("finished iteration"); + break; + } + } + } + println!("Total records processed: {}", read_counter); + return Ok(()); +} ``` - pub fn genbank_to_faa() -> Result<(), anyhow::Error> { - let args: Vec = env::args().collect(); - let config = Config::new(&args).unwrap_or_else(|err| { - println!("Problem with parsing file arguments: {}", err); - process::exit(1); - }); - let file_gbk = fs::File::open(config.filename)?; - let mut reader = Reader::new(file_gbk); - let mut records = reader.records(); - let mut cds_counter: u32 = 0; - loop { - //collect from each record advancing on a next record basis, count cds records - match records.next() { - Some(Ok(mut record)) => { - for (k, v) in &record.cds.attributes { - match record.seq_features.get_sequence_faa(&k) { - Some(value) => { let seq_faa = value.to_string(); - println!(">{}|{}\n{}", &record.id, &k, seq_faa); - }, - _ => (), - }; - - } - cds_counter+=1; - }, - Some(Err(e)) => { println!("Error encountered - an err {:?}", e); }, - None => { - println!("finished iteration"); - break; }, + +Example to save a provided multi- or single genbank file as a GFF file (by joining any multi-genbank) + +```rust +pub fn genbank_to_gff() -> io::Result<()> { + let args: Vec = env::args().collect(); + let config = Config::new(&args).unwrap_or_else(|err| { + println!("Problem with parsing file arguments: {}", err); + process::exit(1); + }); + let file_gbk = fs::File::open(&config.filename)?; + let prev_start: u32 = 0; + let mut prev_end: u32 = 0; + let mut reader = Reader::new(file_gbk); 
+ let mut records = reader.records(); + let mut read_counter: u32 = 0; + let mut seq_region: BTreeMap = BTreeMap::new(); + let mut record_vec: Vec = Vec::new(); + loop { + match records.next() { + Some(Ok(mut record)) => { + //println!("next record"); + //println!("Record id: {:?}", record.id); + let source = record.source_map.source_name.clone().expect("issue collecting source name"); + let beginning = match record.source_map.get_start(&source) { + Some(value) => value.get_value(), + _ => 0, + }; + let ending = match record.source_map.get_stop(&source) { + Some(value) => value.get_value(), + _ => 0, + }; + if ending + prev_end < beginning + prev_end { } - } - println!("Total records processed: {}", read_counter); - return Ok(()); - } + seq_region.insert(source, (beginning + prev_end, ending + prev_end)); + record_vec.push(record); + // Add additional fields to print if needed + read_counter+=1; + prev_end+=ending; // create the joined record if there are multiple + }, + Some(Err(e)) => { println!("theres an err {:?}", e); }, + None => { + println!("finished iteration"); + break; }, + } + } + let output_file = format!("{}.gff", &config.filename); + gff_write(seq_region.clone(), record_vec, &output_file, true); + println!("Total records processed: {}", read_counter); + return Ok(()); +} ``` - Example to save a provided multi- or single genbank file as a GFF file (by joining any multi-genbank) +Example to create a completely new record, use of setters or set_ functionality +To write into GFF format requires gff_write(seq_region, record_vec, filename, true or false) -``` - pub fn genbank_to_gff() -> io::Result<()> { - let args: Vec = env::args().collect(); - let config = Config::new(&args).unwrap_or_else(|err| { - println!("Problem with parsing file arguments: {}", err); - process::exit(1); - }); - let file_gbk = fs::File::open(&config.filename)?; - let prev_start: u32 = 0; - let mut prev_end: u32 = 0; - let mut reader = Reader::new(file_gbk); - let mut records = 
reader.records(); - let mut read_counter: u32 = 0; - let mut seq_region: BTreeMap = BTreeMap::new(); - let mut record_vec: Vec = Vec::new(); - loop { - match records.next() { - Some(Ok(mut record)) => { - //println!("next record"); - //println!("Record id: {:?}", record.id); - let source = record.source_map.source_name.clone().expect("issue collecting source name"); - let beginning = match record.source_map.get_start(&source) { - Some(value) => value.get_value(), - _ => 0, - }; - let ending = match record.source_map.get_stop(&source) { - Some(value) => value.get_value(), - _ => 0, - }; - if ending + prev_end < beginning + prev_end { - } - seq_region.insert(source, (beginning + prev_end, ending + prev_end)); - record_vec.push(record); - // Add additional fields to print if needed - read_counter+=1; - prev_end+=ending; // create the joined record if there are multiple - }, - Some(Err(e)) => { println!("theres an err {:?}", e); }, - None => { - println!("finished iteration"); - break; }, - } - } - let output_file = format!("{}.gff", &config.filename); - gff_write(seq_region.clone(), record_vec, &output_file, true); - println!("Total records processed: {}", read_counter); - return Ok(()); -``` - Example to create a completely new record, use of setters or set_ functionality - - To write into GFF format requires gff_write(seq_region, record_vec, filename, true or false) - - The seq_region is the region of interest to save with name and DNA coordinates such as ``` seqregion.entry("source_1".to_string(), (1,897))``` - This makes it possible to save the whole file or to subset it - - record_vec is a list of the records. 
If there is only one record, include this as a vec using ``` vec![record] ``` - - The boolean true/false describes whether the DNA sequence should be included in the GFF3 file - - To write into genbank format requires gbk_write(seq_region, record_vec, filename), no true or false since genbank format will include the DNA sequence - - ``` - pub fn create_new_record() -> Result<(), anyhow::Error> { - let filename = format!("new_record.gff"); - let mut record = Record::new(); - let mut seq_region: BTreeMap = BTreeMap::new(); - //example from E.coli K12 - seq_region.insert("source_1".to_string(), (1,897)); - //Add the source into SourceAttributes - record.source_map - .set_counter("source_1".to_string()) - .set_start(RangeValue::Exact(1)) - .set_stop(RangeValue::Exact(897)) - .set_organism("Escherichia coli".to_string()) - .set_mol_type("DNA".to_string()) - .set_strain("K-12 substr. MG1655".to_string()) - .set_type_material("type strain of Escherichia coli K12".to_string()) - .set_db_xref("PRJNA57779".to_string()); - //Add the features into FeatureAttributes, here we are setting two features, i.e. 
coding sequences or genes - record.cds - .set_counter("b3304".to_string()) - .set_start(RangeValue::Exact(1)) - .set_stop(RangeValue::Exact(354)) - .set_gene("rplR".to_string()) - .set_product("50S ribosomal subunit protein L18".to_string()) - .set_codon_start(1) - .set_strand(-1); - record.cds - .set_counter("b3305".to_string()) - .set_start(RangeValue::Exact(364)) - .set_stop(RangeValue::Exact(897)) - .set_gene("rplF".to_string()) - .set_product("50S ribosomal subunit protein L6".to_string()) - .set_codon_start(1) - .set_strand(-1); - //Add the sequences for the coding sequence (CDS) into SequenceAttributes - record.seq_features - .set_counter("b3304".to_string()) - .set_start(RangeValue::Exact(1)) - .set_stop(RangeValue::Exact(354)) - .set_sequence_ffn("ATGGATAAGAAATCTGCTCGTATCCGTCGTGCGACCCGCGCACGCCGCAAGCTCCAGGAG +The seq_region is the region of interest to save with name and DNA coordinates such as `seqregion.entry("source_1".to_string(), (1,897))` + +This makes it possible to save the whole file or to subset it + +record_vec is a list of the records. If there is only one record, include this as a vec using `vec![record]` + +The boolean true/false describes whether the DNA sequence should be included in the GFF3 file + +To write into genbank format requires gbk_write(seq_region, record_vec, filename), no true or false since genbank format will include the DNA sequence + + ```rust +pub fn create_new_record() -> Result<(), anyhow::Error> { + let filename = format!("new_record.gff"); + let mut record = Record::new(); + let mut seq_region: BTreeMap = BTreeMap::new(); + //example from E.coli K12 + seq_region.insert("source_1".to_string(), (1, 897)); + //Add the source into SourceAttributes + record + .source_map + .set_counter("source_1".to_string()) + .set_start(RangeValue::Exact(1)) + .set_stop(RangeValue::Exact(897)) + .set_organism("Escherichia coli".to_string()) + .set_mol_type("DNA".to_string()) + .set_strain("K-12 substr. 
MG1655".to_string()) + .set_type_material("type strain of Escherichia coli K12".to_string()) + .set_db_xref("PRJNA57779".to_string()); + //Add the features into FeatureAttributes, here we are setting two features, i.e. coding sequences or genes + record + .cds + .set_counter("b3304".to_string()) + .set_start(RangeValue::Exact(1)) + .set_stop(RangeValue::Exact(354)) + .set_gene("rplR".to_string()) + .set_product("50S ribosomal subunit protein L18".to_string()) + .set_codon_start(1) + .set_strand(-1); + record + .cds + .set_counter("b3305".to_string()) + .set_start(RangeValue::Exact(364)) + .set_stop(RangeValue::Exact(897)) + .set_gene("rplF".to_string()) + .set_product("50S ribosomal subunit protein L6".to_string()) + .set_codon_start(1) + .set_strand(-1); + //Add the sequences for the coding sequence (CDS) into SequenceAttributes + record + .seq_features + .set_counter("b3304".to_string()) + .set_start(RangeValue::Exact(1)) + .set_stop(RangeValue::Exact(354)) + .set_sequence_ffn( + "ATGGATAAGAAATCTGCTCGTATCCGTCGTGCGACCCGCGCACGCCGCAAGCTCCAGGAG CTGGGCGCAACTCGCCTGGTGGTACATCGTACCCCGCGTCACATTTACGCACAGGTAATT GCACCGAACGGTTCTGAAGTTCTGGTAGCTGCTTCTACTGTAGAAAAAGCTATCGCTGAA CAACTGAAGTACACCGGTAACAAAGACGCGGCTGCAGCTGTGGGTAAAGCTGTCGCTGAA CGCGCTCTGGAAAAAGGCATCAAAGATGTATCCTTTGACCGTTCCGGGTTCCAATATCAT -GGTCGTGTCCAGGCACTGGCAGATGCTGCCCGTGAAGCTGGCCTTCAGTTCTAA".to_string()) - .set_sequence_faa("MDKKSARIRRATRARRKLQELGATRLVVHRTPRHIYAQVIAPNGSEVLVAASTVEKAIAE -QLKYTGNKDAAAAVGKAVAERALEKGIKDVSFDRSGFQYHGRVQALADAAREAGLQF".to_string()) - .set_codon_start(1) - .set_strand(-1); - record.seq_features - .set_counter("bb3305".to_string()) - .set_start(RangeValue::Exact(364)) - .set_stop(RangeValue::Exact(897)) - .set_sequence_ffn("ATGTCTCGTGTTGCTAAAGCACCGGTCGTTGTTCCTGCCGGCGTTGACGTAAAAATCAAC +GGTCGTGTCCAGGCACTGGCAGATGCTGCCCGTGAAGCTGGCCTTCAGTTCTAA" + .to_string(), + ) + .set_sequence_faa( + "MDKKSARIRRATRARRKLQELGATRLVVHRTPRHIYAQVIAPNGSEVLVAASTVEKAIAE 
+QLKYTGNKDAAAAVGKAVAERALEKGIKDVSFDRSGFQYHGRVQALADAAREAGLQF" + .to_string(), + ) + .set_codon_start(1) + .set_strand(-1); + record + .seq_features + .set_counter("bb3305".to_string()) + .set_start(RangeValue::Exact(364)) + .set_stop(RangeValue::Exact(897)) + .set_sequence_ffn( + "ATGTCTCGTGTTGCTAAAGCACCGGTCGTTGTTCCTGCCGGCGTTGACGTAAAAATCAAC GGTCAGGTTATTACGATCAAAGGTAAAAACGGCGAGCTGACTCGTACTCTCAACGATGCT GTTGAAGTTAAACATGCAGATAATACCCTGACCTTCGGTCCGCGTGATGGTTACGCAGAC GGTTGGGCACAGGCTGGTACCGCGCGTGCCCTGCTGAACTCAATGGTTATCGGTGTTACC @@ -204,14 +228,19 @@ GAAGGCTTCACTAAGAAGCTGCAGCTGGTTGGTGTAGGTTACCGTGCAGCGGTTAAAGGC AATGTGATTAACCTGTCTCTGGGTTTCTCTCATCCTGTTGACCATCAGCTGCCTGCGGGT ATCACTGCTGAATGTCCGACTCAGACTGAAATCGTGCTGAAAGGCGCTGATAAGCAGGTG ATCGGCCAGGTTGCAGCGGATCTGCGCGCCTACCGTCGTCCTGAGCCTTATAAAGGCAAG -GGTGTTCGTTACGCCGACGAAGTCGTGCGTACCAAAGAGGCTAAGAAGAAGTAA".to_string()) - .set_sequence_faa("MSRVAKAPVVVPAGVDVKINGQVITIKGKNGELTRTLNDAVEVKHADNTLTFGPRDGYAD +GGTGTTCGTTACGCCGACGAAGTCGTGCGTACCAAAGAGGCTAAGAAGAAGTAA" + .to_string(), + ) + .set_sequence_faa( + "MSRVAKAPVVVPAGVDVKINGQVITIKGKNGELTRTLNDAVEVKHADNTLTFGPRDGYAD GWAQAGTARALLNSMVIGVTEGFTKKLQLVGVGYRAAVKGNVINLSLGFSHPVDHQLPAG -ITAECPTQTEIVLKGADKQVIGQVAADLRAYRRPEPYKGKGVRYADEVVRTKEAKKK".to_string()) - .set_codon_start(1) - .set_strand(-1); - //Add the full sequence of the entire record into the record.sequence - record.sequence = "TTAGAACTGAAGGCCAGCTTCACGGGCAGCATCTGCCAGTGCCTGGACACGACCATGATA +ITAECPTQTEIVLKGADKQVIGQVAADLRAYRRPEPYKGKGVRYADEVVRTKEAKKK" + .to_string(), + ) + .set_codon_start(1) + .set_strand(-1); + //Add the full sequence of the entire record into the record.sequence + record.sequence = "TTAGAACTGAAGGCCAGCTTCACGGGCAGCATCTGCCAGTGCCTGGACACGACCATGATA TTGGAACCCGGAACGGTCAAAGGATACATCTTTGATGCCTTTTTCCAGAGCGCGTTCAGC GACAGCTTTACCCACAGCTGCAGCCGCGTCTTTGTTACCGGTGTACTTCAGTTGTTCAGC GATAGCTTTTTCTACAGTAGAAGCAGCTACCAGAACTTCAGAACCGTTCGGTGCAATTAC @@ -225,9 +254,9 @@ TTTAACCGCTGCACGGTAACCTACACCAACCAGCTGCAGCTTCTTAGTGAAGCCTTCGGT 
AACACCGATAACCATTGAGTTCAGCAGGGCACGCGCGGTACCAGCCTGTGCCCAACCGTC TGCGTAACCATCACGCGGACCGAAGGTCAGGGTATTATCTGCATGTTTAACTTCAACAGC ATCGTTGAGAGTACGAGTCAGCTCGCCGTTTTTACCTTTGATCGTAATAACCTGACCGTT -GATTTTTACGTCAACGCCGGCAGGAACAACGACCGGTGCTTTAGCAACACGAGACAT".to_string(); - gff_write(seq_region, vec![record], &filename, true); - return Ok(()); - } +GATTTTTACGTCAACGCCGGCAGGAACAACGACCGGTGCTTTAGCAACACGAGACAT" + .to_string(); + gff_write(seq_region, vec![record], &filename, true); + return Ok(()); +} ``` - diff --git a/heatmap/README.md b/heatmap/README.md index 8b29b55..cbe8bdd 100644 --- a/heatmap/README.md +++ b/heatmap/README.md @@ -1,17 +1,23 @@ +# `heatmap` + This is functionality for a heatmap data visualisation in Rust WebAssembly calling d3.js D3.js (D3 short for data-driven documents) is a Javascript library for dynamic, interactive data viz in browsers. At the moment the heatmap data is coded into the Rust lib.rs as an example, so it is currently working with fixed data and a rusty colour scheme -To install, you can build with wasm-pack 📦✨ +To install, you can build with wasm-pack 📦✨ -``wasm-pack build --target web`` +```shell +wasm-pack build --target web +``` And serve it locally, for example with: -``http-server .`` +```shell +http-server . +``` -*Installation* +## Installation You can install http-server via brew on MacOSX or with npm diff --git a/microBioRust/README.md b/microBioRust/README.md index 89fa02a..a4fa317 100644 --- a/microBioRust/README.md +++ b/microBioRust/README.md @@ -1,5 +1,7 @@ -# microBioRust +# `microBioRust` + ## A Rust bioinformatics crate aimed at Microbial genomics
+ The aim of this crate is to provide Microbiology friendly Rust functions for bioinformatics.
To use a specific workspace (at the moment microSeqIO or heatmap) clone the project, cd into the specific directory required and build the project from there @@ -8,101 +10,111 @@ In microSeqIO: You can parse genbank files and save as a GFF (gff3) format as well as extracting DNA sequences, gene DNA sequences (ffn) and protein fasta sequences (faa) +```rust -``` - pub fn genbank_to_faa() -> Result<(), anyhow::Error> { - let args: Vec<String> = env::args().collect(); - let config = Config::new(&args).unwrap_or_else(|err| { - println!("Problem with parsing file arguments: {}", err); - process::exit(1); - }); - let file_gbk = fs::File::open(config.filename)?; - let mut reader = Reader::new(file_gbk); - let mut records = reader.records(); - let mut cds_counter: u32 = 0; - loop { - //collect from each record advancing on a next record basis, count cds records - match records.next() { - Some(Ok(mut record)) => { - for (k, v) in &record.cds.attributes { - match record.seq_features.get_sequence_faa(&k) { - Some(value) => { let seq_faa = value.to_string(); - println!(">{}|{}\n{}", &record.id, &k, seq_faa); - }, - _ => (), - }; - - } - cds_counter+=1; - }, - Some(Err(e)) => { println!("Error encountered - an err {:?}", e); }, - None => { - println!("finished iteration"); - break; }, - } - } - println!("Total records processed: {}", read_counter); - return Ok(()); - } +pub fn genbank_to_faa() -> Result<(), anyhow::Error> { + let args: Vec<String> = env::args().collect(); + let config = Config::new(&args).unwrap_or_else(|err| { + println!("Problem with parsing file arguments: {}", err); + process::exit(1); + }); + let file_gbk = fs::File::open(config.filename)?; + let mut reader = Reader::new(file_gbk); + let mut records = reader.records(); + let mut cds_counter: u32 = 0; + loop { + //collect from each record advancing on a next record basis, count cds records + match records.next() { + Some(Ok(mut record)) => { + for (k, v) in &record.cds.attributes { + match 
record.seq_features.get_sequence_faa(&k) { + Some(value) => { + let seq_faa = value.to_string(); + println!(">{}|{}\n{}", &record.id, &k, seq_faa); + } + _ => (), + }; + } + cds_counter += 1; + } + Some(Err(e)) => { + println!("Error encountered - an err {:?}", e); + } + None => { + println!("finished iteration"); + break; + } + } + } + println!("Total records processed: {}", read_counter); + return Ok(()); +} ``` Example to save a provided multi- or single genbank file as a GFF file (by joining any multi-genbank) - -``` - pub fn genbank_to_gff() -> io::Result<()> { - let args: Vec = env::args().collect(); - let config = Config::new(&args).unwrap_or_else(|err| { - println!("Problem with parsing file arguments: {}", err); - process::exit(1); - }); - let file_gbk = fs::File::open(&config.filename)?; - let prev_start: u32 = 0; - let mut prev_end: u32 = 0; - let mut reader = Reader::new(file_gbk); - let mut records = reader.records(); - let mut read_counter: u32 = 0; - let mut seq_region: BTreeMap = BTreeMap::new(); - let mut record_vec: Vec = Vec::new(); - loop { - match records.next() { - Some(Ok(mut record)) => { - //println!("next record"); - //println!("Record id: {:?}", record.id); - let source = record.source_map.source_name.clone().expect("issue collecting source name"); - let beginning = match record.source_map.get_start(&source) { - Some(value) => value.get_value(), - _ => 0, - }; - let ending = match record.source_map.get_stop(&source) { - Some(value) => value.get_value(), - _ => 0, - }; - if ending + prev_end < beginning + prev_end { - } - seq_region.insert(source, (beginning + prev_end, ending + prev_end)); - record_vec.push(record); - // Add additional fields to print if needed - read_counter+=1; - prev_end+=ending; // create the joined record if there are multiple - }, - Some(Err(e)) => { println!("theres an err {:?}", e); }, - None => { - println!("finished iteration"); - break; }, - } +```rust +pub fn genbank_to_gff() -> io::Result<()> { + let args: 
Vec = env::args().collect(); + let config = Config::new(&args).unwrap_or_else(|err| { + println!("Problem with parsing file arguments: {}", err); + process::exit(1); + }); + let file_gbk = fs::File::open(&config.filename)?; + let prev_start: u32 = 0; + let mut prev_end: u32 = 0; + let mut reader = Reader::new(file_gbk); + let mut records = reader.records(); + let mut read_counter: u32 = 0; + let mut seq_region: BTreeMap = BTreeMap::new(); + let mut record_vec: Vec = Vec::new(); + loop { + match records.next() { + Some(Ok(mut record)) => { + //println!("next record"); + //println!("Record id: {:?}", record.id); + let source = record + .source_map + .source_name + .clone() + .expect("issue collecting source name"); + let beginning = match record.source_map.get_start(&source) { + Some(value) => value.get_value(), + _ => 0, + }; + let ending = match record.source_map.get_stop(&source) { + Some(value) => value.get_value(), + _ => 0, + }; + if ending + prev_end < beginning + prev_end {} + seq_region.insert(source, (beginning + prev_end, ending + prev_end)); + record_vec.push(record); + // Add additional fields to print if needed + read_counter += 1; + prev_end += ending; // create the joined record if there are multiple } - let output_file = format!("{}.gff", &config.filename); - gff_write(seq_region.clone(), record_vec, &output_file, true); - println!("Total records processed: {}", read_counter); - return Ok(()); + Some(Err(e)) => { + println!("theres an err {:?}", e); + } + None => { + println!("finished iteration"); + break; + } + } + } + let output_file = format!("{}.gff", &config.filename); + gff_write(seq_region.clone(), record_vec, &output_file, true); + println!("Total records processed: {}", read_counter); + return Ok(()); +} ``` + Example to create a completely new record, use of setters or set_ functionality To write into GFF format requires gff_write(seq_region, record_vec, filename, true or false) - The seq_region is the region of interest to save with name 
and DNA coordinates such as ``` seqregion.entry("source_1".to_string(), (1,897))``` - This makes it possible to save the whole file or to subset it + The seq_region is the region of interest to save with name and DNA coordinates such as ```seqregion.entry("source_1".to_string(), (1,897))``` + This makes it possible to save the whole file or to subset it record_vec is a list of the records. If there is only one record, include this as a vec using ``` vec![record] ``` @@ -110,7 +122,7 @@ In microSeqIO: To write into genbank format requires gbk_write(seq_region, record_vec, filename), no true or false since genbank format will include the DNA sequence - ``` + ```rust pub fn create_new_record() -> Result<(), anyhow::Error> { let filename = format!("new_record.gff"); let mut record = Record::new(); @@ -197,4 +209,3 @@ GATTTTTACGTCAACGCCGGCAGGAACAACGACCGGTGCTTTAGCAACACGAGACAT".to_string(); return Ok(()); } ``` - diff --git a/seqmetrics/README.md b/seqmetrics/README.md index 68d7bf8..3a57273 100644 --- a/seqmetrics/README.md +++ b/seqmetrics/README.md @@ -1 +1,3 @@ +# `seqmetrics` + This is a workspace for generating protein data such as molecular weight and hydrophobicity values From 294451687a0bb7a706e4cf8ac2142c10cc269a15 Mon Sep 17 00:00:00 2001 From: Mossa Date: Mon, 2 Jun 2025 12:23:55 +0200 Subject: [PATCH 2/2] fix: link to repository is broken on crates.io. 
Plus formatting --- microBioRust/Cargo.toml | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/microBioRust/Cargo.toml b/microBioRust/Cargo.toml index a0b60ca..2da395a 100644 --- a/microBioRust/Cargo.toml +++ b/microBioRust/Cargo.toml @@ -3,12 +3,18 @@ name = "microBioRust" version = "0.1.1" edition = "2021" license = "MIT" -keywords = ["bioinformatics","micro","bio","genomics","sequence-analysis"] +keywords = ["bioinformatics", "micro", "bio", "genomics", "sequence-analysis"] description = "Microbiology friendly bioinformatics Rust functions" -categories = ["science::bioinformatics::sequence-analysis", "science::bioinformatics::genomics", "science::bioinformatics","science","data-structures"] +categories = [ + "science::bioinformatics::sequence-analysis", + "science::bioinformatics::genomics", + "science::bioinformatics", + "science", + "data-structures", +] readme = "README.md" -exclude = [".git",".gitignore"] -repository = "https://github.com/LCrossman/microBioRust/microSeqIO" +exclude = [".git", ".gitignore"] +repository = "https://github.com/LCrossman/microBioRust" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [lints.rust] @@ -16,11 +22,11 @@ unsafe_code = "forbid" [dependencies] paste = "1.0" -itertools="0.10.1" -protein-translate="0.2.0" -bio="0.37.1" -anyhow="1.0" -thiserror="1.0" -regex="1.5" +itertools = "0.10.1" +protein-translate = "0.2.0" +bio = "0.37.1" +anyhow = "1.0" +thiserror = "1.0" +regex = "1.5" chrono = "0.4.38" clap = { version = "4.5.19", features = ["derive"] }