# diff --git a/bio_files_processor.py b/bio_files_processor.py  (new, empty file)
# diff --git a/biopython_fastq_filter.py b/biopython_fastq_filter.py  (new file)
"""FASTQ read filtering and a small hierarchy of biological sequence classes."""

from abc import ABC, abstractmethod
from collections import Counter

# Alphabets accepted by BiologicalSequence.check_nucleic_acid (both cases).
_DNA_ALPHABET = frozenset('ATGCatgc')
_RNA_ALPHABET = frozenset('AUGCaugc')


def filter_fastq(input_path: str, quality_threshold: int, output_filename="final_filtered.fastq", gc_bounds=(40, 60), length_bounds=(50, 350)):
    """Filter FASTQ reads by quality, GC content and length in a single pass.

    A read is kept when all of the following hold:
    - every per-base Phred score is >= ``quality_threshold``;
    - its GC percentage lies within ``gc_bounds`` (inclusive);
    - its length lies within ``length_bounds`` (inclusive).

    Reads are streamed straight from ``input_path`` to ``output_filename``;
    no intermediate files are created.

    Arguments:
    - input_path (str): path to the input FASTQ file
    - quality_threshold (int): minimal allowed Phred score of any base
    - output_filename (str): path of the filtered FASTQ file to write
    - gc_bounds: (min, max) GC content in percent
    - length_bounds: (min, max) read length

    Return:
    - int, number of reads written (also printed, as before)

    Example:
        filter_fastq("example_fastq.fastq", 15)
    """
    # Imported here so that the sequence classes below stay importable
    # when Biopython is not installed.
    from Bio import SeqIO
    # NOTE: GC is deprecated since Biopython 1.80 in favour of gc_fraction;
    # it is kept because requirements.txt pins biopython==1.81.
    from Bio.SeqUtils import GC

    min_gc, max_gc = gc_bounds[0], gc_bounds[1]
    min_length, max_length = length_bounds[0], length_bounds[1]

    def passes_filters(record):
        # Cheapest check first: length, then quality, then GC content.
        if not min_length <= len(record.seq) <= max_length:
            return False
        qualities = record.letter_annotations["phred_quality"]
        # A zero-length read has no scores; min() would raise on it.
        if qualities and min(qualities) < quality_threshold:
            return False
        return min_gc <= GC(record.seq) <= max_gc

    kept = (rec for rec in SeqIO.parse(input_path, "fastq") if passes_filters(rec))
    written = SeqIO.write(kept, output_filename, "fastq")

    print(written)
    return written


class InvalidInputError(ValueError):
    """Raised when a sequence is neither valid DNA nor valid RNA."""


class BiologicalSequence(ABC, str):
    """Base class for biological sequences.

    The raw sequence text is stored in ``self.seq``; because the class also
    derives from ``str``, the instance itself carries the same text.
    """

    @abstractmethod
    def __init__(self, seq):
        self.seq = seq

    def __len__(self):
        return len(self.seq)

    def __getitem__(self, index):
        # Slices are passed through; anything else is coerced with int(),
        # as the original implementation did.
        if isinstance(index, slice):
            return self.seq[index]
        return self.seq[int(index)]

    def __repr__(self):
        return f"{type(self).__name__}({self.seq!r})"

    def check_nucleic_acid(self):
        """Classify the sequence by its alphabet.

        Return:
        - str, 'dna' if only ATGC (any case) occur, otherwise 'rna' if only
          AUGC (any case) occur. A sequence valid for both is reported as 'dna'.

        Raises:
        - InvalidInputError if the sequence fits neither alphabet
        """
        unique_chars = set(self.seq)
        if unique_chars <= _DNA_ALPHABET:
            return 'dna'
        if unique_chars <= _RNA_ALPHABET:
            return 'rna'
        raise InvalidInputError("sequence is neither DNA nor RNA")


class NucleicAcidSequence(BiologicalSequence):
    """Validated nucleic acid sequence.

    Subclasses provide ``complement_dict``, the base-pairing table used by
    ``complement``.
    """

    def __init__(self, seq):
        super().__init__(seq)
        # Called for validation only: raises InvalidInputError on bad input.
        self.check_nucleic_acid()
        self.length = len(self.seq)

    def complement(self):
        """Return the complementary sequence as a plain str.

        Symbols missing from ``complement_dict`` are left unchanged.
        """
        pairing = self.complement_dict
        return "".join(pairing.get(base, base) for base in self.seq)


class DNASequence(NucleicAcidSequence):
    """DNA sequence: complement pairs A-T and G-C, case preserved."""

    complement_dict = {'A': 'T', 'T': 'A', 'G': 'C', 'C': 'G', 'a': 't', 't': 'a', 'g': 'c', 'c': 'g'}
    # Built once: maps T -> U and t -> u for transcription.
    _transcription_table = str.maketrans('Tt', 'Uu')

    def transcribe(self):
        """Return the RNA transcript (T -> U, t -> u) as a plain str."""
        return self.seq.translate(self._transcription_table)


class RNASequence(NucleicAcidSequence):
    """RNA sequence: complement pairs A-U and G-C, case preserved."""

    complement_dict = {'A': 'U', 'U': 'A', 'G': 'C', 'C': 'G', 'a': 'u', 'u': 'a', 'g': 'c', 'c': 'g'}


class AminoAcidSequence(BiologicalSequence):
    """Protein sequence in 1-letter code (no alphabet validation is done)."""

    def __init__(self, seq):
        super().__init__(seq)

    def amino_acid_frequency(self):
        """Calculate the relative frequency of each residue.

        Return:
        - dict mapping each symbol of the sequence to its share of the
          sequence in percent, rounded to 2 decimals; keys appear in order
          of first occurrence. An empty sequence gives an empty dict.
        """
        total = len(self.seq)
        return {
            residue: round(count / total * 100, 2)
            for residue, count in Counter(self.seq).items()
        }


# diff --git a/example_fastq.fastq b/example_fastq.fastq  (new file; its records follow)
+@SRX079804:1:SRR292678:1:1101:21885:21885 1:N:0:1 BH:ok +ACAGCAACATAAACATGATGGGATGGCGTAAGCCCCCGAGATATCAGTTTACCCAGGATAAGAGATTAAATTATGAGCAACATTATTAA ++SRX079804:1:SRR292678:1:1101:21885:21885 1:N:0:1 BH:ok +FGGGFGGGFGGGFGDFGCEBB@CCDFDDFFFFBFFGFGEFDFFFF;D@DD>C@DDGGGDFGDGG?GFGFEGFGGEF@FDGGGFGFBGGD +@SRX079804:1:SRR292678:1:1101:24563:24563 1:N:0:1 BH:failed +ATTAGCGAGGAGGAGTGCTGAGAAGATGTCGCCTACGCCGTTGAAATTCCCTTCAATCAGGGGGTACTGGAGGATACGAGTTTGTGTG ++SRX079804:1:SRR292678:1:1101:24563:24563 1:N:0:1 BH:failed +BFFFFFFFB@B@A<@D>BDDACDDDEBEDEFFFBFFFEFFDFFF=CC@DDFD8FFFFFFF8/+.2,@7<<:?B/:<><-><@.A*C>D +@SRX079804:1:SRR292678:1:1101:30161:30161 1:N:0:1 BH:failed +GAACGACAGCAGCTCCTGCATAACCGCGTCCTTCTTCTTTAGCGTTGTGCAAAGCATGTTTTGTATTACGGGCATCTCGAGCGAATC ++SRX079804:1:SRR292678:1:1101:30161:30161 1:N:0:1 BH:failed +DFFFEGDGGGGFGGEDCCDCEFFFFCCCCCB>CEBFGFBGGG?DE=:6@=>AD?D8DCEE:>EEABE5D@5:DDCA;EEE-DCD +@SRX079804:1:SRR292678:1:1101:47176:47176 1:N:0:1 BH:failed +TGAAGCGTCGATAGAAGTTAGCAAACCCGCGGAACTTCCGTACATCAGACACATTCCGGGGGGTGGGCCAATCCATGATGCCTTTG ++SRX079804:1:SRR292678:1:1101:47176:47176 1:N:0:1 BH:failed +FF@FFBEEEEFFEFFD@EDEFFB=DFEEFFFE8FFE8EEDBFDFEEBE+E46.'8.5::EE:?E>A6@?)>;>9D<C9DEBAAB=5C?<@0=A?D@BDB;:BA?BDDFH?B@DCB6BEBDA??AA9. 
+@SRX079804:1:SRR292678:1:1101:52180:52180 1:N:0:1 BH:failed +TTTTTGGTTTTAGGTCTAACATGTAAGTCTTTAATCTATTTTGAATTAATTTTTGGATGAGGTGTAAGGAAGGGATCCAGTTTC ++SRX079804:1:SRR292678:1:1101:52180:52180 1:N:0:1 BH:failed +FEFFFFFF=FEEFDFD>EE:?<5@BFAFCFEE7>C>:/6:2<344DA:6DDCDC>)34773DD?@DBA9B +@SRX079804:1:SRR292678:1:1101:105156:105156 1:N:0:1 BH:ok +ACTGCTGAGCTTAAATGGCGGCAGTCTGACGGTTACCAACGGGGGCACTTCAACCGGTTCGTTAACGGGGAGCGGAGAGCTGA ++SRX079804:1:SRR292678:1:1101:105156:105156 1:N:0:1 BH:ok +GFFEGGFGGGGEGGGGGGGGGFDD=DDE7EDD6CD?FEDEE@EBEFEE.DD5DDD@B<7>/0543C?BEE?@@BE@; +@SRX079804:1:SRR292678:1:1101:135168:135168 1:N:0:1 BH:ok +CCTTCCTAAAAATTAAGAATCTTAACAATTAGCAGCACAACCAAAATTATTACCGAAAGGACTTACTCCTCCGCCAAATCCA ++SRX079804:1:SRR292678:1:1101:135168:135168 1:N:0:1 BH:ok +GGGEGGGEGGBFFFFFGCFFEGG6AEEEDEEGGFGGCG4EGFFGDFFBGEGFGGFECBDEGGEFGBFF?CGFBFGGGGFGFF +@SRX079804:1:SRR292678:1:1101:149302:149302 1:N:0:1 BH:changed:1 +TAGGGTTGTATTTGCAGATCCATGGCATGCCAAAAAGAACATCGTCCCGTCCAATATCTGCAACATACCAGTTGGTTGGTA ++SRX079804:1:SRR292678:1:1101:149302:149302 1:N:0:1 BH:changed:1 +@;CBA=:@;@DBDCDEEE/EEEEEEF@>FBEEB=EFA>EEBD=DAEEEEB9)99>B99BC)@,@<9CDD=C,5;B::?@;A +@SRX079804:1:SRR292678:1:1101:170868:170868 2:N:0:1 BH:failed +CTGCCGAGACTGTTCTCAGACATGGAAAGCTCGATTCGCATACACTCGCTGAGTAAGAGAGTCACACCAAATCACAGATT ++SRX079804:1:SRR292678:1:1101:170868:170868 2:N:0:1 BH:failed +E;FFFEGFGIGGFBG;C6D<@C7CDGFEFGFHDFEHHHBBHHFDFEFBAEEEEDE@A2=DA:??C3:@>EEBEEHEFEHHFFHH?FGBGFBBD77B;;C?FFFFGGFED.BBABBG@DBBE +@SRX079804:1:SRR292678:1:1101:190845:190845 1:N:0:1 BH:changed:1 +CCTCAGCGTGGATTGCCGCTCATGCAGGAGCAGATAATCCCTTCGCCATCCCATTAAGCGCCGTTGTCGGTATTCC ++SRX079804:1:SRR292678:1:1101:190845:190845 1:N:0:1 BH:changed:1 +FF@FFCFEECEBEC@@BBBBDFBBFFDFFEFFEB8FFFFFFFFEFCEB/>BBA@AFFFEEEEECE;ACD@DBBEEE +@SRX079804:1:SRR292678:1:1101:198993:198993 2:N:0:1 BH:failed +AGTTATTTATGCATCATTCTCATGTATGAGCCAACAAGATAGTACAAGTTTTATTGCTATGAGTTCAGTACAACA ++SRX079804:1:SRR292678:1:1101:198993:198993 2:N:0:1 BH:failed 
+<<<=;@B??@<>@><48876EADEG6B.BB@.?+98204<:<>@?A=@EFEFFFEEFB +@SRX079804:1:SRR292678:1:1101:204480:204480 1:N:0:1 BH:failed +AGTGAGACACCCCTGAACATTCCTAGTAAGACATCTTTGAATATTACTAGTTAGCCACACTTTAAAATGACCCG ++SRX079804:1:SRR292678:1:1101:204480:204480 1:N:0:1 BH:failed +<98;<@@@:@CD@BCCDD=DBBCEBBAAA@9???@BCDBCGF=GEGDFGDBEEEEEFFFF=EDEE=DCD@@BBC +@SRX079804:1:SRR292678:1:1101:212327:212327 2:N:0:1 BH:failed +TTACCTCTGCTTTTTCGCCTGTTACTTCTACTAATCCTTCATCTATTGCGAATGGCCCTACTACTGACGAAAT ++SRX079804:1:SRR292678:1:1101:212327:212327 2:N:0:1 BH:failed +DBCCC@@;A@BDCCCE>BBED>GDCDBFBFFEBEECFGGD@@BCB<<8@;09746:@@>@EEECEEDE/FEED +@SRX079804:1:SRR292678:1:1101:230386:230386 1:N:0:1 BH:failed +TTCTGATTGGAGTGAGAGTGCCATTTGTTTCGCTGATTGGACGTTGGAAAGCGCCTTGACCTTTGACAGCAG ++SRX079804:1:SRR292678:1:1101:230386:230386 1:N:0:1 BH:failed +=BDD=DCDBBCDADD@@B;B@CC7C@B@>=BACDD,=??8DDCDD7CCCCDBDEDBDDDDDEEBDDDBD?DB +@SRX079804:1:SRR292678:1:1101:236093:236093 1:N:0:1 BH:failed +CATCTAACTCTCATTCTAGATTCTTAAGTTGGCTACACTTTGCCGTCATTCTCGGTGGATTAGCTATTGGG ++SRX079804:1:SRR292678:1:1101:236093:236093 1:N:0:1 BH:failed +A:@A@;@BB@GGFGG@A@@817729B +@SRX079804:1:SRR292678:1:1101:251912:251912 2:N:0:1 BH:ok +TGTAAATGGAAATGAACCTAATATGTATGCACAAACTATTAAAGCATATCTTGCAAAAGGAGCAATGGCG ++SRX079804:1:SRR292678:1:1101:251912:251912 2:N:0:1 BH:ok +GHFDHHHGHHHFFHHGHHHGGGGGGDEFFEGGGGEFHHFHGFGHHHHFHHDDD6@=DCACFFGGEFBBG +@SRX079804:1:SRR292678:1:1101:278698:278698 1:N:0:1 BH:ok +CTAATAATGGTAATTGAACCATAGAAGATAAGTTCATAATGTAATAAATACATCCATAGAGTTATTAA ++SRX079804:1:SRR292678:1:1101:278698:278698 1:N:0:1 BH:ok +DDBDBCCCDD@FFFB9<<<@DA=DA@B:@=@@AC@GGFCGECFFDGGCGFFGGFFCEBF9>?@>BDFF +@SRX079804:1:SRR292678:1:1101:295878:295878 1:N:0:1 BH:changed:1 +AATGCAAACAGGATGATATTTGAATCCGTAATACTGTTCTTTCATCATAAATAATTTATGCAGATAC ++SRX079804:1:SRR292678:1:1101:295878:295878 1:N:0:1 BH:changed:1 +HHHHHEGFHDGFEGBCBEEEGGGG@EDGCGBBBEGF4?EFDBDDBFE8DEE-E?EE;B@EFC=;FDE +@SRX079804:1:SRR292678:1:1101:306575:306575 1:N:0:1 BH:failed 
+ATCTTTGAACTTTGTCTCACTTTCCCCCATCTCACCATTCCTCCTGTTCTGTGAACCCCAGTTTCA ++SRX079804:1:SRR292678:1:1101:306575:306575 1:N:0:1 BH:failed +E::EA@E<6B8>97:<6084649?@:?EDED=BEBEGGFEDGECECBDEFDDEEGGEDDFFF +@SRX079804:1:SRR292678:1:1101:403661:403661 1:N:0:1 BH:failed +GATGGCTTTGCTTTCTCATTCTCCTCTCCATCGTTCCCATCTTCGCCCTCAGACGCTGATTGAT ++SRX079804:1:SRR292678:1:1101:403661:403661 1:N:0:1 BH:failed +EG=DFFDFFDDGDGGGGGFFGGGGBADADCEEE5EC>CCCE6BEEEGGGGCBEGGE@9BCEF;>>D@D +@SRX079804:1:SRR292678:1:1101:425870:425870 1:N:0:1 BH:failed +GTACAGCTCTCCTCGTTACCAGCACATCTTGGACACCCGACGAAGACTTTGACATGCTCCTC ++SRX079804:1:SRR292678:1:1101:425870:425870 1:N:0:1 BH:failed +E@EED@EF=D>=EED@D@7DBF +@SRX079804:1:SRR292678:1:1101:429745:429745 1:N:0:1 BH:failed +GGCGATTGTGAAGGCATAAGAGTGGGACATAGTTCAAGTCCAGAACGAATTAAACGCACAA ++SRX079804:1:SRR292678:1:1101:429745:429745 1:N:0:1 BH:failed +BB2B=A@A:BFBFFFFF +@SRX079804:1:SRR292678:1:1101:475293:475293 2:N:0:1 BH:ok +TACATTTGATTTCTTTATAAGATTTCTTACTGTAAAATCATCGCTATTTAACAGCTTATT ++SRX079804:1:SRR292678:1:1101:475293:475293 2:N:0:1 BH:ok +FHFFFFDC@FGFEDGE?EEDC6EEEDEF?EEEE8EHHGGHFGFFEGGGGBFBDBDEBCBCA +@SRX079804:1:SRR292678:1:1101:511594:511594 1:N:0:1 BH:ok +AGCACAATCAGATTCGCTTATGACGGCGATGAAGAAATTGCGATGAAATGTGAGGTGA ++SRX079804:1:SRR292678:1:1101:511594:511594 1:N:0:1 BH:ok +HCHFFHHHGFHBFFFEGFFEFHFEHGBGECHEHB?CDDEFCDBFF9DFCD.BC +@SRX079804:1:SRR292678:1:1101:527839:527839 2:N:0:1 BH:changed:4 +GACATTTCTTTCTGGGAAGGCTTATTAAACGATAAAGATAATGATATACGTTTTGAT ++SRX079804:1:SRR292678:1:1101:527839:527839 2:N:0:1 BH:changed:4 +>35/-;,><04%'A.?4?:>BE.DC-@???CE:@EDFDBDG7B;=<)?1.@?2A5<5 +@SRX079804:1:SRR292678:1:1101:547309:547309 1:N:0:1 BH:failed +AGGTCTGCACTGGCTCTTCAGAGCGCAAGCGAGGGAAGAGAAATATAGTGACGCAC ++SRX079804:1:SRR292678:1:1101:547309:547309 1:N:0:1 BH:failed +GEGFGFD=FDB8B7DDFFFF@/DC@+2:66>-@>9);<2: +@SRX079804:1:SRR292678:1:1101:570367:570367 1:N:0:1 BH:failed +CTATGGCCACTTTTGTATTCTCGATTGAGGTTATTCGCTCACCCATCTTTTCCAA ++SRX079804:1:SRR292678:1:1101:570367:570367 
1:N:0:1 BH:failed +FFEE;FFFFBFFFFF<9BEDEBEACDD3DD0B5>>0?:@>FFBEEBEFFFFGCCBCDGIGDGHEFGG=GGGGHFHEHF@FEFE?CGEEEFADEAEC +@SRX079804:1:SRR292678:1:1101:590521:590521 1:N:0:1 BH:failed +CCTCTTCTCTTTCGCGGTCATCTTGGGTTTCGCGCCTTTCTTCTTGACGACAC ++SRX079804:1:SRR292678:1:1101:590521:590521 1:N:0:1 BH:failed +EGFDD@FDF=FDFFEEGGBEFDGF=FGAGEEEEEDFBGGEE8EEE@>5;>90< +@SRX079804:1:SRR292678:1:1101:601307:601307 1:N:0:1 BH:changed:1 +TTGGCGTGCTGATGATTATCGGTATCTTCAAAGGCGCGCAGCCTGCGGGCTG ++SRX079804:1:SRR292678:1:1101:601307:601307 1:N:0:1 BH:changed:1 +GGGEGFGGEGE:EE>GFFGGGGDCGEBFFF>G=EBFFEC?DFGAD?DDECBE +@SRX079804:1:SRR292678:1:1101:631057:631057 1:N:0:1 BH:failed +CACCATTATCCTATTTCTGAACACATTTGACAGTCACGGCACTAGCATTGG ++SRX079804:1:SRR292678:1:1101:631057:631057 1:N:0:1 BH:failed +GGGGGGGGBFGGGEGFGGGGGFGEFFDFFFF?EBFEEDBFGEE@BE;E?E7 +@SRX079804:1:SRR292678:1:1101:654270:654270 1:N:0:1 BH:failed +ATCTTTCTCTCTCTGCTGCATTCTCCGCTTCAGCTCCTCAATTTCAATCA ++SRX079804:1:SRR292678:1:1101:654270:654270 1:N:0:1 BH:failed +GGGGGGEDGGFEGDGCGGBCEFGGGGFBFGGEGGGGGEGFCFEEGGFEDB +@SRX079804:1:SRR292678:1:1101:667761:667761 1:N:0:1 BH:failed +CAGCCTTTTGAGGTCGTCTATCGCAGCGTGTCCGCGACGTTTTGTTGCG ++SRX079804:1:SRR292678:1:1101:667761:667761 1:N:0:1 BH:failed +GGFGGG=GGGG@GFGGG@GGEGFGGGEGGGFGGEG@EEEDE8EE=E=DE +@SRX079804:1:SRR292678:1:1101:671526:671526 1:N:0:1 BH:ok +CAAGTAGAAGGTTAGCGCCTCTCTGTAAAAGGAGTCAAGCGCTATGTC ++SRX079804:1:SRR292678:1:1101:671526:671526 1:N:0:1 BH:ok +HHHHHGHEHHDGGGCGGGGHFGGGGEDGGGGGBEBEBEEEFGHFHFHH +@SRX079804:1:SRR292678:1:1101:685633:685633 2:N:0:1 BH:failed +GCTGTAATGGATCCACTAATTGGGACAGTGGTGGATAAAACGAATAC ++SRX079804:1:SRR292678:1:1101:685633:685633 2:N:0:1 BH:failed +DADADCGFEFEEEGEGGEFDEEEEBDAC;C10<9?AGBGGGFF7DFB +@SRX079804:1:SRR292678:1:1101:769626:769626 1:N:0:1 BH:failed +GTATTCCTTGACAGTCGAAAGAATCACTGCTAACCCAGGC ++SRX079804:1:SRR292678:1:1101:769626:769626 1:N:0:1 BH:failed +C>5BBCCCD=ACDD@A7@@B@A?B?=8B??EDEEDEBDEE +@SRX079804:1:SRR292678:1:1101:776222:776222 1:N:0:1 BH:failed 
+AGAACGTACACCCTACGCTAAGCAGTGGCTCCATGCCAA ++SRX079804:1:SRR292678:1:1101:776222:776222 1:N:0:1 BH:failed +BGDGGGGFEGGGGEGGGGFGFF6=FCFAFFEE,=C?EDD +@SRX079804:1:SRR292678:1:1101:782183:782183 2:N:0:1 BH:failed +TACGCGTAACGACGTCATAGCCATGACGCTTCAATAAA ++SRX079804:1:SRR292678:1:1101:782183:782183 2:N:0:1 BH:failed +D +@SRX079804:1:SRR292678:1:1101:828383:828383 1:N:0:1 BH:failed +AAACCACATATGACATGAGTGACGGGACTAAAGTTC ++SRX079804:1:SRR292678:1:1101:828383:828383 1:N:0:1 BH:failed +FFEEFEEB=E,C>CDEEEECEBEEEC?F;BDDDDAE +@SRX079804:1:SRR292678:1:1101:829239:829239 1:N:0:1 BH:ok +TCGATCCTTCTGCCTCAAAGTATACTAGGACGCAT ++SRX079804:1:SRR292678:1:1101:829239:829239 1:N:0:1 BH:ok +GGGDFGGBGFFEBFEDCBCDCGGGGBEEE=GE?EE +@SRX079804:1:SRR292678:1:1101:868419:868419 1:N:0:1 BH:ok +ATTCGTCAGGCCCAATAACATCATGAATTTCCAG ++SRX079804:1:SRR292678:1:1101:868419:868419 1:N:0:1 BH:ok +DEEEEEEEBDFFFFFFFF8FEED8@FFFBFFEFF +@SRX079804:1:SRR292678:1:1101:892716:892716 2:N:0:1 BH:ok +AATTTACCTAATGGAATCAATGAGGCTACTCCA ++SRX079804:1:SRR292678:1:1101:892716:892716 2:N:0:1 BH:ok +@BCBBBCCBCCCCCCCDDAAFFGEEEDBF@EE@ +@SRX079804:1:SRR292678:1:1101:893159:893159 2:N:0:1 BH:ok +TTCTCTGCTTTTCATATCTTGTCATAAAAATT ++SRX079804:1:SRR292678:1:1101:893159:893159 2:N:0:1 BH:ok +CBDCDEEEEEBEEEEGDDFDEEEEDGFFEGFE +@SRX079804:1:SRR292678:1:1101:918742:918742 1:N:0:1 BH:failed +CTCTCCATGCACAAAGAATATCACAGCCAAA ++SRX079804:1:SRR292678:1:1101:918742:918742 1:N:0:1 BH:failed +EEEBA?@;B@EEE@BEE=?EDDDDADCDA?E +@SRX079804:1:SRR292678:1:1101:923787:923787 2:N:0:1 BH:ok +TTGTGAAGGATGGGATATTAGTGTAGATGA ++SRX079804:1:SRR292678:1:1101:923787:923787 2:N:0:1 BH:ok +EEBBEGEEE=BBB<@DCDCGD@D>=DEGEE +@SRX079804:1:SRR292678:1:1101:933189:933189 1:N:0:1 BH:failed +GTCTGCACTATCGAGGGCTGTGCCTTTGC ++SRX079804:1:SRR292678:1:1101:933189:933189 1:N:0:1 BH:failed +FEFFDBFF8FE>?DFFFCEBCEEBBEDE6 +@SRX079804:1:SRR292678:1:1101:937136:937136 1:N:0:1 BH:failed +TTTCTTTGGCTTAAAGATAGTTTTAGTC ++SRX079804:1:SRR292678:1:1101:937136:937136 1:N:0:1 BH:failed 
+EFFFEEEEFCBCDDDDE@/E?@@7@@3< +@SRX079804:1:SRR292678:1:1101:940351:940351 1:N:0:1 BH:changed:1 +TGCCGTGGGAATGACAAACAAGCATCC ++SRX079804:1:SRR292678:1:1101:940351:940351 1:N:0:1 BH:changed:1 +DECC@GFFBF=EBEAFDFGD?FFF8FF +@SRX079804:1:SRR292678:1:1101:940693:940693 1:N:0:1 BH:failed +CACATTATGAACTATGGGCACTGCAT ++SRX079804:1:SRR292678:1:1101:940693:940693 1:N:0:1 BH:failed +EEEGFDEDFEGGGGGFEGBGGGFGGG +@SRX079804:1:SRR292678:1:1101:955819:955819 1:N:0:1 BH:failed +CACCTAGCAGCAACGGACGAGTCAG ++SRX079804:1:SRR292678:1:1101:955819:955819 1:N:0:1 BH:failed +GGGGGEEEGGEGGGFGEGG;F@EFF +@SRX079804:1:SRR292678:1:1101:958051:958051 2:N:0:1 BH:ok +TTAATATTTCCATCTGAACTTCGC ++SRX079804:1:SRR292678:1:1101:958051:958051 2:N:0:1 BH:ok +EDDBGFEGFGHHFHGGEDEGBGDB +@SRX079804:1:SRR292678:1:1101:996098:996098 1:N:0:1 BH:failed +CTAAGAGAGTTTGTAATGCGGAC ++SRX079804:1:SRR292678:1:1101:996098:996098 1:N:0:1 BH:failed +DD=DBDBDC4EFFFD@?CD@ACD +@SRX079804:1:SRR292678:1:1101:1020278:1020278 2:N:0:1 BH:ok +AAAGTGCAGAACATGCAGATAT ++SRX079804:1:SRR292678:1:1101:1020278:1020278 2:N:0:1 BH:ok +D>AC?GDDCD?DDADE@GABDG +@SRX079804:1:SRR292678:1:1101:1022234:1022234 1:N:0:1 BH:ok +GACCTTTCCGCAAGCTGTCGC ++SRX079804:1:SRR292678:1:1101:1022234:1022234 1:N:0:1 BH:ok +HHHHFEHHHHGGHHHGDHEEG +@SRX079804:1:SRR292678:1:1101:1024144:1024144 1:N:0:1 BH:failed +TCCATTATGAAAGAAGAAAA ++SRX079804:1:SRR292678:1:1101:1024144:1024144 1:N:0:1 BH:failed +@A><96:6: +@SRX079804:1:SRR292678:1:1101:1175112:1175112 1:N:0:1 BH:failed +AGGCC ++SRX079804:1:SRR292678:1:1101:1175112:1175112 1:N:0:1 BH:failed +EC8EE +@SRX079804:1:SRR292678:1:1101:1182927:1182927 1:N:0:1 BH:changed:1 +TGAA ++SRX079804:1:SRR292678:1:1101:1182927:1182927 1:N:0:1 BH:changed:1 +GGBH +@SRX079804:1:SRR292678:1:1101:1243474:1243474 1:N:0:1 BH:ok +ACG ++SRX079804:1:SRR292678:1:1101:1243474:1243474 1:N:0:1 BH:ok +EEE +@SRX079804:1:SRR292678:1:1101:1266246:1266246 1:N:0:1 BH:failed +AA ++SRX079804:1:SRR292678:1:1101:1266246:1266246 1:N:0:1 BH:failed +C@ 
+@SRX079804:1:SRR292678:1:1101:1269735:1269735 1:N:0:1 BH:failed +C ++SRX079804:1:SRR292678:1:1101:1269735:1269735 1:N:0:1 BH:failed +G diff --git a/misc_module b/misc_module new file mode 160000 index 0000000..d29d55c --- /dev/null +++ b/misc_module @@ -0,0 +1 @@ +Subproject commit d29d55c0582e79c291546bcf9594f22155295f5e diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..10211e5 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,2 @@ +bioframe==0.5.1 +biopython==1.81