From cd6ba4d333822cb7602482dd2b7da0980e9b5817 Mon Sep 17 00:00:00 2001 From: Gleb Bobkov Date: Sun, 25 Feb 2024 02:58:08 +0300 Subject: [PATCH 01/10] transfered everything to the main.py --- main.py | 476 +++++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 471 insertions(+), 5 deletions(-) diff --git a/main.py b/main.py index a0a22d5..e26000e 100644 --- a/main.py +++ b/main.py @@ -1,5 +1,471 @@ -import moduls.dna_rna_tools -import moduls.fastq_tools -import moduls.protein_tools - - +def transcribe(sequns: str) -> str: + """ + That function transcribe DNA to RNA + :param sequns: DNA sequence + :return: RNA sequence + """ + return "".join([transcription_dict[i] for i in sequns]) + + +def reverse(sequns: str) -> str: + """ + That function reverses DNA sequence + :param sequns:DNA sequense + :return: reversed DNA sequence + """ + return sequns[::-1] + + +def complement(sequns: str) -> str: + """ + That function create a complementarity chain fot the DNA sequence + :param sequns: DNA sequense + :return: complementarity chain to the DNA sequence + """ + return "".join([complement_dict[i] for i in sequns]) + + +def reverse_complement(sequns: str) -> str: + """ + That function create reverse complementarity chain for the DNA sequence + :param sequns: sequense + :return: reversed complimentarity chain to the DNA sequence + """ + rev_sequns = sequns[::-1] + return "".join([complement_dict[i] for i in rev_sequns]) + + +operations = { + "transcribe": transcribe, + "reverse": reverse, + "complement": complement, + "reverse_complement": reverse_complement, +} + + +transcription_dict = { + "A": "U", + "a": "u", + "T": "A", + "t": "a", + "G": "C", + "g": "c", + "C": "g", + "c": "g", +} + + +complement_dict = { + "A": "T", + "a": "t", + "T": "A", + "t": "a", + "G": "C", + "g": "c", + "C": "g", + "c": "g", +} + + +def dna_rna_tools(*args): + *sequnses, def_name = args + for sequns in sequnses: + if RNA_checking(sequns) == True: + continue + else: + raise 
ValueError("Incorrect options input, please try again") + result = [] + for sequns in sequnses: + res = operations[def_name](sequns) + result.append(res) + return result + + +from os import makedirs +import os.path + + +def GC_cont_check(sequence_for_filtering: str, gc_bounds: list) -> bool: + """ + Check the GC content of the sequence for filtering + :param sequence_for_filtering: analyzed sequence + :param gc_bounds: list with limitation for GC content + :return: bool argument for filtering in fasta_filtering function + """ + GC_counter = 0 + for i in sequence_for_filtering: + if i == "C" or i == "G" or i == "c" or i == "g": + GC_counter += 1 + GC_calculc = GC_counter / len(sequence_for_filtering) * 100 + if len(gc_bounds) == 1: + if GC_calculc <= gc_bounds[0]: + return True + if len(gc_bounds) == 2: + if GC_calculc <= gc_bounds[1] and GC_calculc >= gc_bounds[0]: + return True + + +def lenght_chech(sequence_for_filtering: str, length_bounds: list) -> bool: + """ + Check the lenght of the sequence for filtering + :param sequence_for_filtering: analyzed sequence + :param length_bounds: list with limitations for lenght + :return: bool argument for filtering in fasta_filtering function + """ + if len(length_bounds) == 1: + if len(sequence_for_filtering) <= length_bounds[0]: + return True + if len(length_bounds) == 2: + if ( + len(sequence_for_filtering) <= length_bounds[1] + and len(sequence_for_filtering) >= length_bounds[0] + ): + return True + + +def quality_chech( + quality_of_sequence_for_filterring: str, quality_threshold: int +) -> bool: + """ + Check the quality of the sequence for filtering + :param quality_of_sequence_for_filterring: 2nd key for the sequense with quality of each nucleotide reading + :param quality_threshold: limitation for the quality of nucleotides reading + :return: bool argument for filtering in fasta_filtering function + """ + quality_threshold = str(quality_threshold) + quality_counter = 0 + for i in quality_of_sequence_for_filterring: 
+ quality_counter += ord(i) + quality = quality_counter / len(quality_of_sequence_for_filterring) + if quality >= ord(quality_threshold): + return True + + +def seqs_creation(input_path_input: str) -> dict: + """ + Reading of the input file and creating the dictinary of the seqs + with structure + {name' : ('sequence', 'comment' 'quality') + } + :param input_path_input: + :return: dictinary of esquenses + """ + + path_input = str(input_path_input) + inline_new_dit_fasta = {} + outline_new_dict_fasta = {} + py_file = open(path_input) + lines = py_file.readlines() + i = 0 + seq = {} + seqs = {} + while i != (len(lines)): + key = lines[i] + key = key[:-1] + value_1 = lines[i + 1] + value_1 = value_1[:-1] + value_3 = lines[i + 2] + value_3 = value_3[:-1] + value_2 = lines[i + 3] + if value_2[-1] == "\n": + value_2 = value_2[:-1] + value = [value_1, value_3, value_2] + seq[key] = value + seqs = {**seq} + i += 4 + return seqs + + +def output_creating(input_path: str, output_filename: str, outline_new_dict_fasta: str): + """ + Create the output file and folder for the file + :param input_path: the name of input file for extracting sequenses + :param output_filename: the name of output file for writing sequenses + :param outline_new_dict_fasta: the name of the folder fot the keeping output filename + :return: nothing + """ + if output_filename != None: + os.makedirs("fastq_filtrator_resuls", exist_ok=True) + file_for_output_filename = ( + "fastq_filtrator_resuls/" + output_filename + ".fastq" + ) + with open(file_for_output_filename, mode="w") as f: + for key, value in outline_new_dict_fasta.items(): + f.write(key + "\n") + f.write(value[0] + "\n") + f.write(value[1] + "\n") + f.write(value[2] + "\n") + else: + os.makedirs("fastq_filtrator_resuls", exist_ok=True) + file_for_output_filename = "fastq_filtrator_resuls/" + input_path + with open(file_for_output_filename, mode="w") as f: + for key, value in outline_new_dict_fasta.items(): + f.write(key + "\n") + 
f.write(value[0] + "\n") + f.write(value[1] + "\n") + f.write(value[2] + "\n") + + +def fasta_filtering( + seqs, gc_bounds=(0, 100), length_bounds=(0, 2**32), quality_threshold=0 +): + if type(gc_bounds) != tuple: + gc_bounds = (0, gc_bounds) + if type(length_bounds) != tuple: + length_bounds = (0, length_bounds) + inline_new_dit_fasta = {} + for key, value in seqs.items(): + sequence_for_filtering = value[0] + quality_of_sequence_for_filterring = value[1] + if ( + GC_cont_check(sequence_for_filtering, gc_bounds) + and lenght_chech(sequence_for_filtering, length_bounds) + and quality_chech(quality_of_sequence_for_filterring, quality_threshold) + ): + inline_new_dit_fasta[key] = value + return inline_new_dict_fasta + + + + +def length_info(protein: str) -> int: + """ + Сounting the length of an amino acid sequence/protein in the number of amino acids + :param protein: sequence of protein + :return: number of amino acids in an amino acid sequence/protein + """ + return len(protein) + + +def count_percentage_aa(seq: str) -> dict: + """ + Count percentage of each amino acid in sequence + arguments: + - seq (str): sequence for counting + return: + - dict: dictionary with counted percentage + """ + l = count_length(seq) + result = {} + for aa in seq: + if aa not in result: + result[aa] = 1 + else: + result[aa] += 1 + result.update((key, round(value / l * 100, 2)) for key, value in result.items()) + res = { + key: value + for key, value in sorted(result.items(), key=lambda item: item[1], reverse=True) + } + return res + + +def compare_pattern(sequence: str, pattern: str) -> bool: + """ + Compare a given pattern to a fragment of sequence of the same length + arguments: + - sequence (str): sequence fragment to compare with the pattern + - pattern (str): pattern for comparison + return: + - (bool): whether pattern and fragment match + """ + for i in range(0, len(sequence)): + if not sequence[i] == pattern[i]: + return False + return True + + +def find_pattern(sequences: 
list, pattern: str) -> dict: + """ + Find all non-overlaping instances of a given pattern in sequences + arguments: + - sequences (list): sequences to find the pattern in + - pattern (str): pattern in question + return + - finds(dict): dictionary with sequences as keys and lists of indexes of patterns and the number of patterns as values + """ + finds = {} + for j in range(0, len(sequences)): + find = [] + for i in range(0, len(sequences[j])): + if compare_pattern(sequences[j][i : i + len(pattern)], pattern): + find.append(i) + i += len(pattern) + finds[sequences[j]] = [len(find)] + find + return finds + + +def get_protein_gene(protein): + """ + Transforming of an amino acid sequence/protein to DNA sequence + :param protein: amino acid sequence of protein + :return: sequence of protein in the DNA sequence form + """ + return "".join([retrnaslation_dict[aa] for i in protein]) + + +def rename_three_letter_name(seqs: list, sep="") -> list: + """ + Transform into a three-letter amino acids entry. 
+ arguments: + - seqs (list): list of sequences for transforming to three-letter entire + - sep (str): separator between aminoacids, default = '' + return: + - list: transformed sequences with separators + """ + res = [] + for seq in seqs: + threel_form = "" + for aa in seq: + threel_form = threel_form + threel[aa] + sep + if sep: + threel_form = threel_form[:-1] + res.append(threel_form) + return res + + +coperations = { + "length": length_info, + "percentage": count_percentage_aa, + "pattern": find_pattern, + "3Letter_name": rename_three_letter_name, + "DNA_code": get_protein_gene, +} + + +retrnaslation_dict = { + "F": "TTC", + "f": "ttc", + "L": "TTA", + "l": "tta", + "S": "TCG", + "s": "tcg", + "Y": "TAC", + "y": "tac", + "C": "TGC", + "c": "tgc", + "W": "TGG", + "w": "tgg", + "P": "CCC", + "p": "ccc", + "H": "CAT", + "h": "cat", + "Q": "GAA", + "q": "gaa", + "R": "CGA", + "r": "cga", + "I": "ATT", + "i": "att", + "M": "ATG", + "m": "atg", + "T": "ACC", + "t": "acc", + "N": "AAT", + "n": "aat", + "K": "AAA", + "k": "aaa", + "V": "GTT", + "v": "gtt", + "A": "GCA", + "a": "gca", + "D": "GAT", + "d": "gca", + "E": "GAG", + "e": "gag", + "G": "GGG", + "g": "ggg", +} + + +threel = { + "A": "ALA", + "R": "ARG", + "N": "ASN", + "D": "ASP", + "V": "VAL", + "H": "HIS", + "G": "GLY", + "Q": "GLN", + "E": "GLU", + "I": "ILE", + "L": "LEU", + "K": "LYS", + "M": "MET", + "P": "PRO", + "S": "SER", + "Y": "TYR", + "T": "THR", + "W": "TRP", + "F": "PHE", + "C": "CYS", + "a": "ala", + "r": "arg", + "n": "asn", + "d": "asp", + "v": "val", + "h": "his", + "g": "gly", + "q": "gln", + "e": "glu", + "i": "ile", + "l": "leu", + "k": "lys", + "m": "met", + "p": "pro", + "s": "ser", + "y": "tyr", + "t": "thr", + "w": "trp", + "f": "phe", + "c": "cys", +} + + +def protein_tool(*proteins, options=None): + proteins = list(proteins) + + operations = { + "compare": compare, + "length": length_info, + "percentage": count_percentage_aa, + "pattern": find_pattern, + "3Letter_name": 
rename_three_letter_name, + "DNA_code": get_protein_gene, + } + + if options == "compare": + result = operations[options](proteins[:-2], proteins[-2], proteins[-1]) + return result + elif options == "3Letter_name": + result = operations[options](proteins[:-1], proteins[-1]) + return result + elif options == "length" or options == "percentage" or options == "DNA_code": + result = [] + for protein in proteins: + res = operations[options](protein) + result.append(res) + return result + else: + raise ValueError("Incorrect options input, please try again") + + +from Bio import SeqIO +#from Bio.SeqUtils import GC + +class FastQFilter: + def __init__(self, input_file, output_file, min_length, min_quality, min_gc): + self.input_file = input_file + self.output_file = output_file + self.min_length = min_length + self.min_quality = min_quality + self.min_gc = min_gc + + def filter_fastq(self): + with open(self.output_file, 'w') as output_handle: + for record in SeqIO.parse(self.input_file, 'fastq'): + if ( + len(record.seq) >= self.min_length + and min(record.letter_annotations["phred_quality"]) >= self.min_quality + and Bio.SeqUtils.GC(record.seq) >= self.min_gc + ): + SeqIO.write(record, output_handle, 'fastq') \ No newline at end of file From 81f42051e69c5530aa0c6709723d93361361ecf1 Mon Sep 17 00:00:00 2001 From: Gleb Bobkov Date: Sun, 25 Feb 2024 03:00:13 +0300 Subject: [PATCH 02/10] moduls delited --- moduls/dna_rna_tools.py | 71 ----------------- moduls/fastq_tools.py | 129 ------------------------------- moduls/protein_tools.py | 163 ---------------------------------------- 3 files changed, 363 deletions(-) delete mode 100644 moduls/dna_rna_tools.py delete mode 100644 moduls/fastq_tools.py delete mode 100644 moduls/protein_tools.py diff --git a/moduls/dna_rna_tools.py b/moduls/dna_rna_tools.py deleted file mode 100644 index 49fbe83..0000000 --- a/moduls/dna_rna_tools.py +++ /dev/null @@ -1,71 +0,0 @@ -operations = { - 'transcribe': transcribe, - 'reverse': reverse, - 
'complement': complement, - 'reverse_complement': reverse_complement - } - -transcription_dict = { - 'A': 'U', 'a': 'u', - 'T': 'A', 't': 'a', - 'G': 'C', 'g': 'c', - 'C': 'g', 'c': 'g' -} - - -complement_dict = { - 'A': 'T', 'a': 't', - 'T': 'A', 't': 'a', - 'G': 'C', 'g': 'c', - 'C': 'g', 'c': 'g' -} - - -def transcribe(sequns: str) -> str: - """ - That function transcribe DNA to RNA - :param sequns: DNA sequence - :return: RNA sequence - """ - return ''.join([transcription_dict[i] for i in sequns]) - - -def reverse (sequns: str) -> str: - """ - That function reverses DNA sequence - :param sequns:DNA sequense - :return: reversed DNA sequence - """ - return sequns[::-1] - -def complement (sequns: str) -> str: - """ - That function create a complementarity chain fot the DNA sequence - :param sequns: DNA sequense - :return: complementarity chain to the DNA sequence - """ - return ''.join([complement_dict[i] for i in sequns]) - - -def reverse_complement (sequns: str) -> str: - """ - That function create reverse complementarity chain for the DNA sequence - :param sequns: sequense - :return: reversed complimentarity chain to the DNA sequence - """ - rev_sequns = sequns[::-1] - return ''.join([complement_dict[i] for i in rev_sequns]) - - -def dna_rna_tools (*args): - *sequnses, def_name = args - for sequns in sequnses: - if RNA_checking(sequns) == True: - continue - else: - raise ValueError('Incorrect options input, please try again') - result = [] - for sequns in sequnses: - res = operations[def_name](sequns) - result.append(res) - return (result) diff --git a/moduls/fastq_tools.py b/moduls/fastq_tools.py deleted file mode 100644 index 20a76aa..0000000 --- a/moduls/fastq_tools.py +++ /dev/null @@ -1,129 +0,0 @@ -from os import makedirs -import os.path - -def GC_cont_check(sequence_for_filtering: str , gc_bounds: list) -> bool: - """ - Check the GC content of the sequence for filtering - :param sequence_for_filtering: analyzed sequence - :param gc_bounds: list with 
limitation for GC content - :return: bool argument for filtering in fasta_filtering function - """ - GC_counter = 0 - for i in sequence_for_filtering: - if i == "C" or i == "G" or i == "c" or i == "g": - GC_counter += 1 - GC_calculc = GC_counter/len(sequence_for_filtering)*100 - if len(gc_bounds)==1: - if GC_calculc <= gc_bounds[0]: - return True - if len(gc_bounds) == 2: - if GC_calculc <= gc_bounds[1] and GC_calculc >= gc_bounds[0]: - return True - -def lenght_chech (sequence_for_filtering: str, length_bounds: list) -> bool: - """ - Check the lenght of the sequence for filtering - :param sequence_for_filtering: analyzed sequence - :param length_bounds: list with limitations for lenght - :return: bool argument for filtering in fasta_filtering function - """ - if len(length_bounds)==1: - if len(sequence_for_filtering) <= length_bounds[0]: - return True - if len(length_bounds) == 2: - if len(sequence_for_filtering) <= length_bounds[1] and len(sequence_for_filtering) >= length_bounds[0]: - return True - -def quality_chech (quality_of_sequence_for_filterring: str, quality_threshold: int) -> bool: - """ - Check the quality of the sequence for filtering - :param quality_of_sequence_for_filterring: 2nd key for the sequense with quality of each nucleotide reading - :param quality_threshold: limitation for the quality of nucleotides reading - :return: bool argument for filtering in fasta_filtering function - """ - quality_threshold = str(quality_threshold) - quality_counter = 0 - for i in quality_of_sequence_for_filterring: - quality_counter += ord(i) - quality = quality_counter/len(quality_of_sequence_for_filterring) - if quality >= ord(quality_threshold): - return True - - -def seqs_creation(input_path_input: str) -> dict: - """ - Reading of the input file and creating the dictinary of the seqs - with structure - {name' : ('sequence', 'comment' 'quality') - } - :param input_path_input: - :return: dictinary of esquenses - """ - - path_input = str(input_path_input) - 
inline_new_dit_fasta = {} - outline_new_dict_fasta = {} - py_file = open (path_input) - lines = py_file.readlines() - i = 0 - seq = {} - seqs = {} - while i != (len(lines)): - key = lines[i] - key = key[:-1] - value_1 = lines[i+1] - value_1 = value_1 [:-1] - value_3 = lines[i+2] - value_3 = value_3[:-1] - value_2 = lines[i+3] - if value_2[-1] == '\n': - value_2 = value_2[:-1] - value = [value_1, value_3 ,value_2] - seq[key] = value - seqs = {**seq} - i += 4 - return seqs - - -def output_creating (input_path: str, output_filename: str, outline_new_dict_fasta: str): - """ - Create the output file and folder for the file - :param input_path: the name of input file for extracting sequenses - :param output_filename: the name of output file for writing sequenses - :param outline_new_dict_fasta: the name of the folder fot the keeping output filename - :return: nothing - """ - if output_filename != None: - os.makedirs('fastq_filtrator_resuls', exist_ok=True) - file_for_output_filename = 'fastq_filtrator_resuls/' + output_filename + '.fastq' - with open(file_for_output_filename, mode='w') as f: - for key, value in outline_new_dict_fasta.items(): - f.write(key + '\n') - f.write(value[0] + '\n') - f.write(value[1] + '\n') - f.write(value[2] + '\n') - else: - os.makedirs('fastq_filtrator_resuls', exist_ok=True) - file_for_output_filename = 'fastq_filtrator_resuls/' + input_path - with open(file_for_output_filename, mode='w') as f: - for key, value in outline_new_dict_fasta.items(): - f.write(key + '\n') - f.write(value[0] + '\n') - f.write(value[1] + '\n') - f.write(value[2] + '\n') - - -def fasta_filtering(seqs, gc_bounds = (0, 100), length_bounds = (0, 2**32), quality_threshold = 0): - if type(gc_bounds) != tuple: - gc_bounds = (0, gc_bounds) - if type(length_bounds) != tuple: - length_bounds = (0, length_bounds) - inline_new_dit_fasta = {} - for key, value in seqs.items(): - sequence_for_filtering = value[0] - quality_of_sequence_for_filterring = value[1] - if 
GC_cont_check(sequence_for_filtering, gc_bounds) and lenght_chech(sequence_for_filtering, length_bounds) and quality_chech(quality_of_sequence_for_filterring, quality_threshold): - inline_new_dit_fasta[key]= value - return (inline_new_dict_fasta) - - diff --git a/moduls/protein_tools.py b/moduls/protein_tools.py deleted file mode 100644 index 9e7382e..0000000 --- a/moduls/protein_tools.py +++ /dev/null @@ -1,163 +0,0 @@ -coperations = { - 'length': length_info, - 'percentage': count_percentage_aa, - 'pattern': find_pattern, - '3Letter_name': rename_three_letter_name, - 'DNA_code': get_protein_gene - } - - -retrnaslation_dict = { - 'F': 'TTC', 'f': 'ttc', - 'L': 'TTA', 'l': 'tta', - 'S': 'TCG', 's': 'tcg', - 'Y': 'TAC', 'y': 'tac', - 'C': 'TGC', 'c': 'tgc', - 'W': 'TGG', 'w': 'tgg', - 'P': 'CCC', 'p': 'ccc', - 'H': 'CAT', 'h': 'cat', - 'Q': 'GAA', 'q': 'gaa', - 'R': 'CGA', 'r': 'cga', - 'I': 'ATT', 'i': 'att', - 'M': 'ATG', 'm': 'atg', - 'T': 'ACC', 't': 'acc', - 'N': 'AAT', 'n': 'aat', - 'K': 'AAA', 'k': 'aaa', - 'V': 'GTT', 'v': 'gtt', - 'A': 'GCA', 'a': 'gca', - 'D': 'GAT', 'd': 'gca', - 'E': 'GAG', 'e': 'gag', - 'G': 'GGG', 'g': 'ggg' - } - - -threel = {'A': 'ALA', 'R': 'ARG', 'N': 'ASN', 'D': "ASP", 'V': 'VAL', - 'H': 'HIS', 'G': "GLY", 'Q': "GLN", 'E': 'GLU', 'I': 'ILE', - 'L': 'LEU', 'K': 'LYS', 'M': 'MET', 'P': 'PRO', 'S': 'SER', - 'Y': 'TYR', 'T': 'THR', 'W': 'TRP', 'F': 'PHE', 'C': 'CYS', - 'a': 'ala', 'r': 'arg', 'n': 'asn', 'd': "asp", 'v': 'val', - 'h': 'his', 'g': "gly", 'q': "gln", 'e': 'glu', 'i': 'ile', - 'l': 'leu', 'k': 'lys', 'm': 'met', 'p': 'pro', 's': 'ser', - 'y': 'tyr', 't': 'thr', 'w': 'trp', 'f': 'phe', 'c': 'cys' -} - - -def length_info (protein: str) -> int: - """ - Сounting the length of an amino acid sequence/protein in the number of amino acids - :param protein: sequence of protein - :return: number of amino acids in an amino acid sequence/protein - """ - return len(protein) - - -def count_percentage_aa(seq: str) -> dict: - """ - 
Count percentage of each amino acid in sequence - arguments: - - seq (str): sequence for counting - return: - - dict: dictionary with counted percentage - """ - l = count_length(seq) - result = {} - for aa in seq: - if aa not in result: - result[aa] = 1 - else: - result[aa] += 1 - result.update((key, round(value / l * 100, 2)) for key, value in result.items()) - res = {key: value for key, value in sorted(result.items(), key=lambda item: item[1], reverse=True)} - return res - - -def compare_pattern(sequence: str, pattern: str) -> bool: - """ - Compare a given pattern to a fragment of sequence of the same length - arguments: - - sequence (str): sequence fragment to compare with the pattern - - pattern (str): pattern for comparison - return: - - (bool): whether pattern and fragment match - """ - for i in range(0, len(sequence)): - if not sequence[i] == pattern[i]: - return False - return True - - -def find_pattern(sequences: list, pattern: str) -> dict: - """ - Find all non-overlaping instances of a given pattern in sequences - arguments: - - sequences (list): sequences to find the pattern in - - pattern (str): pattern in question - return - - finds(dict): dictionary with sequences as keys and lists of indexes of patterns and the number of patterns as values - """ - finds = {} - for j in range(0, len(sequences)): - find = [] - for i in range(0, len(sequences[j])): - if compare_pattern(sequences[j][i:i + len(pattern)], pattern): - find.append(i) - i += len(pattern) - finds[sequences[j]] = [len(find)] + find - return finds - - -def get_protein_gene(protein): - """ - Transforming of an amino acid sequence/protein to DNA sequence - :param protein: amino acid sequence of protein - :return: sequence of protein in the DNA sequence form - """ - return ''.join([retrnaslation_dict[aa] for i in protein]) - - -def rename_three_letter_name(seqs: list, sep='') -> list: - """ - Transform into a three-letter amino acids entry. 
- arguments: - - seqs (list): list of sequences for transforming to three-letter entire - - sep (str): separator between aminoacids, default = '' - return: - - list: transformed sequences with separators - """ - res = [] - for seq in seqs: - threel_form = '' - for aa in seq: - threel_form = threel_form + threel[aa] + sep - if sep: - threel_form = threel_form[:-1] - res.append(threel_form) - return res - - -def protein_tool(*proteins, options=None): - proteins = list(proteins) - - operations = { - 'compare': compare, - 'length': length_info, - 'percentage': count_percentage_aa, - 'pattern': find_pattern, - '3Letter_name': rename_three_letter_name, - 'DNA_code': get_protein_gene - } - - if options == 'compare': - result = operations[options](proteins[:-2], proteins[-2], proteins[-1]) - return (result) - elif options == '3Letter_name': - result = operations[options](proteins[:-1], proteins[-1]) - return result - elif options == 'length' or options == 'percentage' or options == 'DNA_code': - result = [] - for protein in proteins: - res = operations[options](protein) - result.append(res) - return (result) - else: - raise ValueError('Incorrect options input, please try again') From 756f6c006023253ec39cbc0880d5d8fafa6ba463 Mon Sep 17 00:00:00 2001 From: Gleb Bobkov Date: Wed, 1 May 2024 11:49:41 +0300 Subject: [PATCH 03/10] uptade file --- main.py | 75 +++++++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 67 insertions(+), 8 deletions(-) diff --git a/main.py b/main.py index e26000e..a3f6d07 100644 --- a/main.py +++ b/main.py @@ -1,3 +1,14 @@ +import os +import sys +import time +import datetime +import requests +import traceback +import functools +from io import StringIO +from dotenv import load_dotenv + + def transcribe(sequns: str) -> str: """ That function transcribe DNA to RNA @@ -226,8 +237,6 @@ def fasta_filtering( return inline_new_dict_fasta - - def length_info(protein: str) -> int: """ Сounting the length of an amino acid sequence/protein in 
the number of amino acids @@ -450,8 +459,6 @@ def protein_tool(*proteins, options=None): from Bio import SeqIO -#from Bio.SeqUtils import GC - class FastQFilter: def __init__(self, input_file, output_file, min_length, min_quality, min_gc): self.input_file = input_file @@ -461,11 +468,63 @@ def __init__(self, input_file, output_file, min_length, min_quality, min_gc): self.min_gc = min_gc def filter_fastq(self): - with open(self.output_file, 'w') as output_handle: - for record in SeqIO.parse(self.input_file, 'fastq'): + with open(self.output_file, "w") as output_handle: + for record in SeqIO.parse(self.input_file, "fastq"): if ( len(record.seq) >= self.min_length - and min(record.letter_annotations["phred_quality"]) >= self.min_quality + and min(record.letter_annotations["phred_quality"]) + >= self.min_quality and Bio.SeqUtils.GC(record.seq) >= self.min_gc ): - SeqIO.write(record, output_handle, 'fastq') \ No newline at end of file + SeqIO.write(record, output_handle, "fastq") + + +load_dotenv("bot.env") + +TG_API_TOKEN = os.getenv("TG_API_TOKEN") + +TELEGRAM_API_URL = f"https://api.telegram.org/bot{TG_API_TOKEN}/sendMessage" + + +def telegram_logger(chat_id): + def decorator(func): + @functools.wraps(func) + def wrapper(*args, **kwargs): + start_time = time.time() + stdout = StringIO() + stderr = StringIO() + sys.stdout = stdout + sys.stderr = stderr + try: + result = func(*args, **kwargs) + status = "completed successfully" + except Exception as e: + error_info = "".join(traceback.format_exception_only(type(e), e)) + status = f"failed with error: {error_info}" + finally: + sys.stdout = sys.__stdout__ + sys.stderr = sys.__stderr__ + stdout_str = stdout.getvalue().strip() + stderr_str = stderr.getvalue().strip() + elapsed_time = time.time() - start_time + elapsed_time_str = str(datetime.timedelta(seconds=elapsed_time)) + message = ( + f"Function '{func.__name__}' {status} in {elapsed_time_str}\n\n" + ) + if stdout_str: + message += f"STDOUT:\n{stdout_str}\n\n" + if 
stderr_str: + message += f"STDERR:\n{stderr_str}\n\n" + send_telegram_message(chat_id, message) + return result + + return wrapper + + return decorator + + +def send_telegram_message(chat_id, text): + params = {"chat_id": chat_id, "text": text, "parse_mode": "HTML"} + response = requests.post(TELEGRAM_API_URL, params=params) + if not response.ok: + print(f"Failed to send Telegram message: {response.text}") From 35c8eaa0208ac7e8149840bb61acc10fb17cb24c Mon Sep 17 00:00:00 2001 From: Gleb Bobkov Date: Wed, 1 May 2024 11:50:06 +0300 Subject: [PATCH 04/10] uptade file --- bio_files_processor.py | 66 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) diff --git a/bio_files_processor.py b/bio_files_processor.py index d80f08d..4339dc8 100644 --- a/bio_files_processor.py +++ b/bio_files_processor.py @@ -1,5 +1,9 @@ from os import makedirs import os.path +import time +import os +import shutil +import tempfile def convert_multiline_fasta_to_oneline(path_input: str, output_fasta = None): @@ -37,3 +41,65 @@ def convert_multiline_fasta_to_oneline(path_input: str, output_fasta = None): with open(output_fasta, mode='w') as f: for line in list_for_printing: f.write(line + '\n') + + + +class FastaRecord: + def __init__(self, fasta_id, description, sequence): + self.id = fasta_id + self.description = description + self.sequence = sequence + + def __repr__(self): + return f"FastaRecord(id={self.id}, description={self.description}, sequence={self.sequence})" + +class OpenFasta: + def __init__(self, file_path, mode='r'): + self.file_path = file_path + self.mode = mode + + def __enter__(self): + self.file = open(self.file_path, self.mode) + return self + + def __exit__(self, exc_type, exc_value, traceback): + if self.file: + self.file.close() + + def __iter__(self): + return self + + def __next__(self): + header = None + lines = [] + + while True: + line = self.file.readline() + if not line: + if header is not None: + return FastaRecord(fasta_id=header[0], 
description=header[1], sequence=''.join(lines)) + else: + raise StopIteration + + line = line.strip() + + if line.startswith('>'): + if header is not None: + return FastaRecord(fasta_id=header[0], description=header[1], sequence=''.join(lines)) + else: + header = self.parse_header(line) + lines = [] + else: + lines.append(line) + + def parse_header(self, header_line): + parts = header_line[1:].split(maxsplit=1) + fasta_id = parts[0] + description = parts[1] if len(parts) > 1 else '' + return fasta_id, description + + def read_record(self): + return next(self) + + def read_records(self): + return list(self) From 0bfc194f80c68c875fb8bf8504b3cdd29b9c30ba Mon Sep 17 00:00:00 2001 From: Gleb Bobkov Date: Wed, 1 May 2024 11:50:49 +0300 Subject: [PATCH 05/10] created random forest code --- custom_random_forest.py | 52 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) create mode 100644 custom_random_forest.py diff --git a/custom_random_forest.py b/custom_random_forest.py new file mode 100644 index 0000000..94e042f --- /dev/null +++ b/custom_random_forest.py @@ -0,0 +1,52 @@ +import numpy as np +import multiprocessing as mp +import time +from functools import partial +from sklearn.base import BaseEstimator +from sklearn.tree import DecisionTreeClassifier +from sklearn.datasets import make_classification +from sklearn.model_selection import train_test_split + +class RandomForestClassifierCustom(BaseEstimator): + def __init__( + self, n_estimators=10, max_depth=None, max_features=None, random_state=None, n_jobs=-1 + ): + self.n_estimators = n_estimators + self.max_depth = max_depth + self.max_features = max_features + self.random_state = random_state + self.n_jobs = max(1, n_jobs) # Ensure n_jobs is at least 1 + + self.trees = [] + self.feat_ids_by_tree = [] + + def fit(self, X, y): + self.classes_ = np.unique(y) + with mp.Pool(processes=self.n_jobs) as pool: + results = pool.map(partial(self._fit_tree, X=X, y=y), range(self.n_estimators)) + # 
Extract trees and feat_ids_by_tree from results + self.trees = [result[0] for result in results] + self.feat_ids_by_tree = [result[1] for result in results] + return self + + def _fit_tree(self, i, X, y): + np.random.seed(self.random_state + i) + feat_ids = np.random.choice(range(X.shape[1]), self.max_features, replace=False) + self.feat_ids_by_tree.append(feat_ids) + sample_indices = np.random.choice(range(X.shape[0]), X.shape[0], replace=True) + X_sampled = X[sample_indices][:, feat_ids] + y_sampled = y[sample_indices] + tree = DecisionTreeClassifier( + max_depth=self.max_depth, max_features=self.max_features, random_state=self.random_state + ) + tree.fit(X_sampled, y_sampled) + return tree, feat_ids + + def predict(self, X): + predictions = [self._predict_tree(X, i) for i in range(self.n_estimators)] + return np.mean(predictions, axis=0) + + def _predict_tree(self, X, i): + feat_ids = self.feat_ids_by_tree[i] + X_subset = X[:, feat_ids] + return self.trees[i].predict(X_subset) \ No newline at end of file From 88ebbcdd2e850fb17d97726f68c6129716a1575f Mon Sep 17 00:00:00 2001 From: Gleb Bobkov Date: Wed, 1 May 2024 11:51:27 +0300 Subject: [PATCH 06/10] add randomforest showing --- Showcases.ipynb | 112 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 112 insertions(+) create mode 100644 Showcases.ipynb diff --git a/Showcases.ipynb b/Showcases.ipynb new file mode 100644 index 0000000..2957b1a --- /dev/null +++ b/Showcases.ipynb @@ -0,0 +1,112 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "00fd4357-3461-449d-99f3-cd84258d43bb", + "metadata": {}, + "source": [ + "# RandomForestClassifierCustom с многопоточгостью " + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "53d6c283-a41f-4e0a-9f1b-6ff1f9d26d9b", + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.datasets import make_classification\n", + "from custom_random_forest import RandomForestClassifierCustom\n", + "import numpy as np\n", + "import time" + ] + 
}, + { + "cell_type": "code", + "execution_count": 4, + "id": "cce40041-bd48-49e3-9f15-c3c4cac2d3ee", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Время выполнения fit с 1 потоком: 6.72442626953125\n", + "Время выполнения fit с 2 потоками: 6.9135894775390625\n", + "Время выполнения predict с 1 потоком: 0.16434144973754883\n", + "Время выполнения predict с 2 потоками: 0.16273832321166992\n", + "Полученные предсказания совпадают: True\n" + ] + } + ], + "source": [ + "X, y = make_classification(n_samples=100000)\n", + "random_forest = RandomForestClassifierCustom(max_depth=30, n_estimators=10, \n", + " max_features=2, random_state=42)\n", + "\n", + "# 1 поток предсказание\n", + "start_time = time.time()\n", + "random_forest.fit(X, y)\n", + "fit_time_1_job = time.time() - start_time\n", + "\n", + "# 2 потока предсказание\n", + "start_time = time.time()\n", + "y_pred_1_job = random_forest.predict(X)\n", + "predict_time_1_job = time.time() - start_time\n", + "\n", + "# 1 поток\n", + "start_time = time.time()\n", + "random_forest.fit(X, y)\n", + "fit_time_2_jobs = time.time() - start_time\n", + "\n", + "# 2 потока\n", + "start_time = time.time()\n", + "y_pred_2_jobs = random_forest.predict(X)\n", + "predict_time_2_jobs = time.time() - start_time\n", + "\n", + "print(\"Время выполнения fit с 1 потоком:\", fit_time_1_job)\n", + "print(\"Время выполнения fit с 2 потоками:\", fit_time_2_jobs)\n", + "print(\"Время выполнения predict с 1 потоком:\", predict_time_1_job)\n", + "print(\"Время выполнения predict с 2 потоками:\", predict_time_2_jobs)\n", + "print(\"Полученные предсказания совпадают:\", np.array_equal(y_pred_1_job, y_pred_2_jobs))\n" + ] + }, + { + "cell_type": "markdown", + "id": "84bcae36-3129-4a95-9835-e18b8b0f365e", + "metadata": {}, + "source": [ + "Вообще оно работало чуть лучше, почему стало хуже я не понимаю, как и не понимал на всем курсе по ML почему иногда тот или иной код работает значительно хуже " + 
import os  # needed at module level: several test classes below use os.path


# Check FASTQ reading and filtering.
class TestFastQFilter(unittest.TestCase):
    """Round-trip one FASTQ record through ``FastQFilter``."""

    def setUp(self):
        self.input_file = "test_input.fastq"
        self.output_file = "test_output.fastq"
        self.min_length = 50
        self.min_quality = 20
        self.min_gc = 40
        self.filter_obj = FastQFilter(
            self.input_file, self.output_file, self.min_length, self.min_quality, self.min_gc
        )

    def tearDown(self):
        # Remove both fixtures; a missing file must not hide the real
        # test failure behind a FileNotFoundError.
        for path in (self.input_file, self.output_file):
            if os.path.exists(path):
                os.remove(path)

    def test_filter_fastq(self):
        record = "@test_sequence\nACTG\n+\n{}\n".format(
            "".join(chr(self.min_quality + 10) for _ in range(4))
        )
        with open(self.input_file, "w") as f:
            f.write(record)
        self.filter_obj.filter_fastq()
        with open(self.output_file, "r") as f:
            filtered_record = f.read()
        # Only the trailing newline is normalised, and on both sides: a
        # stripped string can never equal an expected value ending in "\n".
        self.assertEqual(filtered_record.rstrip("\n"), record.rstrip("\n"))


# Error checks for functions that raise ValueError.
class TestProteinTool(unittest.TestCase):
    def test_incorrect_options_input(self):
        with self.assertRaises(ValueError):
            protein_tool("protein1", "protein2", options="incorrect_option")


class TestFastaFiltering(unittest.TestCase):
    def test_incorrect_gc_bounds_input(self):
        seqs = {"seq1": ("ATGC", "good_quality")}
        with self.assertRaises(ValueError):
            fasta_filtering(seqs, gc_bounds="incorrect_input")

    def test_incorrect_length_bounds_input(self):
        seqs = {"seq1": ("ATGC", "good_quality")}
        with self.assertRaises(ValueError):
            fasta_filtering(seqs, length_bounds="incorrect_input")


class TestOutputCreating(unittest.TestCase):
    def test_output_creation_with_filename(self):
        # The output file must be created under the requested name.
        outline_new_dict_fasta = {"seq1": ("ATGC", "comment", "quality")}
        output_filename = "output_file"
        output_creating("input_file.fasta", output_filename, outline_new_dict_fasta)
        self.assertTrue(os.path.exists("fastq_filtrator_resuls/" + output_filename + ".fastq"))

    def test_output_creation_without_filename(self):
        # Without an explicit name the output file takes the input file name.
        outline_new_dict_fasta = {"seq1": ("ATGC", "comment", "quality")}
        input_path = "input_file.fasta"
        output_creating(input_path, None, outline_new_dict_fasta)
        self.assertTrue(os.path.exists("fastq_filtrator_resuls/" + input_path))


class TestLengthCheck(unittest.TestCase):
    def test_length_check_within_bounds(self):
        # A sequence whose length lies inside the bounds passes the check.
        sequence = "ATGC"
        length_bounds = (1, 10)
        self.assertTrue(lenght_chech(sequence, length_bounds))

    def test_length_check_outside_bounds(self):
        # A sequence whose length lies outside the bounds gives a falsy result.
        sequence = "ATGC"
        length_bounds = (10, 20)
        self.assertFalse(lenght_chech(sequence, length_bounds))


class TestQualityCheck(unittest.TestCase):
    def test_quality_check_above_threshold(self):
        # Quality above the threshold passes the check.
        quality_sequence = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
        quality_threshold = 60
        self.assertTrue(quality_chech(quality_sequence, quality_threshold))

    def test_quality_check_below_threshold(self):
        # Quality below the threshold gives a falsy result.
        quality_sequence = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
        quality_threshold = 100
        self.assertFalse(quality_chech(quality_sequence, quality_threshold))


class TestDNARnaTools(unittest.TestCase):
    def test_input_rna_sequence(self):
        # An RNA sequence passed as input must raise ValueError.
        with self.assertRaises(ValueError):
            dna_rna_tools("AUGC", "length")


if __name__ == '__main__':
    unittest.main()
+azure-ai-textanalytics==5.2.0b3 +azure-ai-translation-document==1.0.0b6 +azure-appconfiguration==1.3.0 +azure-applicationinsights==0.1.0 +azure-batch==11.0.0 +azure-cognitiveservices-anomalydetector==0.3.0 +azure-cognitiveservices-formrecognizer==0.1.1 +azure-cognitiveservices-knowledge-qnamaker==0.3.0 +azure-cognitiveservices-language-luis==0.7.0 +azure-cognitiveservices-language-spellcheck==2.0.0 +azure-cognitiveservices-language-textanalytics==0.2.0 +azure-cognitiveservices-personalizer==0.1.0 +azure-cognitiveservices-search-autosuggest==0.2.0 +azure-cognitiveservices-search-customimagesearch==0.2.0 +azure-cognitiveservices-search-customsearch==0.3.0 +azure-cognitiveservices-search-entitysearch==2.0.0 +azure-cognitiveservices-search-imagesearch==2.0.0 +azure-cognitiveservices-search-newssearch==2.0.0 +azure-cognitiveservices-search-videosearch==2.0.0 +azure-cognitiveservices-search-visualsearch==0.2.0 +azure-cognitiveservices-search-websearch==2.0.0 +azure-cognitiveservices-vision-computervision==0.9.0 +azure-cognitiveservices-vision-contentmoderator==1.0.0 +azure-cognitiveservices-vision-customvision==3.1.0 +azure-cognitiveservices-vision-face==0.5.0 +azure-common==1.1.28 +azure-communication-chat==1.1.0 +azure-communication-identity==1.0.1 +azure-communication-networktraversal==1.0.0b1 +azure-communication-phonenumbers==1.0.1 +azure-communication-sms==1.0.1 +azure-confidentialledger==1.0.0b1 +azure-containerregistry==1.0.0b7 +azure-core==1.20.1 +azure-data-tables==12.1.1 +azure-digitaltwins-core==1.1.0 +azure-eventgrid==4.7.0 +azure-eventhub==5.6.2 +azure-eventhub-checkpointstoreblob==1.1.4 +azure-eventhub-checkpointstoreblob-aio==1.1.4 +azure-eventhub-checkpointstoretable==1.0.0b1 +azure-graphrbac==0.61.1 +azure-identity==1.7.1 +azure-iot-deviceupdate==1.0.0b1 +azure-iot-modelsrepository==1.0.0b2 +azure-keyvault==4.1.0 +azure-keyvault-administration==4.1.0b2 +azure-keyvault-certificates==4.4.0b2 +azure-keyvault-keys==4.5.0b5 +azure-keyvault-secrets==4.4.0b2 
+azure-loganalytics==0.1.0 +azure-media-analytics-edge==1.0.0b1 +azure-media-videoanalyzer-edge==1.0.0b3 +azure-messaging-webpubsubservice==1.0.0b3 +azure-mgmt-advisor==9.0.0 +azure-mgmt-agfood==1.0.0b1 +azure-mgmt-agrifood==1.0.0b1 +azure-mgmt-alertsmanagement==1.0.0 +azure-mgmt-apimanagement==2.1.0 +azure-mgmt-appconfiguration==2.0.0 +azure-mgmt-applicationinsights==1.0.0 +azure-mgmt-appplatform==6.1.0 +azure-mgmt-attestation==1.0.0 +azure-mgmt-authorization==2.0.0 +azure-mgmt-automanage==1.0.0b2 +azure-mgmt-automation==1.1.0b1 +azure-mgmt-avs==7.0.0b1 +azure-mgmt-azureadb2c==1.0.0b1 +azure-mgmt-azurearcdata==1.0.0 +azure-mgmt-azurestack==1.0.0 +azure-mgmt-azurestackhci==6.1.0b1 +azure-mgmt-baremetalinfrastructure==1.0.0 +azure-mgmt-batch==16.0.0 +azure-mgmt-batchai==7.0.0b1 +azure-mgmt-billing==6.0.0 +azure-mgmt-botservice==1.0.0 +azure-mgmt-cdn==11.0.0 +azure-mgmt-changeanalysis==1.0.0 +azure-mgmt-chaos==1.0.0b2 +azure-mgmt-cognitiveservices==12.0.0 +azure-mgmt-commerce==6.0.0 +azure-mgmt-communication==1.0.0 +azure-mgmt-compute==23.1.0 +azure-mgmt-confidentialledger==1.0.0b1 +azure-mgmt-confluent==2.0.0b1 +azure-mgmt-consumption==8.0.0 +azure-mgmt-containerinstance==9.1.0 +azure-mgmt-containerregistry==8.2.0 +azure-mgmt-containerservice==16.3.0 +azure-mgmt-core==1.3.0 +azure-mgmt-cosmosdb==7.0.0b2 +azure-mgmt-costmanagement==3.0.0 +azure-mgmt-customproviders==1.0.0 +azure-mgmt-databox==1.0.0 +azure-mgmt-databoxedge==1.0.0 +azure-mgmt-databricks==1.1.0b1 +azure-mgmt-datadog==2.0.0 +azure-mgmt-datafactory==2.0.0 +azure-mgmt-datalake-analytics==0.6.0 +azure-mgmt-datalake-store==1.0.0 +azure-mgmt-datamigration==10.0.0 +azure-mgmt-dataprotection==1.0.0b1 +azure-mgmt-datashare==1.0.0 +azure-mgmt-deploymentmanager==1.0.0 +azure-mgmt-deviceupdate==1.0.0b3 +azure-mgmt-devspaces==1.0.0b1 +azure-mgmt-devtestlabs==9.0.0 +azure-mgmt-digitaltwins==6.0.0 +azure-mgmt-dns==8.0.0 +azure-mgmt-documentdb==0.1.3 +azure-mgmt-edgegateway==0.1.0 +azure-mgmt-edgeorder==1.0.0b1 
+azure-mgmt-elastic==1.0.0 +azure-mgmt-eventgrid==9.0.0 +azure-mgmt-eventhub==10.0.0 +azure-mgmt-extendedlocation==1.0.0 +azure-mgmt-fluidrelay==1.0.0b1 +azure-mgmt-frontdoor==1.0.0 +azure-mgmt-guestconfig==1.0.0b1 +azure-mgmt-hanaonazure==1.0.0 +azure-mgmt-hdinsight==9.0.0 +azure-mgmt-healthbot==1.0.0b1 +azure-mgmt-healthcareapis==1.1.0b1 +azure-mgmt-hybridcompute==7.0.0 +azure-mgmt-hybridkubernetes==1.1.0 +azure-mgmt-hybridnetwork==1.0.0 +azure-mgmt-imagebuilder==1.0.0b1 +azure-mgmt-iotcentral==9.0.0b1 +azure-mgmt-iothub==2.1.0 +azure-mgmt-iothubprovisioningservices==1.0.0 +azure-mgmt-keyvault==9.2.0 +azure-mgmt-kubernetesconfiguration==1.0.0b1 +azure-mgmt-kusto==2.1.0 +azure-mgmt-labservices==1.0.0 +azure-mgmt-loganalytics==11.0.0 +azure-mgmt-logic==9.0.0 +azure-mgmt-logz==1.0.0 +azure-mgmt-machinelearningcompute==1.0.0b1 +azure-mgmt-machinelearningservices==1.0.0 +azure-mgmt-maintenance==2.0.0 +azure-mgmt-managedservices==6.0.0 +azure-mgmt-managementgroups==1.0.0 +azure-mgmt-managementpartner==1.0.0 +azure-mgmt-maps==2.0.0 +azure-mgmt-marketplaceordering==1.1.0 +azure-mgmt-media==8.0.0 +azure-mgmt-mixedreality==1.0.0 +azure-mgmt-monitor==2.0.0 +azure-mgmt-msi==6.0.0b1 +azure-mgmt-netapp==5.1.0 +azure-mgmt-network==19.2.0 +azure-mgmt-notificationhubs==7.0.0 +azure-mgmt-operationsmanagement==1.0.0 +azure-mgmt-peering==1.0.0 +azure-mgmt-policyinsights==1.1.0b1 +azure-mgmt-portal==1.0.0 +azure-mgmt-powerbidedicated==1.0.0 +azure-mgmt-powerbiembedded==2.0.0 +azure-mgmt-privatedns==1.0.0 +azure-mgmt-purview==1.0.0 +azure-mgmt-quantum==1.0.0b2 +azure-mgmt-quota==1.0.0b2 +azure-mgmt-rdbms==10.0.0 +azure-mgmt-recoveryservices==2.0.0 +azure-mgmt-recoveryservicesbackup==3.0.0 +azure-mgmt-recoveryservicessiterecovery==1.0.0b1 +azure-mgmt-redhatopenshift==1.0.0 +azure-mgmt-redis==13.0.0 +azure-mgmt-redisenterprise==1.0.0 +azure-mgmt-regionmove==1.0.0b1 +azure-mgmt-relay==1.1.0 +azure-mgmt-reservations==1.0.0 +azure-mgmt-resource==20.0.0 +azure-mgmt-resourcegraph==8.1.0b1 
+azure-mgmt-resourcehealth==1.0.0b1 +azure-mgmt-resourcemover==1.1.0b2 +azure-mgmt-scheduler==7.0.0b1 +azure-mgmt-search==8.0.0 +azure-mgmt-security==2.0.0b1 +azure-mgmt-securityinsight==1.0.0b1 +azure-mgmt-serialconsole==1.0.0 +azure-mgmt-servermanager==2.0.0 +azure-mgmt-servicebus==7.1.0 +azure-mgmt-servicefabric==2.0.0 +azure-mgmt-servicefabricmanagedclusters==1.0.0 +azure-mgmt-servicelinker==1.0.0b1 +azure-mgmt-signalr==1.0.0 +azure-mgmt-sql==3.0.1 +azure-mgmt-sqlvirtualmachine==1.0.0b1 +azure-mgmt-storage==19.0.0 +azure-mgmt-storagecache==1.1.0 +azure-mgmt-storageimportexport==1.0.0b1 +azure-mgmt-storagepool==1.0.0 +azure-mgmt-storagesync==1.0.0 +azure-mgmt-streamanalytics==1.0.0rc1 +azure-mgmt-subscription==1.0.0 +azure-mgmt-support==6.0.0 +azure-mgmt-synapse==2.1.0b2 +azure-mgmt-testbase==1.0.0b1 +azure-mgmt-timeseriesinsights==1.0.0 +azure-mgmt-trafficmanager==1.0.0b1 +azure-mgmt-videoanalyzer==1.0.0b3 +azure-mgmt-vmwarecloudsimple==1.0.0b1 +azure-mgmt-web==5.0.0 +azure-mgmt-webpubsub==1.0.0 +azure-mgmt-workloadmonitor==1.0.0b2 +azure-mixedreality-authentication==1.0.0b1 +azure-monitor-opentelemetry-exporter==1.0.0b6 +azure-monitor-query==1.0.1 +azure-purview-administration==1.0.0b2 +azure-purview-catalog==1.0.0b3 +azure-purview-scanning==1.0.0b3 +azure-schemaregistry==1.0.0 +azure-schemaregistry-avroserializer==1.0.0b4 +azure-search-documents==11.3.0b5 +azure-security-attestation==1.0.1 +azure-servicebus==7.4.0 +azure-servicefabric==8.0.0.0 +azure-servicemanagement-legacy==0.20.7 +azure-storage-blob==12.9.0 +azure-storage-blob-changefeed==12.0.0b3 +azure-storage-file-datalake==12.5.0 +azure-storage-file-share==12.6.0 +azure-storage-queue==12.1.6 +azure-synapse==0.1.1 +azure-synapse-accesscontrol==0.8.0 +azure-synapse-artifacts==0.10.0 +azure-synapse-managedprivateendpoints==0.5.0 +azure-synapse-monitoring==0.3.0 +azure-synapse-spark==0.8.0 +azure-template==0.0.18b3 +Babel==2.8.0 +bcrypt==3.2.0 +beautifulsoup4==4.10.0 +biopython==1.81 +black==21.10b0 
+blinker==1.4 +boto3==1.20.34 +botocore==1.23.34 +CacheControl==0.12.10 +cachetools==5.0.0 +certifi==2020.6.20 +chardet==4.0.0 +charset-normalizer==2.0.6 +click==8.0.3 +colorama==0.4.4 +coloredlogs==7.3 +command-not-found==0.3 +ConfigArgParse==1.5.3 +connection-pool==0.0.3 +contourpy==1.2.1 +cryptography==3.4.8 +cwltool==3.1.20220224085855 +cycler==0.12.1 +datrie==0.8.2 +dbus-python==1.2.18 +decorator==4.4.2 +distro==1.7.0 +distro-info==1.1+ubuntu0.2 +docutils==0.17.1 +drmaa==0.7.9 +dropbox==11.26.0 +filelock==3.6.0 +Flask==2.0.1 +fonttools==4.51.0 +frozenlist==1.2.0 +ftputil==5.0.3 +gevent==21.8.0 +gitdb==4.0.9 +GitPython==3.1.24 +google-auth==1.5.1 +greenlet==1.1.2 +gyp==0.1 +html5lib==1.1 +httplib2==0.20.2 +humanfriendly==10.0 +idna==3.3 +importlib-metadata==4.6.4 +ipython_genutils==0.2.0 +isodate==0.6.1 +itsdangerous==2.1.0 +jeepney==0.7.1 +Jinja2==3.0.3 +jmespath==0.10.0 +joblib==1.4.0 +jsonschema==3.2.0 +jupyter-core==4.9.1 +keyring==23.5.0 +kiwisolver==1.4.5 +kubernetes==12.0.1 +launchpadlib==1.10.16 +lazr.restfulclient==0.14.4 +lazr.uri==1.0.6 +lockfile==0.12.2 +lxml==4.8.0 +MarkupSafe==2.0.1 +matplotlib==3.8.4 +mistune==0.8.4 +more-itertools==8.10.0 +msal==1.17.0 +msal-extensions==1.0.0 +msgpack==1.0.3 +msrest==0.6.21 +msrestazure==0.6.4 +multidict==5.1.0 +mypy-extensions==0.4.3 +nbformat==5.1.3 +netifaces==0.11.0 +networkx==2.4 +numpy==1.26.1 +oauthlib==3.2.0 +olefile==0.46 +packaging==24.0 +paramiko==2.9.3 +pathspec==0.9.0 +Pillow==9.0.1 +platformdirs==2.5.1 +ply==3.11 +portalocker==2.2.1 +prettytable==2.5.0 +prov==2.0.0 +psutil==5.9.0 +PuLP==2.5.1 +pyasn1==0.4.8 +pyasn1-modules==0.2.1 +pydot==1.4.2 +Pygments==2.11.2 +PyGObject==3.42.1 +pygraphviz==1.7 +pyinotify==0.9.6 +PyJWT==2.3.0 +PyNaCl==1.5.0 +pyOpenSSL==21.0.0 +pyparsing==2.4.7 +pyrsistent==0.18.1 +python-apt==2.4.0+ubuntu3 +python-dateutil==2.8.1 +python-dotenv==1.0.1 +python-irodsclient==0.8.1 +pytz==2022.1 +PyYAML==5.4.1 +ratelimiter==1.2.0.post0 +rdflib==6.1.1 +rdflib-jsonld==0.6.1 
+reportlab==3.6.8 +requests==2.25.1 +requests-oauthlib==1.3.0 +roman==3.3 +rsa==4.8 +ruamel.yaml==0.17.16 +ruamel.yaml.clib==0.2.6 +s3transfer==0.5.0 +schema-salad==8.2.20220103095339 +scikit-learn==1.4.2 +scipy==1.13.0 +SecretStorage==3.3.1 +shellescape==3.4.1 +simplejson==3.17.6 +six==1.16.0 +smart-open==5.2.1 +smmap==5.0.0 +snakemake==6.15.1 +soupsieve==2.3.1 +SPARQLWrapper==1.8.5 +stone==3.3.1 +stopit==1.1.2 +systemd-python==234 +tabulate==0.8.9 +threadpoolctl==3.5.0 +tomli==1.2.2 +toposort==1.6 +traitlets==5.1.1 +typing-extensions==3.10.0.2 +uamqp==1.5.1 +ubuntu-advantage-tools==8001 +ufw==0.36.1 +unattended-upgrades==0.1 +urllib3==1.26.5 +wadllib==1.3.6 +wcwidth==0.2.5 +webencodings==0.5.1 +websocket-client==1.2.3 +Werkzeug==2.0.2 +wrapt==1.13.3 +yappi==1.3.3 +yarl==1.7.2 +zipp==1.0.0 +zope.event==4.4 +zope.interface==5.4.0 From f872b23bc6ea7d7ddefcd8c22af01ed0fd58068c Mon Sep 17 00:00:00 2001 From: Gleb Bobkov Date: Wed, 1 May 2024 11:55:27 +0300 Subject: [PATCH 09/10] add screenshot for ipynb --- RandomForest_before.png | Bin 0 -> 32585 bytes 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 RandomForest_before.png diff --git a/RandomForest_before.png b/RandomForest_before.png new file mode 100644 index 0000000000000000000000000000000000000000..2f8ebf0005317fb8610e7c55c2caab999ce08396 GIT binary patch literal 32585 zcmd?Rd05ix`|oS3!PTUzwMr|e)o7MO<&-0}T4rWy&f=Ik<$#)qvryj#D@)5+P$zs34`>%8U5HHXVJj4BY-mm9<-w)4j zS)1?Qb99fCl+=EU>wnuxN$nVylG;W3^%u!2(O2~JCBL?T?acp@s_0jolRViGaK-A1 zlvGvf-VN`alIOd_uLHqSQol!T{%o7{SBa65f=*ieeZ?WhbDnac^5>UEO$OIqXH}Lp zTV!|q!}eL(y!p#bWp6GYzMW+BzVM!oOzVUF+f38$Z~yv5CjVD+qxakBDGNIn{0=4i zI#zx4uLN@YUI=2p{pO?(?6`9X!Hjoa9TT&}+oh!LyHuZ%G-1%!R`OWTRUmo%JKb!X zl+@GH$&&W}W+f+S;h%7IN$)P}01rwoxa%e-1c>S44e)&SD4zc_y%MiA0EG(qs;C6+ z)!3CDHlfDPO#>(BV|sNHNUPKYP8S`I!WE%ZgOkh*yXloM%>jSDOS6eQ6_yy;piRdP z9YA22UkT%NfDJARP-Jg#bh65L`)X=JrnR?6K9s~PGj;3ya+f0w6~U+*UScY9VF@M% z_%X|jN)r|Man)oX6~4l+KBBaM7qMV1qA;-lH8b81+X!yv!NlvZb#|Bt#F)tz^Ry_k 
z+FBzK}t!qWqrfe6o+#MBZ@npw9D(b;H{}OOq@k1KgH>%nNg(l&Ns3 z81W6>ZsYnMU?d^qed9O4*|nx8jv7cyZU{qzRUg7Y+=R#JOQ)J;XVY9`qgr<<)sddy z;HeO&_0xTen#3Qc)atDN>Y1##97cPOY<17CgpUX7Q}pkvHU^WpN9qpL8ZZfgfc^qP zz1mIdBNqXpPVrgp>x*>Go!FnRV&P`01}UMbuvo5_-_@*L2{9(M&V$YOf}bUYp#-ji z4W2CF7c63A_0wzbB;4@m2SX2v;50Z%e=?P#hG!nx^}We1@t0JVXuypB-X%ZvZ);%t zE0{o9NowuTS)%IVnD1cf;3~CNWp~?p>V;UDz^W!$uB_*d`(AZh*Nx_xC3(T}&7R;E zlK+*NzQYcNuhNCxs}hV9{3$qKG3v{8x)D|(4g9FudO&Iovh|h~n-LYGS1j0}jF3dn z<#(I!v>8o*I+U3{=-iC1-_Q=I4?dgG_Nyb>=6ls<^ga5gJ(6qx%4GZz6S5hdZ#Rct zPZE6hZ~cG!?Noy0gd(PmsaU8rvs-F_l}&|-x<;wls!{`6LlsX4M(%l;9tJ&QgEB+E zGUbJYo#1RydFTj(kxGKrFWL%g>JEs*jDJ{Rk}y3 z3T*8(#me9DTDShAHvsF_WCc=9&%=!ko{ktd&o0KYOOLEN1HOBl*$fQR&+72*LTZbt zI?T?5lnsQfQv#+*`T(r5m#8k}yU%jFU<(S#0zsFnsWG>!*l=^c~xTXh% z=HDUAT1VuV5n6woysLBxbV%|~G6~&lDD7mVmC*})da{04;$jnBza>1zI3pfCDccUh+T?hD1!_}E14Gi`NbV5D+y ztF~;or54rU9L1Wgtoku2*ntcdjdjDZLeyK_9rvb|ctNUCmZ#Zoe7IR~@? z2J;S|V|n1lNt4B;Mhv}|l-4*W=+2A)se!kfH)=E7EZxpSR~#&7ndo#j#{mKXULaof zNVI?6W0%De&3bkha>`!|sEJordo3=hBW}#(Yfl}04*%O0bmOro8U=L<5|ky`;5U{C zhWA0X8g)UQrSWXV+BF%6!V-=M~yGq%JB-bSJQfsPsIYmPX^?+$TI$m;}ea7rh( z{%r-<7)kyZh`dB;>~clW+$(aYCuVGQn+y!1t5aAd#*(&EGSl8jhv&S~ zmQubR#yiodQJlR1MiwW8=@Z`;?+K}a5SH7AeMMmv!?4x%ER_{q;p3yNmIvK1i3>M` z1KUme&$AAv)uV=Cfi9{j%_|^#Tcz4}$Elf4{EkY-g<7ojv>!W?9(21d$U&Yb|HWgo zyDcfr7v{A}7rHI1-TJuc)L)u+g5o@rUPITGY`CzVOz`=x11Vv9(^rvZTv2d4FNxA0 z{kREBkFV>boa$wP`$fM!Bts%HY_}oHH>Wl}NY0}>n@k)6`wy;*jj7Ib$BRcm@bt?S zoYJ(`*q=@W(=rRqCBVAosRw5PC(F{YM54N@E|6e$bW(6fnJ#&Pa)7}dv)-9c$`}FA z4&!02ahJk$l_HM>9KD(k=P=zc?J?ub^+3hXMLJ(EqLZytBPKj&FMIJGfpm+aKRm(02vr76*H3{9Sl+GI4rmL-AJ2 zvj)s;FvS6V1*(WIb`Z~inzOlzVcoXi!zW4;X{X(O=AgNC4K|sAp~Z&uYp63r@3N%c zw|K6^#>siCS6bsk9rLZ7SoQ31a3L|(OQUq;#aok^D8l|yBbCoZf{e<>;OniS*8NJA z)1mv;d&=+;r%SQj8%4!|r-SsvnCFLNV;S}LN@NiE(|K*HfS>9lif|!(eIf4WUvjN< z@E{0wpKiCQZ4{K5eN}a4A`Vl09vjY1V(!prS&hSr&hNp;gs;O{^>+F+sURj2_a zHoMH9;_mVKT8tm-nj}I-Ut+ypgY4r2`vm7y4T6at@5rHnPGn|%BqB~hu@;%zh>544 
zZB1s2@>puX+E$k?w~t!^Ksh4MCHMbf+7gIMnibEx>|2ee7W>B-3>OztdkfNAAc4(o$S>oci%j_rh^F6$A}^U_S#@GL^B@FzUv83Z zpLg9{{Zv22u*AICrZvOa-afvYQg`8^O>!u{iI z4V>(qBGHG5oY6-eT;D;(m4)@fAsr(E)?c2%P%s;uROIYOH;B3>-w~(wgPNsfs%MMB zVvH2B`GFImj^xr#MRR$gQgdvgu~~brbd2RDa**xY3fGavKjsWX*HH|1+wiVme(p2a zP>>s#Y79+FSA4;?{3Ip6wK%AF%V7aeV-%{-w8(SmP?aH#8Rzd3A+LEfkkAQ5+FWdA zzn3ybY;jegWDwYRD07d6+~d15dogUS$)a#wIlk`jATB(4+6W};p3J%JI!wg1@e(a{B&&!WJn5q}Iu*Kl$CbIUwv2qo~!emKLM~rZ_+Amyr)fkI;j6a zs`HkbAM+n{*e;~;ypM6OqQOnBL73oXbvVi?)nNyBP%zaguhq6rnH{>+TTo{4m3o?% zp$kB2+z|?X0u)TZawZm5NkARSwFMbhIDdJ9aC?(E;X+PV6!8jlwC=9D9^=3u~_ z*7CUAL9m)ObI6CpN2qXA^J-?m{jA?Io8XPjav{}sMRGHoDMz)(7#M8qNm7P)R;7Ez zAuwN=RvoY!xc~-1=CsE)=57&vl~`?d)H_i^$T!YAd?OqX=c>%)_md82Vw>1)D}3z2 z`#52Pt^q9%O%Z z8Na?ELPBw(am5#(rGu-_Qb4TeXnm!J;=%bDlv;mbr z7kAt$sn0+ehkke?m%cP1W6k|M*B_Yo@Y`5kk1jZ`*%+N4YL%K_?$fMh6R%6`nmrSM z9pQKFXByv*|u>n&TnBcS4WE#yiilonoL%tL zrz#W1Xn*Tdn$gpG(LEH<;=gE436A<-b&#yU{x5zpJ*|m4q(UHwlGwaU*FUr*xvC9oa?4!P^~-U9-Qg5fmXgs4uZi| zt)nY;Ibp6rY+>FLYPyxBGoP0X7rwm5=QT+L2BbS+1Kpd4r08`88UsXuqOit@dJ9vx zAGcLv98PMCWERMG>Unwe9FX%WTc8UEwmX zdvc3IzD@qGIph!(Qh+G3u6J^-xl8obm?bQZ$a61M`VR_<1cdTm+)Ox8V|?4 zJr*5r8xlzo&bj^Cb*J`LAYz1(n2j4@TqCLTXrRKny~p)cfH&3?;w|60I}Z=>Hb{F+B+ z{$l)ZOF$idHVs?PWM;Nr*}2Ij5v(^iBwR8)G7fPvCTB2tSKb}oS)=jGj8pLZcv8|^ zQ`_=F^z-%VfvDx_ecEB96!Mpj)8~h=DfWI;Z&rgN@;N9DF^$|emqo~zt-h=8>|Auf z6WDz1sGEs`bEFT`*_S-*=~^S|w&-_-$TRBQj`HIi>B={7bM%i_=(z@NKSCy_CIfGV zEye?G#@oq(*gnqrR9Mt%Q;oVI)tN2maZPi}r$!9aJ?uR(fgGWtL2MQ)?7_$yci>~b z%5D2C+T!9p925__wN^M*4<}0~#eG7ZWEF5~@Ur!<RxLQKs(2Pg^sJ*I@xsd(hA{SKUigtgjFVx zXu5<|CMy+Jp1>x57#NQVx$ZKUXxRKdKiIYEg@<5IB!7}FO7P|h{cjb$y;VSMXna;&)a{v^=BY)gsv$%|IG{gyGlcQ&FOouo>VAHw+)YQ~-M| z+t+f_%W5^-OT?K=!h729VT3(l|B#8rp}cHx!!}T^;Gu{+Y5cX4sUUu>GoT79pAhfb z_TwLgzawY$_>+a>^3}82P57Wv^$WcP{6O9sr_yhG2vRkt!Cd`dk(1Ped6 z$T14OsGoof3PyvqYG9 z%$Bl~S^p{K%diq_l`Sd5eykU&}9Y%|;*GqGD`FO&9V^BqfVe_a) zRe-ft?7)oY@Wd)f!8P78PunimZr{QyZq=@By5}A=8aFepVJKKgo28Aa(Lk3@4mzm@ z+)|cHY>s?5F#u`}5Byd*7po%g5kvhwQTMmAy!o3K(N<0J5Pg-e>irR5)@OQMH6 
z6$K{iu|n^;`7-o*t4+SJq??s5s_kw4J{e8iC3dvLzKa#tO^sZ|bHAU46*P;fZw(fp zS@l#?;jyTIY)VeRcgmLAjf9(J!^=gnA-75*AOGcrQVJM+nD*nd#S@7I6HLvMC76SF zh3FjmEN47gEhh4DJi|8_a?F1|rS4I#P7ANYxL@N2L^o0Q$=T3DtiY0AZI7-@y)|@? z%K&yiC?(e}hff9>&R*;{Et(m{Iob3U5Y@CgLB;8ml#77+-Q`Xa;Ul#T&DwD56_k)h zrc1AbzN9TjhBhm-R|g@4=jhmxy@jm@qY0ibg9OJ|2^;;-??qKw>Z7I-)y(0O6Bc4z zqsTLRp8e`!yg%z2+Bm$BxG>T*Sv>m0O*W7|DRFLCS8!t82cqAm>zXeNd}!V>`%QZ2 z1^0lpPZ}A;uG;;4u*%_~MG>11@`%{eqf2zCg%+Wy`zc@RVkyxKM z?)MNKL|p;uY%4PqmW)uXRWVWI=!V8YMrUP~eejoW$cW~-TO?46i+=T8#2Y6{$6 zrR7Rrm~W9n8Rx@Gr|5t|e|q!sm8GV=z(|qbu*w~Y4~2`le|c-Y76Nq~rQ8g7n$>rC zbXI=4)m6|Fi=E3y4XpKM&cbd91zrcR|u?%!h3ziu`$HH?DmAf^MLX7XJnTyQ*L0A_jz@WRdR*SS!wfc^zY z8D>f4Qv_zIMBteXRo>?xp46+POm$8*kD#>>HL=(eUMFS+?X{og95e@lN;iX6$yc8x zbO=kjw1%xR^7|)!6Ez@?vK`#8lVq$UMv(iNpQRqabo3*$X0_<}*84fkQJyXRWvIRL zbe2xtKalm1V0U3PS0j_s98;q&&*T+a|JrjRd(N$O((j?6qcvxwm?eli8jyaC;naEM zRG?v=a!<*$Q|wB;)tq~G%Z#@J*ac(=u6E(O5bwHqv9dch}=9OB^rmm|4b*OJD zRB<)&l)x-zSTxz=oHXg5be>`3)zdta@aT!i0e5$?Z`knJ((aL#z*fuBd-VWckM_OV z6M2L4jHZLl%VWLXy780=nbNz9@zPHnHtEx3#;A!s-m3Ca*`gREw%w<4Cb{E}8L24V z3hx1hdDocE$P@W(dwOg0e-j@8BaeRT)7s^AY%<|*bvc`hFwHprv=(srDp#n_jA$Xs zKU5gtD(r|c+KM#uTr*WFjFNz2seMMIKJED7Inz@WotX5)r>|fJ^Yz4Qud7N4dvTY5 zj|GT!EFav%==m@ZsYy71nnvh;AaAX?|GQIj^{KkaP%%#xB{mhRyxsnxnb)q5+Mv~W z87#io%9=(a%}T#r;?t5i%fJoJ`ar3sBxRh2hw^@;YMHM042U=AsJ4yiWO;z7tVPCo zgB6VuE)l3AtZ@%i#ERDPT-M?o$aP-C%#Cn+yiA3Q*_`z-!Mbl;ar0oSGysoWs8@;G zhxpeIRe7;tu0PKL*10jm3p3^aWUjUxT@2m=(Zr`f9kaglA=OMJ6drje;FRApZ@E4j zOlLMsL*X{p7^veBazTDf7SL1_)>E_W0@Rr)UVk_u`j#m%jnX{_sgq&JUklQ4fqR#? 
zIwiYg)Km(L+~}N>keb)PP-m&>+?{||W~e{UB$xQ|DD3X__EU-TJ_7|di;`s3(`D73 znTj?ssi!6mW6V$K+?BdzVE@mEc2;BYwFJ@bVwyjVhrBg8wQ}$Vz*jckOo5+17OjWG zbr*zD6vzHyjOx2(VYD#vX~$dc5k7+b`JgKo#JgihiaQ&Z%i1#PHp~(v2`bGS{dX9* z?91q><|Ap5)-qh@3*17Xb4h#U721ENGL?@1k;;T`Lej*%dCu&O#i&P?n8<2J*||Uy z1xON+*@-3LfBi`#HadWjD)Jn1*By;cS81S)8Mr*upvgPL4O6VLBPWP?@df30iMjR* z?T0e#SN{G8hR^>6mnqgZt@H;??v1r?&fp$fKsK!_PS%~C@oD8V%PqKrz(hCZai{1` zw|Xb@Ba{WS8?&iBPP&33*b$}>?q_s|7{GPRnBxt2iO=J(P#3NHP2}d#(Hab}7G7Nb z@@d~l5MkWV8h+@={17@@?0i-?Y1OgpYT}HrQ`(@EqB5Ue> zDnv+7?V`fuZf;eR5#y{<(@S6HeKH4G} zlT;gG;?|k&Wr8`kF6F!xberj~vYjZ}mpd+uLZd%dAC6f}PR@SYc`M0o@2vF-GNm3~Bzrd|x+T7{HTpKF!?E0cS5>{r@2OrMaRDZ{g$m-y+B{j@`ec+e znIf!r5|)VcE*+WJO`fkMm{lk5N~+jjbJw&xu7TAc?0>i}E;AB2KMi6%V~f+8b7m@8y(9s!TW{d(PgF!dsc_L zMi_r}v>1*2?mx8UT;a|T{T>GGbDTI!BHTPspflfo71FSLrY0lZ6Bjr))k2K?=9d?o zb!Ib@wwKR}siGaonyw$}3N_DFA@6X)I3*QW={k3aGOxklILhqeaKg+!n}i~G)k%|l zvc#t=C;5xJV==r>ceOgybtkfX5>szV+^Fejr`)lKJ?$NFtJjYBH(C(^`Vo=yebMhl z15rXy_{D}lCZ9&BT$5vY^cUFdD@ae213}L@;!FES?xn5fy-Z5vFn2fnlXO={C4cc7 zY-ZlKeWEtAzEm=O2j|es?FYX#CXNI@Low!yr#hY+s6IdYp~zMu=O0V{?Re!wDQLs| zauSF-MZ1Ps<`WZU{r9oda0r=V%9muwiRAj-K}66|&5<^~MN*e!%Q7*$=z?rt0;*hc zFQ*)4?DZM(2$T3=Lwx9~z9D^rsjdMuVCI?dp5dLz*ajE&M&5;;Yp<5%r1#*FJ`v>n7idth%#+=7Jm?->a>Mn6s;|C~fDIuo^(WlnFxI z>Y6ox$C-xZO1N(ODyuN?MEWr$x6)H&EhnWIy)t5qy$Op?|7Z|;uDM&usdU2dNY`tw zjBM}rHuh!JWMaP^?8<2gr{>p@4B1ae^LkzHr$Il7- zMuOO`te0mXx*`Q&TWpFxQK)DD7zQ)pv`E_(hgXsJ~FAwPP z(}}!1i9LEbjSk*0VumOwOpl@9)WoO{*SGxTe-JOubPMsyq)0)xm&u8me*~~fCRjXx zW%k*q*x^$lxUzF~mT*^C1 z5}y&=I?1XxHjz1NL6};0(US~J4GwgJZb#FQErhoR5pvTzx;qVfkNw{w9-Iyla~cwh z^EK~*yVqwVOJj#Ori6P@wK&#fO-tNl9S?sbYUnY-D88F}Z$gb#)4J(#g4!$rKE*J- zJ}b7)ww35ltS4F8c6t;CLZ}~MMSyN(j^H{e_n7yPqRjVBD)(6D>FpyEQ*qmEd_rMg zo1WkT8c+rEobTyTQWiCyM6K5gF3phgNPK2mWKi_}eV6_N|GK=Y*=c(wM1_YM&zR()}%VhQ-YzWmGGwjVcQv2K>Mp_w~DUx z_*3ShOCDQhKxmj#W=-E zk*kIX{L2rILs1{HeKJq$|KtX#^r-h~JM~>l6wLTkRCI-0dSGVqDM$}a%#E-3;w2lA zP|dGG<$f%g4rG_;qs;0&d3@{CV1iMge*^MeNjoSs`dmcyTtj>!&17IU|?1Fv>KE=6BeKuFhFghzWCTRAG=> 
z^adyRPw}01uv6;zozf|=KVIk_Iboq2t@aU&ZONK$yjeMYw=Uq2we`iD=zffkZc=Ef zBRh7&+xp6Cz+ioQq?|m*4(A40;L|ny6;ao5WeY_{or*q;GEb}(P0d5#}Wvs^gyWPE9)Z9*9i@c zV*jcXT9nC0)=k8-8=UeKYiu5*hFfzES27djK!x`&xCV$;@s;=P$pkV9d51=8SG>e4 zfci&K)q&l4VlHd|(0^=^$I+Sj!gAry)zjaWILg!p$AuV`Z>Iz0gacweKs3tn>^(t6 z%FWHM03*YFY&c<$Sn)4Z^|{@Dz&dI>gIw$I&xT5jK20t-4>?luYK1=sHQnsbkgU;& zTGiRO>o(NRS{%YooIM$td1d!KxA@IjC>|_1E#2OXyX5^(=0V~qTGh8H>FW!+HljN@ zClxZ(`@$;ub9L&Z)So3Db1jD!K>Knc2k(%;U6xiW7)$Z6FP2AsGo%>TBUR&2B3tM~ zL1Q?WseHe|)44aJw9jX_Dt$(T+GhvAuaD=^1`TLW?NZ-gRM8i%e#u$Yz7?4~_OA3t z3<=ZERohr2+G1*ah*8d@JAgyvSP!q)arUKThy825hw3ksuiFUK_vRlB< zDg5POg{0JJT8@|Z6$C%(F$jMKQbo90eGiF3aHbk8d`7t=-uSs7InX|HWPd2iOcsDg zv+03{G#@q9bQp>bfj5#1#l-}6>{SK-g>TByl3fS02Gwe0era*r&3Js7AvKtD1$ADw z?*Y<~6{`n-vOV?@=y6Sq-E(N<*5YMfGaF%K+Ndi(kZZ;k5q#1wq+t!&TyJMbx}qI} zt;Y{ZT=?CV0q(YC`WQrt?0vtH^)+gJ!a7z^{uM_mhhArCD&6CAb@;+(G$Xyou?A)*7y-Ew~=wZH7BO`vD- zW5X97Q3jJmS1TJ&rM}OdbR+};aMC9xQV<9S?GZ(NR#apPuL?d*Tdqr~SF(eWG@S@t z4UX}z^S&yjtjwQ&kF1R%(M@_fzJW-mvE+=(M<5xlS+gSZd~%6$7TDb;oLQgyu1MJh zlfQ@=QrLJ$A_s?ASsBDcjdpWn`d;20@*U(~{Tk3)5Qx)DrX3W}yecF$J+z%ut zdvqyU%?TG$ZHk0uS)~{&)7X9TqPU?LD#0`QQUdH{tH*EoMLr`AScyFY2NIJGRl?P1g{YEK-Yaip!}51U#gls05Z=8Afmx-~&yon+%+9Et z^?!_Md=$jR)z(D5cj+mb>5Rs&dozr~NW6Mu&PZV+);ipcz`EEIL6!_BcsH~T&0N?8 zv51olb+=3+*1fJp1o>@mE*8D%cfIr@F1?zY#XRVeb+fMm$;&rkEho_9rkjyP64 z>M70>;D3WOdHZI=Mo+F^P6H1X7$$>a#=oCgHJ|N@N3Hmcr3d~bgv#%#(H)jqHeFf|R4ar1ljCF`>-dz{*?+91 zE2*3^pI8?KU3U7nGhDlo23-&0buFX@s8p@n=u)G z5@%{ay2oo59dK%&+yzz4+k?0Qx!@y{AG!v^YR`^J>J+ckCTw33JW{ld7yFUA4^Emg z0#PSs=RKqcH=eD2{88e)qj#!u3&5jJNzQP(X7qC{=YiQ_3sM?la}euT254vgJ<4`u&-w>Q)_ag&{$e>f=9K&Z_u#m zWF>m-Q|D>verL(yt)of6<zB*~KD9}l6juC6>bRs5!4~kk3;F@YtH(HHtNbPm<;Y2$0r(?VSQG}5c}r>i5E|MghQ^_Osj;oU1xvP zUyuGlzIi)noij5GKD0v8k594b8L16a+$+y-y8wwXP8;^d^R8iR(=YaC6V`AEIi+`6 z?}DA&aU-gBr@^c$N=-F>e+4Yl5|M=>PwQFh^|fC@*+|{@$}aUCELr^F!nPe#9>lmNCEbycd3A#QvV8emZ>@-BF2oTd6rXq#o%E9^swJPpOMC z)tVilU%`ap+K9f&y}$|uOOKbMKPqNWqMaWar8@mz5B={H!OKiZl$e=4k;<+c6)K_tli%$JN+DG`W-!>x@0{oznSIs 
z4QegfeeVd-+n%5zU23Laz;Fet4Apz*;m!tJU_N=>X$&AgU*pqfcoubK@T_q?14fTBeM?dHzR9eR5118~#CZz@w_zV8XfxG2j?f2SNvwiyG@yrZSzN7gUX>;Wp2xI>_CeyzQKdRnyJFZLxqbiGID-txD`vhl9JT-x5jhS zWZLr}%~64!Y34&ef;+K}W?e#?OB&V_4!peCX9>xHpK840rX&}IX`C%De@tXPdsdgVQkiGnm1dS0p`m?x(P!FS;~v>dc}A9AV&fK=!J5`Hq=)F^ipx+K{W=U z<>wnz^3c?^$D&&}u40>>5v>`M zKgRm)jOumP(5M7ET*{c|Z4cSxsiBDw-C*h`awwSzjDEUY>wb7n5y2l9jBE5o89>7F z5@sT^ue2F|;ns}h#{fd4?k6ota{J@lhVMxDz)<%!{DdS|JzX8|tfkj}H>eYImO*IKD!5C+%ruIY%I+stmZ^N$B2-X zKVn**N=e}(Y=kKbcE)v%9&UF^oC@u#E2q-1{g~(YuC_>pIZHC=k>gRdtCW<yJ1v`)OQWQq&R#Is#0wNy~5>%`>0t@B6Ve0k=n5f zz9hncIrCDm)_3*P7T|&ezODE?G_Ku=7xNstJA%AkqHmOPfs$9wF+}!*;SO#8mOdFaky=Z8@>-;_iklg8mI?>#Ktx4Ys0u>tUyW=^-X@p#eIMGkz}~HpOep}5v@uV>dP)= z3}7JZE_4DCS+U~tRemv)0Uv*d)636JhP6KWf-N~s6!;5&@7<5HT*~@*C-dqA{6oZn z6*i%pW9_ax3L-Af@~lZ%3f%eUm#%uOrss9CTl~F-lyt_Y*83 zaMQVC15>PXic;T4=UnAX^fYHA8vi}#KQ%rKv8nstJ^!C+z2KE-gVDhtGsH?O>@ZL3 z%7i7F%8Y@O)CZ$`8p$Jrr(zvD7({w z+*b)WKS5f&sC+RMP}BkAC$)9-$E?6jP~DcUBIf(n^E79@zVkF@ z?8wV0UxM%2wRZ?#;hHSf)QTMXB(+4cW;nCsK1VnMhjo?+;vmQrhkUrR094~44PP~~e z_6Y_h&E?E!CZ|;PCQSGdZeO3a;u5qbTgIs~c9=qZsv_GrGnZ8k;){9y<;(KQ4q#>vqmm>VJ5}i35^!}>K zudfN}tji%gN z&5ik;NvEoK{m6os>c@v&}F=K^A_+PeX20)lZb` z&2;mUvx^n;48=c)7@qd4dg9|7p=rr$*oJcC!k!8A8^v1Jv!9~umF+y#o$5k7_C)LY zwa&%(5B-Yy1*g0yDOB7&0uD^A1N8aO?OpR-u?E?e9D|gmrn6qI&ejZHQds%4Z+J!= zq+5phi=;FWiFiUdM<(dJ>=~J(N7fC1k%ll{kQI>lNs=6}C@Ei0B;@SMoq$e9Hs!i2 zMK*Y#c<})jyrj~nzLPEbNrgDoHji5Jm4Kcxf8MY5vSeheS=I?;h-;C&G~Mk;{%D(H z+zc5-yXwpP9$>!N=X0yWFfG!-Z;oc3j?cjdvb_50t?XI4!t-K%)(S@!0ICadD+<+} z&DDz-&b)UtR&pQcz2G{{sQXjSb8_ebWhw60$ixg{Nsy~Q`DbJ&T6&^)EZLdrglO_% z*>K1B87mm4;nG+O27Tp|l4XE{4ei-xkD|?Jz09UmfZ1nm<-9K<|MYA&!G0Qh#6QO$leU2^7kWhvt2K_lN`TfX5 z*RZkh6{$pgDy=Nmxu>7U{1F8)b?(ZnyVjezd|V|F;|IoI;pJc6pMGQ(59i&*mCYvQ zXIg=0NaI7-0}O5*VO;EoI#7W6+f84W3d3*Ua)#g8aMK<_E4siDjfOTpOikp)|547b zEui(H5%AKbn-77a=;->Z83(fLFL}ZDGR}e1%%SE1JI;*d9{ zjNdZg^vKGnlIY?w;DPA}Z|XVkd@B0J3BPybUz?pAyWHyFy6tz-Q%%+LLES<2gd$=e zC~r-@-RKck#!qi1`x@)p 
zlVIQQk95e97E^=9Ka&tc$))!C6W-!G_#F@bpxPKoeYhIKh>px=pfyDGK}HtXYcsS8 zhy0M4+in<-b0y^IkrNZgKVBSW&3a&#Yf}`-B{j#09dXX~BM45_6V;)9kjeMrJzn)A zl6cfbgi87WI(GCQMTb2WW=K{yB-GTk6BxfOAa-A01dd=D)?Mk`TI|F$q)lgSsyb*l zr)oqbZK=%;htQgP%1wkP+2NPk1I1B-}8jKp^Xe8ikwc5+~Xe@!?x99=#05SH? zHW4+Z)j+3%Mxt`0?I%26mxo@2;H)}c3k|J9k)!>OI7Ny+vr${M~_8tb%bZZ{v&J2^D+?$+IXAwdT;VUJ$ zI|j``hdYzoT46P`qfA=k(W`(5orqmwkBZ6dm>;#opWj=-h7t|JLr1G;tL~}EN_Oj= z|M|@;@#IaVms(BRl=%PkH?aJd^dox!lL<>dW>iGBNI$+9S`RxQAp=)AAs3@&>)W}oSjWZyphyz z^i55F>+ns&7s!~8Cl31#mj2pPz}Hf*6umnBxx%pY>ym#gdq6wNuR?~dXI`#rIq*~Orr6dtUHdaC8O2dT@z7C%3j)i zey?1W$_LZ+Ao{g=%*oTqD}*5)KCoa77+Jo*q^j3V38>nsgedEfJ%bKWmc1lbwS{V`F3G|GVPujD5>Wd&4)d5ww( zGt3J=Y4c347MF?FV^Cvj9u7E6o)+|d_g0kCfOGp~ew*rMa$psQzO{;S*^^W#Zag9p zGsz1_gz-UgY#`>a(fAM807B^&#a6as;azwF%1l0Ueep-9Op@?YJ+4`}ec+OVF{NYu z%N+a4`FE$Xyc6aZFy<;(h)K4*4sxrfKC*eUBnn#ra~aOZsYc(@-aNRYQY*uarQj%v z0Q44g@kaH_Q?+}uLB;wNhI+HZ?;$-guEVf=moKN6&taw0!Sx?4J&&Q4P`g{sX_SPR z97ARo9N~G@P(5h=#VdmhaC(ZPsNa@`AJXQ< zQHSglY#wv!pb*_Z-e@k2R3A8uEFd?+Mt%BKHZ2*N>Y~SuYPcLqx>1^JcV_xDMy=|Y zHoA$w0h(EKUC%)FpU-+rgFW@e)(L`ZWK&Oa&KOIXCgbB&PNuqsC(Lrp_Sv^v~FFAXGOhrFJhtH2&bAo*rf7eFnc%gAJf#J1Ov@AT$X{Q{Wd` z5WE{%ZxMt;Xbwn1;vq{Xg2J}+xBeH%hr$o9!nWN01ISN$5nVNj*lnsQN%z4# z|6iP4!}?M30L8$Z-@>mApP4wHV2*fC#YK5^FI$R}h3a>5P$Ppyo%4;lxk^4!NDf)w zEII$9hSMv&U*9|2oo3#0^GeZTv19E#MzCHM+!fT*#7Vd;Wp(cA#`}Q{*u}0Z8_AOk zJsHF!Wg%B){)+r`L6Ne!6eFCURkD?DqE<(lL2B3zRy%SS2*E{ZeJ$IpIU$SYH*6sG zVPa>RpHf)lE9^IMJL%N=M23?-}H2O2MyUazlaGBty+(?C{_xU4?Ne_m;H1S!sM zpF#|jj!;W>rvy6)3s7vDL$5J#sQkw-ubx9gW$3P*LzHI_C#?1xjyp#nS)s~w9;mMyz@6(@88<9jONyhuy4YvZ*2`TirsmpO`JeI(;uk2z0*y&Q{WMH+#TsO&sXm<}bKHGf@)C z%I|h+aFW$~S5f(22EFGIUBcwo_0zAazYP=-KS`|GjFf!?86l=XDF(ChsxF%1U=x4V3K=WIFLxtkYY|n?<~h_P69oMcI*z*Qds^Qa>YR8f?RrIS*u> z>iY0W$~kwQ)ZUI}#AO*&5(S5%#kIve=Wu48r)$KR@{Gn)$`qY)Q{hlzB(;Igos=UVG+de3&ix=Pv-t?j@5<*io&PerSN~KM zVB%)@t27oUx#-p1_^q_?4_*O0@K087^RShRQdEE)#$!0W79C0{m0S zyt1NtU2qSO!?$ity0{z!A2^_>c&m`wX7ulESx8Tt`?-Fc;c=s)hR%CMa8zUMB0P)e 
zXim=2Jm!fpz`g+;@^&Uc>8H9b!taFflLV9B5K6(qP}*Tk^oFV58=9Cr%Kp$X^; z-t;|WU-ywiJ>m{iy$kX)Kz%@S%wh+=e7hkUXuaS2HNl}2>+SJvPEJgD;-C0izfU@t zuEFOgZ3uhqBSK)lcRE81cEV&!4oz|4g-$W7?-MpeL}X53P^$?*S$2-PEKE|gxR1GB znX2{}%%_;Z7!6;9=@;WupF}c>?mk-k(28?DThX2NUz3vGB+tl{O7F6k`X+-p?^21 zNHQC`eA?2C3}DVAKs-V=`pNu;A_s`c^*>v17tPP%p` zq_Rjp?7<;VCX9$CUQgwm#D#A=PXobf&7k0Lw`bh%x`m7+n%totZc4EpNMvHK3C(jQ zhZNLPD2Vkbw~q-RCq^2c%?NW;;{i3RTUCPDk`rXcIUIf+GmaDd1XOHOs?&2pZFHNP zf(pcT)b=hiAJ7z>!kO1Wuh6wIQ3)?>x3>V1wo${v#2BMARr?-z7GgGQR)_eBT zdn)2{y2B9C)PO-By^9Q_&z{JftKOUzD355waaM{}PK}FS`aDu9*dH-0xZg6$pCTP) zd&JmK=Nr}b1*d^3K_Y9dN`SZw|FHhk-_{A*g{`D~chzxmDHkW38x4imx0TKN43 z6X_11+(P!0HH=ns1)!@`L0TY5kaEtWX^1k3kyq4rbjesCjdordVxy8B_`V06Vn@tv zhBDWSch-FRtYY$1U3n3E(Dj0+V|oq7puazHYrOzq4YZLx{%~%KE0HIdGtb@NvNFDP+rbJ{7i+DO&w@FvN*{jIqdk3gjj9!dB)^FBA1Ut1W3jf#%1` z(I9YFJh^Z>RC9s6BYjROMFyramk~LIRs*h6f(OOyD99(0FX|Lp?*Ke75;+C47;zr8 z_CM;;UNzA=R3yrD&`u1u1b`*3Jz0LL#LN_Ft5bH|V&qDK5{Wsnp^+Um>anbej3l2ISL5h!0F^z+8L^egfV zaA|>#)(besn7!=f#G&GN3p#-gJ2;ZIY0QTP@mRpS6k~u@*a+|xE@$Sx==^A$rDFv^ zQW$JGdrKYUTVlD%&PsAV4r6*Ir&^MaJJ!a#T?Tk-!u?kjy4%* z)b25!Q{)A|-uAc%$h;C-Gx5jsYL4;)tp^;UJW~s(6=u95P@X>Y4S8{6>CQ+a@q@;q z_@S~p2bM%0X3KX%#6ectZE|Jw7kc-hX=}0uj=}m+O5ZODxRjU_WP^zbgr1$IKf2Q# zKI9b&fdc|fZ$q#G0T-kH&Qz>J|G`wiw{GS(pjnRCHl%~GGnA>_wUeeEr}FXdl9Co{ zaGwKba)d(2yS}Skj*ZCc2n;X3zS}C;<>*vU&(Oy`^B5>z<6_x!WHA3kWeUo<*yfjE zi>A?tDCs-7A4#Jmo*W%ty!>Lh>!6pMY>@GEJR8!TB&Hm-b0VM5U)&oXckcL%1B{8q zsVSG%qYb=%dF8l?Sl1N$DB*G*xPZ!=4yECz>d4w*LTQoNTJ#_7PFhabwk2Z@6o0ex z2|Hfs388OZG6HF`j%gu@46nc`>qJjZ!CSbDL=kKGJ!S?sEX>rw4nax&@_c zFIkcK0XA;%4aF)AOMnavx#YmgDY70&b2v?{I&m9Kq;mW34!Em7f?|JY$3wAgthT{V z=fAUiXjz_A18Dk|*2c1vRi?u2+OA%Bf@jUw8a?iNx^gQUH@lEkV(RD%*8pEKGF_M) zl5V^E`K-})T5wucbIdg~e!1aG^C*miMn#cpPsWSmVa<F2rBlB+MG1DCp;8wyFO52gx+um$hWG;~c9fi1ow>~#Nb>qb4jx8Jmv82)++5JGl; z6!!XJ=zAtJ$6fQF)tg>x9r;~z+CQQ&DdvQg2}7HRSB;BFgP)xL^U6DVqtnhcMI&Ax zphImktHoAwR(@PwF_jdu^{<9OpOjZ-TKEI(>Bv69!^#|hCf(PcajHJ87T#xn;Fpf^ zFL8iOv=`H(KUya*He6GpD}-hJKLD0&2@|r8B;5|1}4BuIJZqvOMi(n>UbVL 
zi}81W8g<(d_3aq`Er_S!lj3~{?m$hIFd%7b`GUiWtC+yzQ5n%5b_{I)+;Ddd4?`bF z4epsL8%?I}@LCr4#}etIDVEjQ+UP1%&A_w^+sx879s=%iSV==kAic17HJMQ#eqeh{ zuZ&e|P~G%{!NA$XS?YSd3%vOOU{1LST1Bz*g8ZrCc|$zQr9(W$g*GYn5xA_r?ZnQv zWjEbNlS!WB$QK{=?z>!t`ymfWBOWOqbGo|aq4Z>MX_lW$VQyOLONiUW zCRG1u(KAK}es1zrXu))7L2Zuk6y&SM=Dkz3629uiX-{V;L-VJKBUTf>MWRg&4;ncF zt8j3T%fn09R~6+XD9{CC*>lZPYQ$&J8KgQ@bUL3wR>%1iScJUpq{4i8#6!CheA?hL zqm1kwP6~}*5`19)&-|Pj2XwKI?D7*SeHGMxe4h6C@r0e{i@i(%?M*~h-tjXCFY-;t z*a|~;tWG;4Oj|sL+gE&`uh_-Nl0_6%IUZf5B?M@)YWLz<%CKt*+8Th_H>@y- zHGctkw^n*zY@as5Kitj^&mvxz93^J5+lC`d5>Sq2c^G5MwD3o&P%v<8T^YRPHzoD= zM|@`Uzu~%(|B@olH}JhF257lUvf z-JaF?rQA&Y5x(Yl48=7{ssy>cm@p}dYLcI2Ow?QBPw*VbTC>D6i`m1-n}j|qw4nW~ zUH<8I&oF_w@Nj#YZEABi7a=Zej|Bcv9+wtrrv~$~MQFDUNsvTKFVcExxWd4oO_OiI zKyy=qjJ3)m_+&SPrjk)F>0)-CdV6p(%jaR36Gv7rXUk2NrJF^H z&pq9aD&#)U;huE~c{~RJ1C1sRhQ`sH2-Q8dblmpJOp-xWt_FIw^tYA7>yZEConLa_SI4Zy?opiFMPiLD3t!8F}UspfGgv_ zv`^Qql5Um$hvY?f;{H9AO1G^4c}s&X%;>_5?mXzu!~bd?V68OpPZa>&UefI)-CokM z``^0}O2;*HTtkNsI(*RKgAN~b`0)2@5_O=c14SJu>OfHkiaJo#f#UxwP<*>8t&9R< zzux6&LxkO$UoP!_Ju_1vvrbLp_(1~lZ}o?OF75p3;VWzt$p%dw<2=}q@! 
n-D>FehVG>NpA7{ADd3-sKWeX>cnthRJ#SCH@7RAi`QzUJOr1h1 literal 0 HcmV?d00001 From f5450e00e93240bd694d8a158188ee2394dd8a4a Mon Sep 17 00:00:00 2001 From: Gleb <144066346+GlebBobkov@users.noreply.github.com> Date: Wed, 1 May 2024 12:23:26 +0300 Subject: [PATCH 10/10] Update README.md --- README.md | 144 ++++++------------------------------------------------ 1 file changed, 16 insertions(+), 128 deletions(-) diff --git a/README.md b/README.md index 79c801a..69985e2 100644 --- a/README.md +++ b/README.md @@ -1,143 +1,31 @@ # my_first_tool -**This program consists of 3 tools:** +**This program consists several of my homeworks:** +## Main script +**Main.py** consists of several tools in the form of +1) functions: - 'dna_rna_tools' - List of procedures: `transcribe` (return the transcribed sequence), `reverse` (return the reversed sequence), `complement` (return the complementary sequence), `reverse_complement` (return the reverse complementary sequence); -- 'protein_tools'- List of procedures: `length` (return the number of amino acids in protein sequence(s)), `percentage` (return percentage of each amino acid in sequence), `pattern` (return all non-overlaping instances of a given pattern in sequences), `3Letter_name` (return three-letter amino acids into a three-letter amino acids), `DNA_code` ( return transformed protein sequence(s) to DNA sequence(s)), `fastq_tools` (return percentage of each amino acid in sequence); -- 'fastq_tools' - Procedure: filtering of the dictionary of sequences by the length, quality of sequencing of each nucleotides and GC% content. 
+- 'protein_tools'- List of procedures: `length` (return the number of amino acids in protein sequence(s)), `percentage` (return percentage of each amino acid in sequence), `pattern` (return all non-overlapping instances of a given pattern in sequences), `3Letter_name` (convert one-letter amino acid sequences into three-letter amino acid sequences), `DNA_code` (return protein sequence(s) transformed into DNA sequence(s)), +2) a class that uses BioPython: +- 'filter_fastq'- Procedure: filtering of the dictionary of sequences by length, sequencing quality of each nucleotide, and GC% content. -## run_dna_rna_tools.py -> *description of how the run_dna_rna_tools.py program works* +Here you can also find the ability to work with an API and a Telegram bot. -This program contains the function `run_dna_rna_tools`. The `run_dna_rna_tools` function takes as input an arbitrary number of arguments containing DNA or RNA sequences in the form (*str*), as well as the name of the procedure to be performed, specified as the last argument. After this, the command performs the specified action on all transmitted sequences. If one sequence is supplied, a string with the result is returned. If several are submitted, a list of strings is returned. -**Use example** -```python -run_dna_rna_tools('ATG', 'transcribe') # 'AUG' -run_dna_rna_tools('ATG', 'reverse') # 'GTA' -run_dna_rna_tools('AtG', 'complement') # 'TaC' -run_dna_rna_tools('ATg', 'reverse_complement') # 'cAT' -run_dna_rna_tools('ATG', 'aT', 'reverse') # ['GTA', 'Ta'] -``` +## bio_files_processor +**bio_files_processor.py** contains a function for converting multi-line FASTA to one-line FASTA. -## protein_tools.py -> *Discription how the protein_tools.py works:* -This program contains the function `protein_tool`. The `protein_tool` function takes as input an arbitrary number of arguments in the form of amino acid (aa)/protein sequences of type *str*, as well as the name for the procedure to be performed.
After this, the function performs the specified action on all provided sequences. Carefully read the rules of usage for each option, because they specify correct ways of entering arguments, as well as the output and the type of data in the output. -### :warning: Attention: 1) The programm is register-dependent; 2) Before using some of the options read 'Procedures description' carefully. 3) If you input sequenses or 'options' incorrectly, the program will provide you with helpful error messages. +## custom_random_forest +**custom_random_forest.py** contains the custom class CustomRandomForest, which, unlike the usual RandomForest from the sklearn library, is able to work in parallel. -***compare*** -**Introduction** -The **compare** procedure compares the first amino acid sequence provided with the following ones. -***Inputs*** -To start using the length procedure, enter sevreal arguments: -- _an arbitrary number_ of sequences, where the first sequence is a reference to which the following sequences are compared; each argument should be of type 'str'. -- _second-to-last_ argument is the number of decimals to round the number to; type 'int' -- _last_ argument determines whether percentages are returned instead of fractions; type 'bool' -**Outputs** -It returns a 'dict' object where: -- *keys* are compared-to sequences (type str) -- *values* are either fractions or percentages (type float). -**Usage example** -```python -protein_tool('LAlLAlwWGPdPA', 'LAlLAl', 3, False, options = 'compare') # {'LAlLAl': 1.0} -protein_tool('LAlLAlwWGPdPA', 'LAlLAl', 'GPdPA', 3, True, options = 'compare')) # {'LAlLAl': 100.0, 'GPdPA': 20.0} -``` +## test_my_first_tool +**test_my_first_tool.py** consists of tests that check for mistakes in the code and in several processes. -***length*** -**Introduction** -The **length** procedure calculates the length of protein sequence(s) (equal to the number of amino acids).
-**Inputs** -To start using the length procedure, enter one or more protein sequences for which you want to get a summary, and at the end add `options = ‘length’`. -**Outputs** -The result of the procedure is a list with the numbers of amino acids in each sequence. The list contains only numbers of amico acids in the sequence. -**Usage example*** -```python -protein_tool('LAlLAlwWGPdPA', options = 'length') # [13] -protein_tool('RRRrrrR', 'WGPdPA', 'LAlLAlw', options = 'length') # [7, 6, 7] -``` +## Showcases.ipynb +**Showcases.ipynb** shows how CustomRandomForest works. -***percentage*** -**Introduction** -The **percentage** procedure calculates the percentage of all 20 proteinogenic amino acid residues, case-sensitive in the protein sequences -**Input** -To start using the count_percentage procedure, enter one or more protein sequences for which you want to get a summary, and at the end add `options = ‘percentage’`. -**Outputs** -The result of the procedure is a list of dictionaries with the percentages of the corresponding amino acids in each sequence. The dictionary contains only amino acid residues whose percentage in the sequence is not equal to 0 (which are contained in the sequence at all). Also, the dictionary is ordered from the largest percentage of content to the smallest. Cases of amino acid residues are taken into account. -> :warning: Attention: We use rounding to 2 decimal places. In some cases, **the sum of percentages** of all amino acid residues for sequence **may not be exactly 100%** due to rounding.
-**Usage example** -```python -protein_tool('LAlLAlwWGPdPA', options = 'percentage') # [{'A': 23.08, 'L': 15.38, 'l': 15.38, 'P': 15.38, 'w': 7.69, 'W': 7.69, 'G': 7.69, 'd': 7.69}] -protein_tool('RRRrrrR', 'WGPdPA', 'LAlLAlw', options = 'percentage') # [{'R': 57.14, 'r': 42.86}, {'P': 33.33, 'W': 16.67, 'G': 16.67, 'd': 16.67, 'A': 16.67}, {'L': 28.57, 'A': 28.57, 'l': 28.57, 'w': 14.29}] -``` -***pattern*** -**Introduction** -The **pattern** procedure finds all non-overlaping cases of a given pattern in amino acid sequence(s) provided. -**Inputs** -To start using the pattern procedure, enter one or more protein sequences for which you want to get a summary, where the first sequence is a pattern, which is searched for in the following sequences; each argument should be of type 'str' and at the end add `options = ‘pattern’`. -The *find_pattern()* function goes through a sequence in the following way: it takes a subsequence of amino acids in front of an index equal in length to the pattern and compares it to the pattern. If there is no match, index is moved one amino acid to the end of the sequence. If there is a match, the index is saved, and the function jumps to an aminoacid next to the end of the subsequence, then the algorithm repeats. Comparison is performed by *compare_pattern* subfunction. -The image explanation of that function. 
-![The image explanation of that function **pattern**](https://github.com/GlebBobkov/HW4_Bobkov/raw/HW4_Bobkov/HW4_Bobkov/explanation.jpg) -**Outputs** -The result of this procedure is a 'dict' object where: -- *keys* are amino acid sequences (type 'str') -- _values_ are lists where the first element is a number of pattern instances in a given sequence, and the following elements are indexes of these occurances -**Usage example** -```python -protein_tool('LAlLAlwWGPdPA', 'LAlLAl', 'GPdPA', options = 'pattern') # {'LAlLAl': [2, 0, 3], 'GPdPA': [0]} -protein_tool('LAlLAlwWGPdPA', 'AlLAl', options = 'pattern') # {'AlLAl': [1, 2]} -``` -***3Letter_name*** -**Introduction** -The **3Letter_name** procedure transforms one-letter amino acid entry sequences to three-letter amino acid sequences, separated by a specified separator. It is a case-sensitive procedure. -**Inputs** -To start using the rename_three_letter_name procedure, enter one or more protein sequences for which you want to get three-letter sequences. After the protein sequences put a symbol (type 'str') that will be a separator. And specify the `options = ‘3Letter_name’`. -**Outputs** -The result of the procedure is a list of three-letter sequences. Each amino acid is separated by the specified separator. The case of the three-letter amino acid coincides with the case of the one-letter designation at the input. 
-**Usage example** -```python -protein_tool('wWGPdPA', '', options = '3Letter_name') # ['trpTRPGLYPROaspPROALA'] -protein_tool('LAlLAlwWGPdPA', '-', options = '3Letter_name') # ['LEU-ALA-leu-LEU-ALA-leu-trp-TRP-GLY-PRO-asp-PRO-ALA'] -protein_tool('RRRrrrR', 'WGPdPA', 'LAlLAlw', options = 'percentage') # [{'R': 57.14, 'r': 42.86}, {'P': 33.33, 'W': 16.67, 'G': 16.67, 'd': 16.67, 'A': 16.67}, {'L': 28.57, 'A': 28.57, 'l': 28.57, 'w': 14.29}] -protein_tool('qwerty', 'G', options = '3Letter_name') # ['glnGtrpGgluGargGthrGtyr'] -``` - -***DNA_code*** -**Introduction** -The **DNA_code** procedure transforms a protein into a DNA sequence that may encode it (this can be used in genetic ingeneering). -P.S. codons chosen at the discretion of the tool authors. -**Inputs** -To start using the DNA_code procedure, enter one or more protein sequences for which you want to get a summary, and at the end add `options = ‘DNA_code’`. -**Outputs** -The result of the procedure is a list with type 'str' elements - nucleotide sequence that corresponds to the amino acid sequence. -**Usage example** -```python -protein_tool('LAlLAlwWGPdPA', options = 'DNA_code') # ['TTAGCAttaTTAGCAttatggTGGGGGCCCgcaCCCGCA'] -protein_tool('RRRrrrR', 'WGPdPA', 'LAlLAlw', options = 'DNA_code') # ['CGACGACGAcgacgacgaCGA', 'TGGGGGCCCgcaCCCGCA', 'TTAGCAttaTTAGCAttatgg'] -``` - -## fastq_tools.py -> *description of how the run_dna_rna_tools.py program works* - -This program contains the function `fastq_tool`. The `fastq_tools` function takes as input a dictionary of the fastaq data and sort it by the length, quality of sequencing of each nucleotides and GC% content. -The result of the running of the tool is sorted dictionary of the dictionary from the input. -'gc_bounds' - GC composition interval (in percentage) for filtering (by default results (0, 100), i.e. all reads in the direction). If you pass one number as an argument, it is considered to be the upper limit. 
Examples: gc_bounds = (20, 80) - save only reads with GC content from 20 to 80%, gc_bounds = 44.4 - save reads with GC content less than 44.4%. -'length_bounds' - length of the interval for filtering, still gc_bounds, but by default it is (0, 2**32). -'quality_threshold' - threshold value of average read quality for the filter, default is 0 (phred33 scale). Reads contribute to quality for all nucleotides below the threshold are discarded. - -**Use example** -```python -seqs = { - # 'name' : ('sequence', 'quality') - '@SRX079804:1:SRR292678:1:1101:21885:21885': ('ACAGCAACATAAACATGATGGGATGGCGTAAGCCCCCGAGATATCAGTTTACCCAGGATAAGAGATTAAATTATGAGCAACATTATTAA', 'FGGGFGGGFGGGFGDFGCEBB@CCDFDDFFFFBFFGFGEFDFFFF;D@DD>C@DDGGGDFGDGG?GFGFEGFGGEF@FDGGGFGFBGGD'), - '@SRX079804:1:SRR292678:1:1101:24563:24563': ('ATTAGCGAGGAGGAGTGCTGAGAAGATGTCGCCTACGCCGTTGAAATTCCCTTCAATCAGGGGGTACTGGAGGATACGAGTTTGTGTG', 'BFFFFFFFB@B@A<@D>BDDACDDDEBEDEFFFBFFFEFFDFFF=CC@DDFD8FFFFFFF8/+.2,@7<<:?B/:<><-><@.A*C>D'), - '@SRX079804:1:SRR292678:1:1101:30161:30161': ('GAACGACAGCAGCTCCTGCATAACCGCGTCCTTCTTCTTTAGCGTTGTGCAAAGCATGTTTTGTATTACGGGCATCTCGAGCGAATC', 'DFFFEGDGGGGFGGEDCCDCEFFFFCCCCCB>CEBFGFBGGG?DE=:6@=>AD?D8DCEE:>EEABE5D@5:DDCA;EEE-DCD'), - '@SRX079804:1:SRR292678:1:1101:171075:171075': ('CATTATAGTAATACGGAAGATGACTTGCTGTTATCATTACAGCTCCATCGCATGAATAATTCTCTAATATAGTTGTCAT', 'HGHHHHGFHHHHFHHEHHHHFGEHFGFGGGHHEEGHHEEHBHHFGDDECEGGGEFGFC@DDGGGDFGDGG?GFGFEGFGGEF@FDGGGFGFBGGD'), '@SRX079804:1:SRR292678:1:1101:171075:171075': ('CATTATAGTAATACGGAAGATGACTTGCTGTTATCATTACAGCTCCATCGCATGAATAATTCTCTAATATAGTTGTCAT', 'HGHHHHGFHHHHFHHEHHHHFGEHFGFGGGHHEEGHHEEHBHHFGDDECEGGGEFGF