From 30f499244e3f00b2605c249a6fdc5f4160068b0f Mon Sep 17 00:00:00 2001 From: Aleksandr Voskoboinikov <“wwoskie@gmail.com”> Date: Sat, 30 Sep 2023 21:03:27 +0300 Subject: [PATCH 01/53] Initial commit --- HW4_Voskoboinikov/ultimate_protein_tools.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 HW4_Voskoboinikov/ultimate_protein_tools.py diff --git a/HW4_Voskoboinikov/ultimate_protein_tools.py b/HW4_Voskoboinikov/ultimate_protein_tools.py new file mode 100644 index 0000000..e69de29 From 4e2030e2b8f3fe81bbc7b6a12a9e4b621032b21a Mon Sep 17 00:00:00 2001 From: Artem Vasilev Date: Sat, 30 Sep 2023 21:09:26 +0300 Subject: [PATCH 02/53] Add const AMINOACID_DICT --- HW4_Voskoboinikov/ultimate_protein_tools.py | 22 +++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/HW4_Voskoboinikov/ultimate_protein_tools.py b/HW4_Voskoboinikov/ultimate_protein_tools.py index e69de29..2468c43 100644 --- a/HW4_Voskoboinikov/ultimate_protein_tools.py +++ b/HW4_Voskoboinikov/ultimate_protein_tools.py @@ -0,0 +1,22 @@ +AMINOACID_DICT = { + 'A': 'Alanine', 'a': 'alanine', + 'C': 'Cysteine', 'c': 'cysteine', + 'D': 'Aspartic acid', 'd': 'aspartic acid', + 'E': 'Glutamic acid', 'e': 'glutamic acid', + 'F': 'Phenylalanine', 'f': 'Phenylalanine', + 'G': 'Glycine', 'g': 'glycine', + 'H': 'Histidine', 'h': 'histidine', + 'I': 'Isoleucine', 'i': 'isoleucine', + 'K': 'Lysine', 'k': 'lysine', + 'L': 'Leucine', 'l': 'leucine', + 'M': 'Methionine', 'm': 'methionine', + 'N': 'Asparagine', 'n': 'asparagine', + 'P': 'Proline', 'p': 'proline', + 'Q': 'Glutamine', 'q': 'glutamine', + 'R': 'Arginine', 'r': 'arginine', + 'S': 'Serine', 's': 'serine', + 'T': 'Threonine', 't': 'threonine', + 'V': 'Valine', 'v': 'valine', + 'W': 'Tryptophan', 'w': 'tryptophan', + 'Y': 'Tyrosine', 'y': 'tyrosine' + } \ No newline at end of file From 2982a8bcf920d591052d94a785e10c7f33927c28 Mon Sep 17 00:00:00 2001 From: Artem Vasilev Date: Sat, 30 Sep 2023 21:10:14 +0300 Subject: [PATCH 03/53] 
Add function length_of_protein --- HW4_Voskoboinikov/ultimate_protein_tools.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/HW4_Voskoboinikov/ultimate_protein_tools.py b/HW4_Voskoboinikov/ultimate_protein_tools.py index 2468c43..6345787 100644 --- a/HW4_Voskoboinikov/ultimate_protein_tools.py +++ b/HW4_Voskoboinikov/ultimate_protein_tools.py @@ -19,4 +19,18 @@ 'V': 'Valine', 'v': 'valine', 'W': 'Tryptophan', 'w': 'tryptophan', 'Y': 'Tyrosine', 'y': 'tyrosine' - } \ No newline at end of file + } + + +def length_of_protein(seq: str) -> int: + """ + Calculates the length of a protein. + + Argument: + - seq (str): sequence to calculate the length + + Return: + - int: sequence length + """ + + return len(seq) \ No newline at end of file From c49f6857f6a14a583c2eb381b5758da02156d866 Mon Sep 17 00:00:00 2001 From: Artem Vasilev Date: Sat, 30 Sep 2023 21:10:50 +0300 Subject: [PATCH 04/53] Add function count_aa --- HW4_Voskoboinikov/ultimate_protein_tools.py | 27 ++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/HW4_Voskoboinikov/ultimate_protein_tools.py b/HW4_Voskoboinikov/ultimate_protein_tools.py index 6345787..133115c 100644 --- a/HW4_Voskoboinikov/ultimate_protein_tools.py +++ b/HW4_Voskoboinikov/ultimate_protein_tools.py @@ -33,4 +33,29 @@ def length_of_protein(seq: str) -> int: - int: sequence length """ - return len(seq) \ No newline at end of file + return len(seq) + + +def count_aa(seq: str, *, aminoacids: str = None) -> dict: + """ + Counts the number of given or all amino acids in a protein sequence. + + Arguments: + - seq (str): sequence to count amino acids + - aminoacids (str): which amino acids to count in sequence + + Return: + - dict: a dictionary with amino acids and its count + """ + + aa_dict_count = {} + if (aminoacids is None) or (aminoacids == ''): + ''' + I added an additional condition for user-friendly experience. 
+ E.g., we can want to find specific aminoacid, look on result and then look on all aminoacids. + Without this condition we have to delete keyword argument, but with it we can only make it empty. + ''' + aminoacids = ''.join(set(seq)) + for aa in aminoacids: + aa_dict_count[aa] = seq.count(aa) + return aa_dict_count \ No newline at end of file From c7b2e9ab83e55945a41923d2f9106fbe8e40049e Mon Sep 17 00:00:00 2001 From: Artem Vasilev Date: Sat, 30 Sep 2023 21:11:18 +0300 Subject: [PATCH 05/53] Add function get_fracture_of_aa --- HW4_Voskoboinikov/ultimate_protein_tools.py | 29 ++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/HW4_Voskoboinikov/ultimate_protein_tools.py b/HW4_Voskoboinikov/ultimate_protein_tools.py index 133115c..60abfae 100644 --- a/HW4_Voskoboinikov/ultimate_protein_tools.py +++ b/HW4_Voskoboinikov/ultimate_protein_tools.py @@ -58,4 +58,31 @@ def count_aa(seq: str, *, aminoacids: str = None) -> dict: aminoacids = ''.join(set(seq)) for aa in aminoacids: aa_dict_count[aa] = seq.count(aa) - return aa_dict_count \ No newline at end of file + return aa_dict_count + + +def get_fracture_of_aa(seq: str, *, show_as_percentage: bool = False, aminoacids: str = None) -> dict: + """ + Returns the fracture or percentage of amino acids in a protein sequence. 
+ + Arguments: + - seq (str): sequence in which you need to calculate the fracture of amino acids + - show_as_percentage (bool): change it to True, if you want to get results with percentages + - aminoacids (str): the fracture of which amino acids to count in the sequence + + Return: + - dict: a dictionary with amino acids and its fracture or percentage + """ + + if show_as_percentage: + mult = 100 + round_var = 2 + else: + mult = 1 + round_var = 4 + aa_dict_count = count_aa(seq, aminoacids=aminoacids) + aa_dict_percent = {} + len_of_protein = length_of_protein(seq) + for aa, count in aa_dict_count.items(): + aa_dict_percent[aa] = round(count / len_of_protein * mult, round_var) + return aa_dict_percent \ No newline at end of file From 5167de74e377b1ba372dc9b3be082ced219aabac Mon Sep 17 00:00:00 2001 From: Tatiana Lisitsa Date: Sat, 30 Sep 2023 21:31:22 +0300 Subject: [PATCH 06/53] Add constant H2O_WEIGHT and constant AA_MASS_DICT --- HW4_Voskoboinikov/ultimate_protein_tools.py | 26 +++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/HW4_Voskoboinikov/ultimate_protein_tools.py b/HW4_Voskoboinikov/ultimate_protein_tools.py index 60abfae..296268a 100644 --- a/HW4_Voskoboinikov/ultimate_protein_tools.py +++ b/HW4_Voskoboinikov/ultimate_protein_tools.py @@ -22,6 +22,32 @@ } +H2O_WEIGHT: float = 18.01468 + + +AA_MASS_DICT: dict[str, float] = { + 'G': 75.0659, 'g': 75.0659, + 'L': 131.17262, 'l': 131.17262, + 'Y': 181.18894, 'y': 181.18894, + 'S': 105.09158, 's': 105.09158, + 'E': 147.12826, 'e': 147.12826, + 'Q': 146.1438, 'q': 146.1438, + 'D': 133.10158, 'd': 133.10158, + 'N': 132.11712, 'n': 132.11712, + 'F': 165.18994, 'f': 165.18994, + 'A': 89.09258, 'a': 89.09258, + 'K': 146.18716, 'k': 146.18716, + 'R': 174.20056, 'r': 174.20056, + 'H': 155.15466, 'h': 155.15466, + 'C': 121.15758, 'c': 121.15758, + 'V': 117.14594, 'v': 117.14594, + 'P': 115.13026, 'p': 115.13026, + 'W': 204.22648, 'w': 204.22648, + 'I': 131.17262, 'i': 131.17262, + 'M': 149.21094, 
'm': 149.21094, + 'T': 119.11826, 't': 119.11826, + } + def length_of_protein(seq: str) -> int: """ Calculates the length of a protein. From 1b3b857c0396ec7a1a22d4d6ca25ed387fcc962c Mon Sep 17 00:00:00 2001 From: Tatiana Lisitsa Date: Sat, 30 Sep 2023 21:35:17 +0300 Subject: [PATCH 07/53] Add function calculate_protein_mass --- HW4_Voskoboinikov/ultimate_protein_tools.py | 30 ++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/HW4_Voskoboinikov/ultimate_protein_tools.py b/HW4_Voskoboinikov/ultimate_protein_tools.py index 296268a..4bd7c34 100644 --- a/HW4_Voskoboinikov/ultimate_protein_tools.py +++ b/HW4_Voskoboinikov/ultimate_protein_tools.py @@ -48,6 +48,7 @@ 'T': 119.11826, 't': 119.11826, } + def length_of_protein(seq: str) -> int: """ Calculates the length of a protein. @@ -111,4 +112,31 @@ def get_fracture_of_aa(seq: str, *, show_as_percentage: bool = False, aminoacids len_of_protein = length_of_protein(seq) for aa, count in aa_dict_count.items(): aa_dict_percent[aa] = round(count / len_of_protein * mult, round_var) - return aa_dict_percent \ No newline at end of file + return aa_dict_percent + + +def calculate_protein_mass(sequence: str, aa_atomic_mass: dict[str, float] = None) -> float: + """ + + Calculates the molecular mass of a protein based on its amino acid sequence and a dictionary of amino acid masses. + + Arguments / Args: + - sequence(str or list): A string or list of characters representing the amino acid sequence. + - aa_atomic_mass(dict): A dictionary linking amino acids to their masses in atomic mass units. + + Return: + - float: The molecular mass of a protein in atomic mass units, rounded to the third decimal place. 
+ """ + + total_mass = 0.0 + if aa_atomic_mass is None: + aa_atomic_mass = AA_MASS_DICT + + for aa in sequence: + if aa in aa_atomic_mass: + total_mass += aa_atomic_mass[aa] + else: + raise ValueError(f'Unknown amino acid: {aa}') + total_mass = total_mass - H2O_WEIGHT * (len(sequence) - 1) + + return round(total_mass, 3) From 939b88b8cbd7365ed46bf44b563d82f87b9a1ec9 Mon Sep 17 00:00:00 2001 From: Tatiana Lisitsa Date: Sat, 30 Sep 2023 21:36:41 +0300 Subject: [PATCH 08/53] Add constant ATOMIC_MASS --- HW4_Voskoboinikov/ultimate_protein_tools.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/HW4_Voskoboinikov/ultimate_protein_tools.py b/HW4_Voskoboinikov/ultimate_protein_tools.py index 4bd7c34..a2df19d 100644 --- a/HW4_Voskoboinikov/ultimate_protein_tools.py +++ b/HW4_Voskoboinikov/ultimate_protein_tools.py @@ -49,6 +49,15 @@ } +ATOMIC_MASS: dict[str, float] = { + 'C': 12.011, + 'H': 1.00784, + 'O': 15.999, + 'N': 14.0067, + 'S': 32.065 +} + + def length_of_protein(seq: str) -> int: """ Calculates the length of a protein. @@ -127,7 +136,7 @@ def calculate_protein_mass(sequence: str, aa_atomic_mass: dict[str, float] = Non Return: - float: The molecular mass of a protein in atomic mass units, rounded to the third decimal place. 
""" - + total_mass = 0.0 if aa_atomic_mass is None: aa_atomic_mass = AA_MASS_DICT From db338c4f547948dd97af489e67895a49395fa8a3 Mon Sep 17 00:00:00 2001 From: Tatiana Lisitsa Date: Sat, 30 Sep 2023 21:38:09 +0300 Subject: [PATCH 09/53] Add function get_atomic_mass --- HW4_Voskoboinikov/ultimate_protein_tools.py | 37 +++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/HW4_Voskoboinikov/ultimate_protein_tools.py b/HW4_Voskoboinikov/ultimate_protein_tools.py index a2df19d..56b13a8 100644 --- a/HW4_Voskoboinikov/ultimate_protein_tools.py +++ b/HW4_Voskoboinikov/ultimate_protein_tools.py @@ -149,3 +149,40 @@ def calculate_protein_mass(sequence: str, aa_atomic_mass: dict[str, float] = Non total_mass = total_mass - H2O_WEIGHT * (len(sequence) - 1) return round(total_mass, 3) + + +def get_atomic_mass(chem: str, atomic_mass: dict[str, float] = None) -> float: + """ + + Calculates the molecular mass of a biological molecule, primarily an amino acid, based on a simple chemical formula. + + Arguments / Args: + - chem (str): String representing a simple chemical formula, e.g. C2H5OH + - atomic_mass (dict[str, float], optional): A dictionary linking the chemical elements Carbon, Hydrogen, Oxygen, + Nitrogen, and Sulfur with their masses in atomic mass units. + + Return: + - float: Molecular mass of a biological molecule in atomic mass units. 
+ """ + + total_mass = 0 + char = 0 # idx init + if atomic_mass is None: + atomic_mass = ATOMIC_MASS + while char < len(chem): + if chem[char].isalpha(): + element = chem[char] + char += 1 # очень надо, а то я опять бесконечный цикл сделала + if char < len(chem) and chem[char].isdigit(): + number = '' + while char < len(chem) and chem[char].isdigit(): + number += chem[char] + char += 1 # очень надо + total_mass += atomic_mass[element] * int(number) + else: + total_mass += atomic_mass[element] + else: + raise ValueError(f'Unknown elem: {chem[char]}') + + return total_mass + From 8543c26b847e5d65651c3ffc12933b5235c5b275 Mon Sep 17 00:00:00 2001 From: Tatiana Lisitsa Date: Sat, 30 Sep 2023 21:39:18 +0300 Subject: [PATCH 10/53] Add constant AA_NAME_DICT --- HW4_Voskoboinikov/ultimate_protein_tools.py | 26 ++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/HW4_Voskoboinikov/ultimate_protein_tools.py b/HW4_Voskoboinikov/ultimate_protein_tools.py index 56b13a8..471046e 100644 --- a/HW4_Voskoboinikov/ultimate_protein_tools.py +++ b/HW4_Voskoboinikov/ultimate_protein_tools.py @@ -58,6 +58,30 @@ } +AA_NAME_DICT: dict[str, str] = { + 'G': 'Gly', 'g': 'Gly', + 'L': 'Leu', 'l': 'Leu', + 'Y': 'Tyr', 'y': 'Tyr', + 'S': 'Ser', 's': 'Ser', + 'E': 'Glu', 'e': 'Glu', + 'Q': 'Gln', 'q': 'Gln', + 'D': 'Asp', 'd': 'Asp', + 'N': 'Asn', 'n': 'Asn', + 'F': 'Phe', 'f': 'Phe', + 'A': 'Ala', 'a': 'Ala', + 'K': 'Lys', 'k': 'Lys', + 'R': 'Arg', 'r': 'Arg', + 'H': 'His', 'h': 'His', + 'C': 'Cys', 'c': 'Cys', + 'V': 'Val', 'v': 'Val', + 'P': 'Pro', 'p': 'Pro', + 'W': 'Trp', 'w': 'Trp', + 'I': 'Ile', 'i': 'Ile', + 'M': 'Met', 'm': 'Met', + 'T': 'Thr', 't': 'Thr' + } + + def length_of_protein(seq: str) -> int: """ Calculates the length of a protein. @@ -164,7 +188,7 @@ def get_atomic_mass(chem: str, atomic_mass: dict[str, float] = None) -> float: Return: - float: Molecular mass of a biological molecule in atomic mass units. 
""" - + total_mass = 0 char = 0 # idx init if atomic_mass is None: From f9a61da4a43597a8400747a66f83977356dd6c68 Mon Sep 17 00:00:00 2001 From: Tatiana Lisitsa Date: Sat, 30 Sep 2023 21:41:21 +0300 Subject: [PATCH 11/53] Add function convert_aa_name --- HW4_Voskoboinikov/ultimate_protein_tools.py | 44 ++++++++++++++++++++- 1 file changed, 43 insertions(+), 1 deletion(-) diff --git a/HW4_Voskoboinikov/ultimate_protein_tools.py b/HW4_Voskoboinikov/ultimate_protein_tools.py index 471046e..3fd222c 100644 --- a/HW4_Voskoboinikov/ultimate_protein_tools.py +++ b/HW4_Voskoboinikov/ultimate_protein_tools.py @@ -55,7 +55,7 @@ 'O': 15.999, 'N': 14.0067, 'S': 32.065 -} + } AA_NAME_DICT: dict[str, str] = { @@ -210,3 +210,45 @@ def get_atomic_mass(chem: str, atomic_mass: dict[str, float] = None) -> float: return total_mass + +def convert_aa_name(sequence: str, name_dict: dict[str, str] = None, sep: str = '', + use_default_register: bool = True) -> str: + """ + + Converts a sequence of one-letter amino acid codes to three-letter designations. + + Arguments / Args: + - sequence (str): String with one-letter amino acid codes. + - name_dict (dict[str, str], optional): A dictionary linking one-letter codes to three-letter designations. + If not provided, the standard AA_NAME_DICT dictionary is used. + - sep (str, optional): Separator between three-letter amino acid designations. There is no delimiter by default. + - use_default_register(bool, optional): Determines whether to preserve letter case in three-letter designations. + If True, the letters will be converted to upper or lower case depending on the case of the depending + on the case of the one-letter code. The default is False. + + Return: + - str: A string of three-letter amino acid designations separated by the specified delimiter. 
+ """ + + new_name = '' + if name_dict is None: + name_dict = AA_NAME_DICT + for i, aa in enumerate(sequence): + if aa in name_dict: + if use_default_register is False: + new_name += name_dict[aa] + elif use_default_register is True: + if aa.isupper(): + new_name += name_dict[aa].upper() + else: + new_name += name_dict[aa].lower() + else: + if aa.isupper(): + new_name += name_dict[aa].lower() + else: + new_name += name_dict[aa].upper() + if sep and (i + 1) < len(sequence): + new_name += sep + else: + raise ValueError(f'Unknown amino acid: {aa}') + return new_name From f97b6c311e7c1f9d5019836145bc2b68d4420950 Mon Sep 17 00:00:00 2001 From: Artem Vasilev Date: Sat, 30 Sep 2023 22:50:49 +0300 Subject: [PATCH 12/53] Add a photo of the team --- HW4_Voskoboinikov/Wonderful_team.jpg | Bin 0 -> 52853 bytes 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 HW4_Voskoboinikov/Wonderful_team.jpg diff --git a/HW4_Voskoboinikov/Wonderful_team.jpg b/HW4_Voskoboinikov/Wonderful_team.jpg new file mode 100644 index 0000000000000000000000000000000000000000..5b55826aecc55d86c8bc22fec2d90d6b9d46dedb GIT binary patch literal 52853 zcmcG$1z40z`!J3OVt{~@gdiYY(hbt((w)-Mtu!i1hjcCtOCv}~i-45m0!ue6OG+%T zOMQz+&ikJ8d*A=}o$tH;^XxUx%rnp2bI)CK&&}{$PFyaa;VH^0$fDi2aRcoI>O#Am zMw3FrxOwyHhj9}F69WVD)?KVysDyLp?(Ms{ICyxtIJo!m2_6#SKOlKd&&oQHvYe zGEouQG8rz$c&c2q7d%NL_6%8<{`oswMieiLsd_EZf6K}f89=pzR2at%93OntmNFm= zp+0`N#7Aiv)H_w4a!qgcRLldrn*CKMkAx{Xw_k}!%~<>#d|kPaacU0T&61L;?G2YwJlt)I^@BD6>Lo2eQGsJ?#Jdu>R87+Y;#Yu_o`2lr>FLL*wzT*J8-VYd8&y`=*J#zcGo)o^%>iEX#pw~) z@W!~caMOD=)FU=O)MhxzrgPaGw)pa$ouBz}9ZL+a!T7~>_@5gpQ8c733yiyzPmQmc z>#pMOyosgs3^|$67Cee3X7tXT63w$UTPhy*u^7?Aj^JyHa0oQP-8rN5@^UH9Lc^Si z=_$NMlx>4;ucFSbRtyUQCO6l4IIU|gQ06|z(-tZ9P?ozqWw6)#Sspu_A%A*tBA9?rtEnqBsMjlunGv* zAEwHK_@V1dyMx2-u6?D0Ac{xaV|@`eK{QXKs+`JsB9;x6%5YX$T{l~r36%Oqo#^R! 
z(u9etx`t%q?iA3CMw74UUIZLcWkRD9$TrZ>jCv?i-(2%$E@z(XG%C#oafwzL=@v+~ z3eYEQCyKCE;tWa%ISSX{v?Aw)&fQBEna?YmW0?BpKXaihmh}F5s#;T8T_d7YnTG=u z!-wrY2QWS&(5tGkgJU-aKE5@*!(d%VMO;1W>*bBIk}Vm26?Y|)Cws9_CeHK-dvdo! zsXoK{F7fCwaKD}&>;QScl_AuR=R&E*k0QaSR@cLAx9M<~*nVDG+gx|Ah~BKXhmy9> z*CxVfV@tq&sUvw4XenRSkE?D402xOCt>t6Qc*dXO*W#ZYthWjcaxn9bqMP_@g-Q@{$L@==qVu2TQfPBsJ6kzY8XbjAB=BUe;f|T+aFIGEMBT# z7#DjcRxx-Df#_9&%i*BPnxAllb3f7UExyt9>1g~4kxGi;?#IxLP}5XKjaF~3K0O$e zy{2gWX?WIve4p^cX|L7b%#v=mQC_1@%f7nvpY@i8PTf=5w^knORC?YCM>-2RI6Ga$16;0;Q8_J5uvJBkAI$LYZbSkv83 zW^5k`mU_Xb?lm-d{78kTBB^9dg}uC6t##@Tkm6YgHpEiveyt-v^1!CO!ni3?xiBYO zBbTT$Kb^u=&RUKYY^2)(gqKalmFw?#d4lzBcI_gH6E ztyQ=o<1_h@4AJ0ZVOl77ttB>gb8HRynh40BS=ZGCL45s9k}Inw2HZHSC{3?l%D{0C z6XcC({hwGj?Wi=0!8%K`PwM63HpmNIs2jv*F_p5Am*MrQp7@#o0BT}%is z=p%&d&h+rWpr7?;AYaU|xt?VayGvs7Bz>woS;ET08U&@e4@(O&K)iZRTazTt%;U=>a6; zrN;;k-7qcK)8NMg2{JqSqCvtT0}#)rhtM?qTa8_bkoct3IET{rK#_C7D(BZ@2QWPj z6{j@tt%TP=!2@Kb_01Y5YYvZ=z~~h*A1`EdZ19*hrDW#x7?nuixaUBN!A@@<6~Dh` z%(DI_kkwiez`!60cXvIWf9?=HZEj-fFY5WlS;F`#)I zcbgs}X{5z&FglKG>u=U^KkAt59H&CIH4`~@i6+>x?_W-(T8w~xTAiOq+`~jj zTR&rW4mKUaBuao?3H1vDY)%&fgB(RhXC)hR#uv=NV+alv+%M{bdhM+}!Q+UmzY0$i zs3^TU>bYG79}sV?i*Qd9&s~D7i-#9OUA9t+U$7vR7Qd(9!OYmyHa*f(NXUyXskXH> zIvOD0z1cKZp#bApY}^WJS5EgeYX0C*P3bc|ChU2MW_QqPGTo)$ytuf&N9iuxDo&-NLpNZBbR=k?nUrmZ0b>NZ~(y|q-PMf>MUS$PWfz(e7#i!+WP6Io82Jj zicDYsolGekA{C%P(aHjT2!2ii@i2aS6KA!25v4m1G~;%VpnEKib;w3dY2{d)=W$#~ z4(Wl#(suE{NA{+S!BWT!9131`*%lC5?rPT27SeL|_O!adud9ppwOK(L$+XfPn5h3u z>Q~$Hg_drwKzdF&=pujG)r!ozv@|NTN^{q+I5fJgw-z)Ux^6G-uFO$ytgY87@M)%P zGJ*JzKn**(R2V(6=#ybu#OqOiqbM)P!We)a&S$gU$&WH7?CNJ;VPG02PUIU;-uyrcoRz@sZ^>PYG!Lgi8O=d zB?~v!d)M*D3X+t23USTF?iASgt+}kiFW%J@RYT{CE2_eBDr2JM_=ytu=G0l3t^D8b zw?vsdLfMDHGpN7qC7M#->?UFuLB1>8~|}#dMTF|(?``Omb-uEvM+i3H=k==Lt6}{2G(0Tr5M*bf$^5?E^fE;c>s^um7@eR(w4<&2J(4<&hB`XEY6pR*wrqJmK%B*v#ti?Q$FlMY=Tc zHWZ-|+k?0AGXxko7#6mSLrs%6Q%Q4PTE>d_LPpbl{NH}rqlETtujL%Z3&NzM16_oGuD$fs6>ua;>EB=yi&q4SPV}P!|m&faf5tajcFFK4KzT8Bn6>mP(#LY 
z9s49tn&M8mF>s(IN>5>V+B`|mYx{E|6T4f12XB!4BxJQ!4e?sc|GDwPf0Cerlq;W9 z2Tv*FBq;_-(s$7szSSfwaLz1R(B;+E1I7Z#ShqFrA4zG|x-i6lUTX;`^tqjfFrICZ zgSRHZp*lM)PCI^v{uZJa&wf+phK`L6<;6xUlw!)M9N9%N)x{th&DKP5;KE7C9rxTJ zw2UT6ozXfhJ5e-TDb?=7M(Q1Km9C|pMU?z>hff$q^*Jfx-l0?4r8Me57T$wlkiOO>)2(HR= z4|g~BZ6h9o$=N3lbYwp*L3qwWy*Q|}Dhi|vMaHn$csX{>mtTee+pn+C# z&I=Lc;1;MxX70xpg)%?4rYanEGh04a&w%i49QW)vic(w)&6TWci7x)@@Y&K}eAKrYB31NjaiEF{>FR4#9+kQXGA7zAe&%yQd7^k(yT zXW@w+J)KoY%W>`*Y)|WJbF0RM;n4=bYOv<~+9jGa0>bL0=7;uI9TKXyOd11^cCBsO z>E-+9kE$I}Ngd8HjCzUz3H5yH+j{2vn+`Tvxe1M}cB1{CK}0{7(@nl*O+V7ukl7BI8R5aLaxxL^ETN+58XMgsUKvGUQ{zf#}n@3 zscgsL`}#F!|6u^z5&NTNKM-axuGP#elE@erQS-Lr2zwn?d;F`n6)*y~OD z*@370hpu&$MuF~=s1yXlXHae$NTa~Bp@x1Vnyu1ft~Q{?v^u7Q<}xC@A^#R*U9TA*tI604ixO6>Q6ra`X`div)z zq062JQ5jFI1qN=pB)GdBK5Eyyp7@m_>^Rt#?hOhpwlvzdksES|xxFs}mn3pVR+pJi zn`bJRM^?v43Dp!POR;rC#JC^4>i5pf_q{}$+k<(SwHIO$*nU@9`H~G z4%%gY1XV8VLIcTRMY*Q6dplKqgVn%k7c2Hgk=3^z5G63%t`@FG`p30LdPAWx_DZ-a zLN=2<#U=o?SvN?jV399J&7L6w;o|cKjl@-sKIM zCG%ukYv7=P;uBAH+q2*tM3*wf7wZZ(YS>$zX;R*K%s9^@KrMu&Iwo&ts{m#Y6;^NS z`%F1rknRUtG#z`H#?$)qmp_A5SIgniIMM9H+Bm!45iC4PP{=e| z=~c7P@lI7Z@O3I-?8qv>H+LA!SaatR^!Z{7AHn zj@7~X)=5NiH#RAk)k_B9p#5vcUF|s_J$)^Lxi&2YDSTkhn8+uFYl7w(M!a=+?rt41 zDnw}#P0n!&w+(Fc<|rP(%1fRIb*Jj?DdmmG)uus(bd1v*`aɞeJ_)p15i0Qh=b z7#lodrR*T$`^k4eE!a+XAZYszha7!p)QgXCFF%v-659!UudI`MQe@8YBuP}0pXUn{ zdaLnD9JK+~b|J661pk+;tG}o}`5@9M`Loq}9}Sm6avu$6qXeyyg3H4gvbs5nj<~(v z#BzGjf9d`U?%3d0@AD!buRT~4tC4(2Y+qljy-Xmt+p5*tyt=ZNv;EgFN+96LfV|np z=!!>_ZsSiKmEp?!R`JWgR#`p>Z{+JvnbMQK%*9;4=e<@=L8NW z1YU3Ubb*}9=j#{SZRm43bK>fDEdn&-4$3wzRh;Do$j#(-0p52K zwcKT2+HY|JUB5Rzvs-CK1$jcE<*z|%PilAlGwO{RbZyvE=tjwFu+>DEF&8;3_f~g& z?4goPC0`PGu>8Cas7AJ0ZQkX5iADwL2+q46iN(IeJVf!j_Mh*Vqm@ek9)MnklSZ*+ zPd!O9a&DS=m8#GKPG_%Nb}7*iTE>s$heo=Wd^2IRNL=okN2H!iz^{S+b2ilgs{J#6 zc<;$W8DuMSg%S#4557;!l{Rit64={4ESyXu@WZyLR?6huDDn^!5`Fx8q#Fj71{9Ki zO4jxGf4#HkhZ#M)?tlt5Qx);XQLyR*LRP?aT1#D4Rn89Jes&h$o;J-pG9K?jx8p`8`ZLq^?}(M`JIoj8`5wcj_I1p9MLoaNz@n)4GwQl`#i=O2 
zT+i?yGnJibhZb)9*{+9BR7_i39OBe7t-jp@D)Dxu_zSwGb*6h)FfB3NB zoE&PRxp?H|jT-gD`-BOlwP2`cs~x|I=$TLE$o^4RWHZPaNtIpMmejrzrtQ+dbPfq0 zJ<&ZrxJHDiR;#ju?z;A$@8}!4`F}=jNq8U(n?@FpL@}nld6iXfCnLU*&D>3KR%Bh; zq5sXDR;<%ooEvBGy3DTBk>2RR{?9$PgoZ}V*n$4b0n6vD0Urig)e)7Tu%kH^jYX;X z?&~rCu~r+Y&r+48nP-Rz?*&X!Y@Q5=kBdoeE(bi%I=l!N_nn?QX{7W4w+#I3Rxi=ix_)q`{erpP}R)FKOUjg!I1i-hm2JGMR^)(1zUO^sgjqC02NxWkCs&Ghw$Kl^*k zyVkUwrzNG_mRiE6qf-_K5a%+`5r3~kAH)e6RGet(JSk%A!4lrPglg$Nvrv?<%uU!C$e6ux|Vttp=DMSi}`t$7IX5ntnZVBfG5?7$y1@YyZhSI7CF z(haUL8L-?;l016y1{L!CBmHA#C596ZBMTY>{e@uA=`czX2m+{5psmsaRGI7pTdy^g zZ&av=OzX%Lt&2e9_(ji8C(nYn1l$e}`UhYu?7s1a+H!*?j~^{uBT!aV7Ddf?UHi{> z67^2@tm~%glB(#D1F2RVXII@Jt6}{iZ{_6HU_+>1)$)Lyc&1BUOSu|s7*mFFYhDJa zv=5o{npbJz>;jR*!YP%Q8~Rj4isa~K`v0bQt@4$u)ePOyA+K4jGupvA`lGgir%L^QlsKSLtz#!p)6IsIRXHof_c z!T*sAZl`|IE4gD;N^INqpEB!NX!vp0*BNg=_)i+YWAPhQ{=OKk{#{>#=k|W89XI(B zQF%`<*q&}KNbM${JU;DP=WW{{oL%Ru!vYHPyf;& zH{ISkIC z8WXutKN`A2RMAYi{e|sJmTY1=8){528xS>mHjbcV5PC}{I?|p6_Nk)aTT9yA>tYl+ zPKiyO^(pq&{?pcvKYN)v<-N2BSk6+TkmBIDVQuY!ZfQj~dHoRm;Q$(^m~JcX+s^%9 z#&+WklQF6lYO8*pSjkH?lXG0F-rnwg<&PN0vFR&N8jT?BZpH>&YvQdV(#aLm0P1!p z&m4^e`$hq4tAV7R!rh#6zCHjrN4s~DPdMm;`ulp+Etw**6u}Q4(BF|_cVRRW)bk|* zxOkUHWCMt0-`%F2nak)*_$n(6C5Ueg8*_K7s?}CKq#WRw+~WS+{p~`ko$f(~mcq2A zR(RLS=7?I zz_DflwiM6X-4*_KkVO9XtkM4kSneBRPQHH({+IGI8x&Q}#1CQ8mL8Ut7P9(rk(~)~ z@eh!GEMijCi}%rq4y=lnHsK2!Fz#Cfz57e3aIH&KcDZCXc}^~LO9{;9mU;{J zQUW_1&e(|khC#lDz23;k{_s0DEe6 zemg!c4jB~b7sc}lC8_mpxw0wSQXBVaJ8eD$z~bXey@P;zBCmyO+2;fW$Q;xG_; zHD_zrrhWv(S7MPo7AUVsIH9N5PWTA;fk~ob{&4%3=$KINQiMw9s0jidM#gVV%5B{4 z(3Oi*n-9E5qlwaf=E|nhv#95G?`31UAT`@ZXTcZ`OL4|YaFlZBK}{&UXzacVfnedN zAfLHku{lC^Tb>uogk?ps6?1HCY&@TX8~E|rV?49S#DN*zE~ifegS7V$l=t@H6CLTaGOg1h zj*QbX66}EH6PM6qjG#rf@j`GM#!A!QDl;QyQFMHq*NuT*b%V#v-t<<9vqA9hIcA>1 zSxQRE6gSlyM#HD@J_;J45($kO^LlZU%n$&;S@w=xili`SI1qv2ApeF8<-Hu&SC)jT zE-gb@r&)o0sws-w(#}D%w~DOn*@_B1q^dcIKNPG;zh`%y%zm!+zD5wK$?X=V`9#0G zyoNSm*tMadp~b2D@tbEThQ9x~EGioG%Io24=50Iu(Hb@-_0iz}l2w*NF1Ml=Oay>nYq 
zs~8rT{Db#a$)u*Hy&mg8%(o}BceOA%7#ajdd3KY#f+rml2{a~E03HtaBahU2T#9V< z6Z6ZAjBNG4aUDd?yp<59zgNa5VZf-eylwsqK|-oiUKDZ-Et-{B4LO!NEO9;3B_+lB zUEo5_hxy%5RSs<6Q@&t6hLGtE`R(^t($29VNckY_63vp(^_7J+-U^-sT7~5gQ2nP` z??CSQ`r$73hh$)D(T-+y^-Vv~r!Uy5TobZ;brYWZ?pms_4dfSHF@gbLS?!h>&+`sX z0>AqMl@Y&o6{*DbA4EgpBmhu)buggMTw+qqUB4SklS!(r4OofhRSHpfZkCdOU8_Q3 z{;drsQwrZ3u9^C29;M(I=ko786&Y#51O@M-=fdK&%HPy#?Qc^^hKad+V4$y}BjPUb zpu7DczgAh@$nJ5|j216m_NyfH@+;=hkz} zGi0eQ(ds0Js@Yfw%4ZU>l&xPaFi&fJc!`gu1eTO z>Ibk;5{?D-1TFeghLNHy6{`e|?$)?aD8bH%)_y~S$k@l*k5k{>=_vxD3(Ac(Sz
)2hf_j9C4NQ}>#y!C8{z6O5-45PchTn%q?Q$b(Yu7S~ zCT@Ahl&KAy`|aox(LPAi`3a>~etGZMzA>vt+fU(KDFiZqSZ*IynHfoK zsTmlP$C#VKys@cI!c_YF<(;DDMSWTGxP)OrURmLi`BV z!<9lu?vO;76_w~J$7F4J-j{mFl51JiK%2EWDbL3q3hqQD%P_vNr;m6Ehxz7;;)QYc zZH0fDjE0F|61p;`hp`k_@$0A_M@$7NQzgl;<_X2VA8mU(d_fdN&G#@5MED&`tydSQ z0%tWR&bPmXHPgK|s@_0A{wg&$vOwrKtxx%!L*_yFSa{U?hv>KI3knL6_TR^x7Tj`P}&w-N}vyENT|0(%vfC`G%oKi>DS2pqY@D>S4_ z$?(aa&Xjv4GCvkmdD|i8+Yszyk}Xbhk_^MDVOxPJujTtjqgz~aD!bnfQ&lwOAN>+# z{*51NYQ5_#YBG2YG+_k`PS4m9Bnv!NBGYUOzZHRe)z1|>mfXWPIOkmo%Q@S6%fq(I zb3QK=X7Kz_QH;Znl~BV#DVs1hSNf=EHO@U#a_Mg)lxRVK^Xl^$_q)n#S?{WBt3=VA zDF2|cm;tN56kJE-iyUKQHGy$$Rm3PjLy;l{_hd2Xr_f_^LKUAQ$G82VWo%{@B%ng# z03?p?!%MV^@8sTI-iyi{`Ec?-XiNo1*~JIt@ETHa+^kGGLNGgeaqY=a50vB>+xXfL|>%goQG7F}t-l9~k zLj~^I5k0*IYpdaeCeS1HPgc{oo#ftv4c=bMuizqYC#+Em(|iB)C>u-ft5ye1w>f=Aukb7D$}0i0O+GSjH*xk{qqEVaE8S z@=%%HGpu*)Gw*I&Y6?~XSg?x{qX|Ll_#W@0xSA{19TTY0Spchk|LH`)$R89IJb}k8S%We{bKnDICX=8u!jVP6UYL)5H6B^;)J4Okbhdv ziedxZgHUDH20A0buWUJ7Y{gS-wg~Pl2@Jemd3Z2-&tsd^mWW5h>hsoe)aal`?%d#D z4fUtIVoy_9->xXedJz<1EJxH{$Y4Q@e-KL~qgf3ks31lhh<)NXDMHr|S7V#cucbBxox>!Cl-niDUY zzj3V5@WUq&lOVl==ml6USln7~LofAA7`5ECx2oJwz6~OT(srozui82h=r ztr|VbL#N)wWG`303zn+~{LrK)7g7Xriq7Pm&HOZs%3Dk0ea~=gfhU21#W~iD+K9=O zaNS118FF>=RJ`?@keO_4n?gQSL4Fa3BUp0cfRC;`zCbkDbhdXbZ;-XTa$4k(_Ft2! 
zxo%I9lHDV8Or20psT{`-x^guGU`w7VcV;Xp-hJSv&w59?jLS)jyn{jgi6C#(#bg65KA zQGBo1R3|^FpqwY%b3qc8wY|8MEs7K3$nmJx(&IT`1(bpK@usL=3j_s>84-iLBMHO9Rll7a#g5AX*u z1~xTbvZ#lK$lm-%vTZws_wV}+yT6d({tWKZW{eI+J=CJgX%vAa~O>2x{xVAYu)=+A7^f2v+L8(DH2GARI{(U6O_-1`~k5 zKbiq%Y5OK|YBAm2E1S)?CUyKv;WVc?J2vH?^%yu%*aRQ`q*x)rnh@+73!$e@Ru`9I zrsERZcqdw;#cx@j-f8Iyq{;AMPN6tlD`9S#pdY=LFuZVu3E^E#Syg7^uuXMb{lq*) zo3(Eh38-xx=P#c}*+Ha+EwZB6RJq78Mwp`xW1a2-_#!f)r;8?}p^Ev-C0g`5#C*Kb z9EwpSuq6g2Okjt~O`-4a%t&W-#%*yoP%B=mfDkWnE}0u)Oi_-8Y# zbQZ4$M}PaQK+pkAU6Hs3wJ|VZiKj?K+3mqUc5$-ues&=-Ys2$rcjFppU%l9^PEZ;D zf%5Rp?w>V>G@^V#BnSAbt1ISnfiRI94u@qwvqSJ&3RF8$3wg?>XMB#DLq8omXtDC8 zg>N7ik_Ua1aARVpACIDEgV<%j7d8UaPNvm3TyNUcQiv*OIE;K}gDUjZKx z28OJBVnKq0n7(t2)r&7-1v|N$&%I<1CsE{l5UR3PKml4QU@k(g$W7g^GT#aO=xPTx z&e*Zt*w|PnbPMe!@-44)f?QdT*G@Mh!6v5U3D|W})Sh`%2?=q*BHDJXlfhR*<(>=9 z8WG~Jm05Bqm6RB{OHkhI;9@4_b1LxEtfG&nOkC})V270B!sDuY&<5iFI`{qtQ#jAnu?t%5Fu(P}j4S&jxcEM}uvjog-gTAX`_ zcU+=dP#{ZdY;4~~nR#QK(gEh-;SuIvf!e=GoqED%ik__8SJsV~7VsPW5 z%il`x(;1;B@8~=$DOEYV3iMG;b`g}mNK&w`Gmu8S>nP@nKFZlaIsPzl1;%<(eAgD8 z1yL-`l@VatM;S`;6%7(;itN%@B9%zSJ^73@O$gQxHly$=IyGTvK(I&gViUiP+~Z`e zvHgxqG!>LxpgvDGPET& zC{754t9OS95^}SR0C#4McuIcGKh?OBtd3T>U$8R1x;Ahy%>I%g>vg&J*~u!;KvuFc z*kPy;hTP-Qz|so$`>z%mEpUK5jXG5JC2KfwfP+>7#fH0^_~(?UKvnRO=iU_#Tjvxc z&(>V;^quF9GFYf~=K|xZNN*((hm$)~^fFp#9f*`1&kMYn=KLEpsBuf!6ch zKf3OK-(7_${s^M{ilm@2T~uJ>S9GJu_W4Ji-5XB-@DiMg6n$yV-z(ER!nPCRq#5p= zYcNJOS44a)Uso*CG+ee3>7;4rnPbpTHpdt?_j3F|wl0jWHrv3Pu1Srp`N6arTg}~R z)v}tqQ$uC>=%}pbfu$CO zWylik`?69FouxlxksOu8#R?p~L?c=mJr4K<&=+*Gfk&^UJ)!KY{@oGDammrUaQ;p& zUeWnpIaKuAgu%miek(&FcY+PmJ8oGdHDh;`_9}QBudT21v;tx1gS9W#CX`ep^C|Nh z6P8s0vQpxNA8d+bD=JF z6@3*h0Z~eQTv$s#y>+-4v-l?Vpgr=S{p=dA*0`b41)xXu-80eey?v>pH5kY1m{Wr{ z_|!U#nZ%?G5d{7`tx`&7&@s z8ype;hGJN6ca6A`jAg)z6X(3+Pvg;^^e)x$ZO=y*59>=3HV7`!46fPs{tSd^Y0tyi zv+?DdQ;B{KT$FrNH-pY6C}Y3+GCH1A_L98u9+pyTH^Ks&kPzY~c(3T0n+xLyHUN=!uk6R8N#fjx20Z$! 
z8+fwh|3aYLRY2~M6kH^sh$q%O_;u*~V%Y8QdBFWdn6|PLtC^@`#OxO~*%9n0|Gbmd zYtf)xb;vAat~2D5bE7kQ@yVft`qf;H?n;JkGL)gJfUk?E?JJPzVMwT^=5~kj=btI= zTChbpN9UiErDDr0u{#>GfWGbeoEKy?9(`wg!^^yQXhU%bDFEeqekaU!7HdOQwBoZ> zPMWnU1PjZRSRZ|;`T4Y|F+k<@e$0{AL%BZ7u<;17r~-4=4@7o}kwGN-2bFD%wejV) zPiH~`HMhX{1*X37oK4zopWIZI<)ue!xVVS|U6P8$7_Mn|8)4sUZB(r8268MaNy|^T zKR$!G3`@9Qm74d-bi~%dNQ$iBRahLYVuvB?w`OsA@I7Fpugj6;Ms(<(3`a#oTy|d3 z0&kD$g&giutF{;SgKs8l@Vx=!n(!r_H?|M~LY&)Wy|HZXsnub4P1U zGML$v04Jai7Hb7cmuRMJ^Uq~#9-h6lXJ+MTEH^GZxi)&4@{4PAL@PszuUB|y2Kz;q zI2+#?DIm;EB3x>Ot1%fr(>W9uPC=>~=&|HTmgNWc!9siD1;(I1?ROAQ^F`V>^7li8 zOWISt*6T$#^?fj5YFNVCCFJo?$xF@1PEdQP;=~*>MgD%A zPo0{(`O`D0JRH@16=sc-7P<|8-9J|j-2DO9*H4eI=wrWG*p7~Wx$me>GuXB@*%zdp zs-U~=P#C9JQjw8`bXfgTa{F#UY{xyujPwA!O<+KDarC*$^#X*y-<8_n8ot<%MAO@^q8y-nBeebok zVHZ^n*!a_gSHxSGB|GqiB$NQHVPOCdKiZG8vLv!g1t&QokvQ7B>>TEg*Zi2YYTK#$ z`HLUa^wuhkYbX_{Xiy;suWcCH6sKT8thF4CD+AD?t=g04wTkN{nuGH8cbPqb*+RTv zTGkIgqAT)j`Lym`qUAGA_8D$!2kV)SY(Kr0mI&^ZH!_i)o~$~$rKu^>Z4{1GkM#3j zC9lTWsTFMMZ~=!rvWjLl<#@b{qh@3!K3OC^{CtVFaIJNw1Ku zcTE1%0uHs&oe5Rj+jaVt%Ij^edqo-P9sFmaedo^|snANXa5FGNdmkxXYo9EKjsoM0 z2d?*VwK!pRMATL64YrZq15WK?3+qE?&s@J%5_}lv{Shm;W`Uj2@N|5@RBh>_eC0lc z@9*W1QTIuCFiy}D+8g#$z?+o(w>4~T;=k?YY~z{q&fiVK8(Mj&YFgXhCt*-sM@U5H zsHJn*4*&?lE!Z=0-)nJ+dQ>qwr^1L?g2+VvSds**BNt0NZRg19g3A6$p;&QZq2v^T zYvR4Qxoaruw;f#3>(>py!tjPh0}vn<`HRH0!7TM zhYQlLN=%p(wKz@~6q?4L3k)41x*pXJAq27H6#ztRZ;@$h+o(Yvy>L|HS(XAri9}vR zj3&saeh_-{G8UBtI7a9B?b${}D_l!eLpzn-9aZ6s5PmYsOo8QU=0aXYMTKn%1qG!E z=ih^97$Y#&Vv8Ta`n43Bi=xYEXzGD(NnIzy~-g&fY;!itiTZ^%iL|+yisv zC~6X3MD+zJ42*!7)H1FLoua~!f@srYD)Sot{^@&egfb2Xsu3CZ%$3qbxzhNqVW3kv zGGY{UIcGG%%B$W_=agSoN3Js%7sne&j}Cu(^#}Wv%0obW1@9#Va~2|DITA9@E+!vd z4K?*NTgXfvGqs0MH7i%)&$p-b;kP^p%Vop?IFB(X?uN#uzy1CJG$63e#Yn^}WFWy< zG~~=6@dvB%J#Tj~+u{t3dC{n;Jm48;_Phd2VHKpA%3>UrL7G2>l9~5b1t4keQ+G0| zjLs`kPfRf=FijTs+Im_A+d8Gy>dL~HRMjS~7|-n{HK8_Pgp$EY(-gwz>tU9a-@s4` z`;U*aG}oEBD2BFCa@$KlQIJClj6%(P>&BTXv(2F17W)-GZ6mS_II*b5!@MG1z(~~C 
zy9fumBn5r<1e*~pkb*Xl?tc~+N|a;EgcPe`%;3Dg6~!@szy9l;KWhrwneWQ}sGa=L zspR5r^8ktEB^t52$|YLRv*$hB!+xK=pLT$od8YNkOt26AlNIaLLZmSUKg~32GDjt! z7}7tC#H93yJem_RV2k_^yy(=S6T3Nv7kcvXSJZUFX3#MiTNd75@E~e%J z-kmk8;=@uRow0y!MsdspydPnQY8Q*Vn`SGasi_RR_1>>E&5(5n87z*#@HiUc72uaxF4pCv0cL7xw}%Ti?yMIW7GA zgFr3H90I4dPEX!kwZhg#)Gwf2t#Is&m*nhspG&lR7pTr#)+2~yBPAeC^EiWVXm~hH zVS1k=@vEww&&J2gm6=LwQe|$r;2jGuf{92kT3+cidueHHO`WRu&AVOZV!?f(!K3xV zu;U6#ABI|8#h%@-(#kZ(C8MeB~;= z-X+@1GL2|@``PRaGwZk3nl*LLo+6fo{{S+DY0I#x zfkum|KMa!E!f`;sc&f$5nc0es4V@_BOCl3_5iXx~zh;X^qHCD*IosuS(pWi%#M4do zEGuj>8 z{y1T%5RAKtqw3nu<;f3Ut(F4QFEXk4>EFt~b4&OZ3z4`T&XOBV+QqT@yA8u86ZOB+U0Rz_J);+!=TL4MB*Vrh$GxJK<2+!;c(cb^zEl_2cW_rK#=w6P^> zmKeL@3d@YU%n!Fw`6C+RFs|ZHG9?%H=Dz;)I}5})d~mxqc;2%S%^Y)STEIEzgO7;1 zW|+R?86|zk)AmWZe-a&hf_k|vi(}WB`f}?HUz$}tZRz( zMaZ+e#V5u}0tFd{I{YQ{Q`oKULx+$b9tj#T4DY|=A%FBl5x9BN1Oxz5GIC;k03OMY z3hrl56bw@V(19dWjr~#~Qqqq80g>rV^pbFOBZsu2o+%y^WMFZlDG`Gcic!tkH!5ZC z*JNs9WdhuyU%__ES_@&uxB+D2zdKZ8X`4Ord92rz^Rj5aF11DN-|7#S{QYxh<09?X z(KY29Tdf0Oj54dO;ejyr)Wwf`yq^E_E!<$fWE+lz9!C^jYoa znhWkva+^2)H-`xv>ToG>Lu=dCnkutB{_qgO*a=+;u8bG#Ba!>`J$_8U*sU$`Zgk{ z>jz0}Fpse-e-gh5;!B!?e50~_`GH~%88eei+rGdvo-qDgRqfoWV3y4J3f1BPBa10v ztp9&L9^`uO&_ww;`J5vh!{=d6(UVbf8w|cHEQ%9*K+v**gRt&|g*M z&^4A3?DHe_EiYpcadW+{v6v|*e{(kBExn5aM!6(p$LJ(=P0qTReyRMGm3g~|iRI%e z#6y0et)?7~byWRZk}~EWDL2lQY5RhY;9T)#__=56dqA2M1>P?Lp+-*h_Ti%!?$-~{ zRB`cyiODm;npKAD2jWI^Yb~6g$l1N1(GmZVN3xYf%6nXVWDf)htOvj2@qW50i+zn) zlEtZn%<|Nbw@wLoYW*BweDs&W*f#_koX4vq%xV~4bIe}EoJFZg&1#T44I8bvE6}kh zQtti`+t;kEy3;Vd`na^Tf9`Kx0$H736SoM(1X^V?fBFUYy;-1Ed|~j}?PInjQ~cI9 z`mC}hR|18tJr?v-ALq7Rsq;de8FtcTGy|G)uscBS#o*bFtZU$5x zt4m5z=Pjky^Ms179Tg^r6V<9Ri;9)M)AI1v1XP()Y15h>pH}3GFl825l$$ZAFSJy| z1jZ3+6!1LZ-POePMS|59YG16Qy%zbZlNog>{Cr&W)?EQ27sgG{VezM*xlq1G_YT8I z@hmYrKG4Q4#N3)7z_r{<3zl$uKAE7SJipf?ljrNguyG6DuIl1b>z5yr>)tl1z0tzF zm>@I>M7ULv#vq$Y9UrSN=AU2NvOs7Be7c9wMS6YGytF~gMdH~0poo0h(=gM7gsk)% z9Sl`4nqo;Ap^-=M?SY>q33U~fT2)8(_{u?aA^}CN?wYoz0kxEcAbKV#4tB>z)xd+u zGiREEDItsv5W)H8Wl|z>BiQbPtKIR 
zB_XUn40#}kyfU*+e?A@Y9q%kR?nZK5#dFt;XN{DOwua_V-e;l zjLoLwK+NWuX(EqMnFECJ((N6CYL?47?zN~p_b_^Fwh=SMG}Cc`M|_c|KkEb~qtl5= zS7D=v*}TIfHmYd>F1`64Bl2WEZ-l(BB1(%S=&Ko{UxIci_Q^yy_2n}v5ANa?^ZivA zZ#a}gnLpp9GmN1v_Y!I-GPG^h;Bd`gUX!(~%LfGTM^DwshOhs~4NwX^3e+eGnE-t3LukzoWE=iHSso3&%QE3eDgS=paO6s<2Ut*w*& zaLY?Cs4eP5u6}@|`Xq1X2&7S1bcxg~fXRp<-QOUwO{-~0Yfu1**tuQ6HjTy2nS^d;Jn zC$a+g9R{!x#%InWPLvVAI=mrZ1ZL;DV6BRWr^l}}V=TD-8Di_ILl%{rJBK%7U54a+ zoVzmRnRWX{hx!$?zm|ybDqIZRj9WO;znPmDV0ObHTG9zSbTS;uY7pIF6D1(UC*?+ z@(V%Nikwh0c&;jsx+5PqGRE2p@?U)E_)&oK;>@zAg}Ai1gpci|2{S)gp|x`#r}-it z>DhvQ^&q?#SsQ89CMbP{YVULZfEdoa9N1g>6&%WdjwT?ILl)>JH!A$-Y~9H4)>8h} zyBZlKh>#InM^iC

I@q^QiobH0k-~+AkC&&wuv5M#gQt5rU89Z(wmgI^8-@+; z!m%b*J=W!zs6PKejOz_-l-JLG+xkXbZmSO0cRZ&c5$@`fJ4mw+rJHRZH-x(!9w2{O z^nCTLM?*W7a^^=DCh~K|=f%I{{p?Am?}`mX;d-0lc-Cf}S-4edF9&i`Ef^jD>#eS< z?CR?OU&($D2;N#RY3W4uin<@&dG*o0xwGb3@co_6%b_>N{()n3oImtp?R%evkjEeI zTD^GLbdTR1Ay{|BKy3x7!Na{15N-TzZQ&JsT&fQ#Huh2JVH{;~-! z4qQl^kET2f_u)b6 zCs%R(`pBEB1tMu~m37y*Wn=%8%;a5MLw*?s_7dNwm|S>Q6J+lz z)>$ADAqe9+2SZI8d7j&aE<8z%6Ku7Rgo+M_Yuo=~cy(vrrI|xd`mrwKZb7|^-MPgN z)sp79LegEO;%b*W>K3kdcYM6o#GfV7|5)S7^w-|MH9q#w8TFe=_)T{5G2FdGrOM{k zgob9kKyJ=xUNu?wO~*fCo$h?gz?!_)*>|ap+M+scRVd+>hJ`?i=~e1~QF_6X$ECt# z@cPPPqodxAt=#P6E~=-N_4gqIGG6~mc{>a|JtI@HzCzzR78_MCB;HRG+oFGyGU!1L zQ$zL6^uu)NLR*H?hozisDHl8js!dmkkPQi0fxj(G^Nw|GBz)a*zzgJI7#Boj5zR6o z%sR7IW4ZT+ND|c48*&#(R=q`|5z(NO z1i8=Sqrtxo8*z$DDNI)`Kfqb~j!bLi?tO;4uovnVlZE0kKy1q)7xAB(_~%ty7BVXs zY3+@Ftkb>-dXr>&^0y%1S2dntiHQdH-QhsfZANa=!&ma9eoLEld}UF1Pd~?fwR|$L zEvPu|p+z^K%nvTR9`kSuId8^%5F>&8JAor^u5n{b>0k%7-ZPu@l6n&$0TuIZz1Z_; zz6a_VwHt*p1p$wA8LPTuL^O+pX){qp%1jl~e0v!2Ac5g<{rg4D%NDqIMeerPHHN;P zd3T4L=P5mpi<){&f;TKL44B;0ek8TNy{+1u3$-Ah!Cnm;b2IvmhXB(vmPGctdyigv z`nxcYyaRuVIcK2s$Y@=4TdaTm*_#rU&m|i*^qS3})?6qGvutR<)=T_%LBaomxd}$} z?lFDV^HIt*D{dTDT+6nD8p_ZS_d$(BL@C+GD`6K&HmcDpW#vf&v?(vK@;es2L3@n` zaaI4SIx9D3u*=O>gqqnwbzYI1?|?S305)yGFsu(@;Lp5wWk$mrHR*;YOkN*IUjg4y zE6!_*(mnC&q$+rW8!W*4)~^g4B|O#SUT2~g*M5bN$N0Cz%?1fpS2+ad_F38p=^1IZ z(3X9Z{Z&kV8r}KHlt~b(c=cLeG?d`E8ut?@qq#9M!G0Urw2XL<-|rEWR#7`4=N_Zw zB7Rm85FP`+sv-w&DjE)t&ZU0G)9FFWU5I}F6)uGw2tv;@B+^YMx^eXr02aB^7J}-z zSNnX5qrkWsAeB{b7^pJ3%AfyL`mHx|fUZCfSNjin#55!aT?dSV=khC^Y}9lWS8jjs zQV%GPDjF#{S|8qSl-c|AY^LHn-WtZ>&TCV$2UmWZu2T})MNL9BbW)K|>l9Pd=%hS; z*q*82;ZCHsU8une_xn10U64=dUWm!;<2*UId%zSg{~3Z<TGa+z+GU9qA2rmOke@G(~vPd6gbq($Z)i+gFrS3!Kyv!F|HnN<+*<_VSHLj0D>HXglxM zaTG)~EL)4*%X9xqoaa*?H3D0|9fd8A0n8SlC`9u(v0tU7mcouXWQw?scXT~XvkWJ+%`GS0j%f4<0nphN6{t#hH~-~N_Tkn7-|+}* zO#De7qqr`=Q?j5vEZ;)p5R0oD6Afoj3UW+F7E`Ip9z*HCS9^>Z2iQ~eV;ZwBw!|F( zunhp_^AxS;SQL&HO$4qJPYrX9J>%58Rq}x(@h|2#E}5F$@boTf(zq}9-cxOsMQbj- 
zuuk5Q&_Rq*4=%(EET4Ja_#F?xD=w>?9oP$p+p$ze!C7hnQG)rU0R*z83W>s;X3?l#&K>!y?`05Fb=@AroAyXl_Dovgxl+-agR{qqF(JKCC z4Jl%G0zYU?!qHwHLXuHRoF^*O0*j& z7ZkwtOx!%Sh=OuY^ofc*({(B#rY)sxNQ3j^1+nv70au4Hx6%T&;N=vZCVh8BMk4YF zK`-$#!Ukp5hEqCu>9pT;icngxx$DFO`%!`b+Rt*hSRG1V_M3Lwzc@KK!d)cU`t%)8 zAT6s$**sb#rh|0LF@K1SQd(M`f}nc_NE8`U`c7Kv=6b!`aU4&PDTzO6X{uA|8~0{o zvIJ~~Wx|+cx2*?s_&SrCXg+g^2wwtP6FuLigA%6B2n5A_?);7yyQO3}nXdIIxt5Rr zG8iUuZ-dtWJTmo_ySh)Eg|2YLnQT|htw5dia%2fXVXnYCM?i#|NGMTf(!;7a6e$+~ zvQC(($peBK%7V8pAe_Mr;GSSPu`!6|(4;I3I{gO2ND(cR0aRr~DFee*-`vbhE=O#8 zdO9A)%6j@EC8^YfU(HIyq%u38sF-OfWSK;yEg4d_;%4Y{*|d5h19w$LH9rE8wZ_IQ zWJ@E&TS>2b`MVcNblJwz&qwlQOcOafr!^ED>N8JXJCaPb1v8_)=S%P6$3Y5f z?ChswHlt!jffGFiMr{MrSE87aPQKcymz{u85p%};y`Tm1Y&vmU_@ikbqIirzYWMeKWnVG)w64*a8!$ zhq_2ZYqYZr-XfWKMX4Hr1wv{8!mzRhT5?s1A?TkZ`l#|w&u9a!>Kha5xJcFHR2Gmm zWxsfvjJE$-OfD*`Fw2BQGnun%?;AI&8AmFI)tbdtR+Be_{a2Ws zEfY1+ES~8tltGK=a%)nSn{|8=hZ0yd&r>1LjmgwP8lNnm_VBuvumNZ%q4zp${E$iYa>f+oLAF#JD?p^s{YZp%zE|_rrz1Tj zJsy^E0n{x{pgeGL44-%jfu!XS(+uU=gi2ZyI5<qL*1hHAtLEEW{6Va{lY%ItNepct-N@U$Z}T0G zW-=t3hCoZmFO2LGEwlLN902kI^|LfdCSV5I#RWG>*j49sDz5+wG3`6`Mh_GJYh;~W zJx6-dV9&tDggmMQ2VFH^cUa1v4#ipR{+LV0_X9Kv^a=%7#Ao#qn`>zn$!;>n3u8o2J|vgJOBYyn#mAp ziN&e1Ss$DURv9VFsV`56nQ)DzoIjrxluc3oN!mnX5E0~X!Gz(tAP{gbj(Uw$0OEaZ}G7ODKwn-C+Xbk(Kh0a~z}+N*kI;K7B7i zU=uK@_w{J7JE9uXniGjXn;N8%mEP`V#Xykma$%FCJCfFXvI^bd3C{%cNPYlW%ze~k z3Ey(?QNkDQBAn0S7vN#2XhWH)ij+o{4BxKZCdn&e=ZkOV3qec6V$;6k^&N6m8cE@0 zQdyHSkKh=CIlKY1c4`oAXm;C#I?FS)NQw{@w_^JGij{|Xpe0Yxy=%8K^>~B!S5Alf#HWFn=Ls)%fwj?S%JU_32VVtNJs zZSXTE%DK#odr!yxd~8q#H?Kw#v@(0Acx`=ziSxdaV09seOi6R9sabe*Ld|zd# zoOT(6YG9nqhyT+y>2`PWGieF-1_vQxVl{bwIhKaud#{8^RZhYlhvOPon_S=uQD57> zwm*bQH;w!?2JIl50&$9htomXfc2CkS2lXbYPYzS7HPFCcPg95!p2%2z5Gx@wDXVhO z1a{%_$y-=YY-J!-f^-6mr-*0SM520fIW0K_eeJqT%&IZ>%7pukco^|;wxsUBUXh08^bWt@sz;EOdjar`v&;jc%ezD?yUwb8Ti5DBkl@H;s-;vxFNxs0l#+8oG$E@T?v2r8x-}fZk<^ zW}+3Auw>Rob!vl$*)pLMlCI{ug}%qm=ApA>A|A#u0sfIE0Tlv$dru}q>hu-T2?HPs 
zAWVc{Sq5}5nr@+qGvR&X4Ygz7aw=5Kcs_=|RO##wApBkrogw!d(1EBa%~wfX_SJxKz|E5Nfk|^-h!BEeL2Tq zutBZX7@6Lem0^A-I)w~7d|iSO9`_8%1R>G8Zdk+p27 zBNl?xCl`*7Mmu*90Nc(}HJ4gu2v~cXQ1+|CABd5Y#iq^`>WI)gUL;_$F&eXBqYUO^ z=XN}bG7?1=_c+l!C1%mfki~sP+^0X}szs!xS%w5BYf9P|+GFlfx53i2p~J=TeYCu# zjcKyPV(^1%{M;kdWa#srChBFZ2maKz&ki%A1k3Q{9M7!t!c8M>H+=R)Z~K5Uh(J>Mf> zPQ04pqmR;_`k_w+G*Jcw&LUOdacy;Cd5gLgCC^Bzjt!iAmM;tHEYX2sT3Fr^#trHc zu(hR+@?@jDePI(+Or+&RdE~ln?|7o!79RCxn;?{`9nnQ-t|%7PI)0U@NHoX=sVkmZW>-^I+coG1-!bEKB2p9Gn8_(;kNSMhOwxe! zJ3~rSsr8e2GB%;O7pfbq2xuBY+FiU`F;K#!8cdEEr#F>!C6tu4K*Agxbi4U4Y#G)p z^+vGY6NouFJB?j#Z=XiFjiNdH-tbANxkUOqI(5P6i~{Kl*Mhf{ba{DGRq0Z; zkMXW7KF~yHIjgdedXO8n6QhS+ZHKYvDrSiUOG~j!E7p_`j05z-!s1uoG$a?De!{6~h-|@DT^vu&kz&omvrYo668monmMTeVeB>_}vCPb09DfG0FM>p7vA+Fs| zcaAL8zFHq>xvMLzwG40P)$~$%M@K(~dkf)&uJ=b+t5UWx+G3eT7`LDPrcJ-W6cF&m-)f(K(YP;9PclS%4b7Ww;d*aZEh!7oBi!cPNa*Iqi zuw^AnI-Y@ro`s42v*saLD_n++%~y~S6M&gLB<66j1NtCk!F^2T0}rFX7S53P%7f6K z)pBOWV8?udr-GWKIEAcAY8p?p3~9TmCNb4vWQE};oDy&3rD1JU6S_@UPD-~Z9X%HM zDKQVI9ymRSFo;|P6q=m*xsyPc=Mz(uBJiPMs6gT{EV(Ofxe+8phvLsLW2xkX6 z;MSxFZjU68$rjVGN<1NXPEtECTAw=>p2;K*&&7by3EK%&&#K+_9{h~VJT`-F2bLR! 
zQKQJ9y?%~0A?*?d*OuTi?3E@&rz^W3K!8(qGUSS8+aAo=x)g}+em%m(kQWFN?*9SXxKW zeEd(-Ej=x2lc%B`-st$qiRuvJbEg$km61@ z$4lJcK^?Q1-yjiqSjFE?$%!%i$*d!{f!mC=EO7^Wu{MK|BhMjGJfz{pCA5%;h+gM(3!)i!ayY$D{}67xpvHW9 zZ$gcQJ>{5Jf4Wb$^X022&u1MAJ2Tk%?iSH05SOn;R>^W@!9AfHNx5W3$ZTk%6MmK- zK+08Y%OrO`04{wZRQaq{eX=?Y0X@P8_LWC{m~1 zE6pj5NApi!gaB&Qd0=iaas4BtH=~R^E!;6Buqn8#WJnK57rchP-rP{dbkccxFUt71 zi|mlA;!XSq$&l$EB@41yBnz8XA$mO{=T)${8Q7fB+9?X8?v>z1 zHlzxcvnnI!q~P7(*KS4Xli`te!5{^#TUlAB%8~VPHRToFihZBGOyu{8GOkkntO(T0sPOd&M;&g3^ z%j4@g#U=Kea)1}gQ!8D4BVDoa;bdQZN`|^(qrz#3PpMu6Pd(W5_4>@gW%pGv;M4WZ zy$|nKS6}ww2$I7E1D^c#)7KT76;5;El#GTb^GLyh0I`Z#k0o}8G>CExRAb`5`Du+mPeW;>e+!#HRTlK$%HjEd7DTEdqW)4 z#D%FP+o)$Ell&4%)uf8{$@{8MI#Dje@F_q+_+@ofhZ5Gxg&`!(mJf9=j2)q1J6Dz^ zg)!R;#c6u*MPA%7G6%XXfjaQpNfjFHsNrx23SiarMaE!a>nXZu1P|2i8|#nwB33_g z?5OFud{kpaGB;X=+R2Hu$zZ1e56No4hdjcW&NSUVjWx{@CF~{FLvXzJzL0?xlURH$ zub}e<#kDjD+J~-x4XKwP=@^X3uOUGT$-7gJJN^FJE`n6jYJt8 z+9RcP%hOaFhk?#h;!}XM_e@hw!w+&vom>O_Id|+(2)^88N-8d0X|lBucSV*QLIKLa z<36ytd-xpAB+2tCQ-rw9>WI)D5MY$hjxgDBO1y)GCaQ=qrWx7=5vEEEb)M;=y17yK z(WTvZCxG&`Rr&DVgP#@=WmpMv&ZS>CZFIi`(Nb!)w;_*Qa|EkXONY>ku`OdSsH-miDy#=ii> zNZC4FNo|f~0q^oY-fmiW>}&&7j|Wa=z44}kMe6RTaq@QTPDYlDy~pLN{+((H#9-MY-NXf6L4(w? z64b?pREkmMahVN*Od$B3=CwzhF?zd`1Z6x8P7LX(EUM`%p3={>G_T=CsMXV0B8vf* zaJ$c?^Pgd+G|qb7R;D5mbBeaeD}ahXEZH8FrizN1af_+QxIe~f$E6dPvLLXdEw7KIS^al-GrBi2xzR~p|PYZEX^i*2ChIxfwdikyk`(eh0JTR z3Bo85eb5Wz{dQxVZJ8*oMg`N_1llO4sS@L+gb#VASZK;%=xpbpye8u+k@wik)UrlB zp#O%^vdNtB{T{qqs5Q?-_C`sjkv)M8#T0SSG<(;_goC`(cqj~7(e52v_YmW$zz!~b z;@aP{2^ED%k`;uJQ42eux-YCh=_IwAs4W`xte0x@r0jIp(cBe#P2kQcHgip}{738= zV7w0=B3wOaK3QAvFhD1J(rYx~8KU~|#Araf6F{ky zkEy%Xz)VGii!1m-`rx&>`oj4 z%2S~UCcnfQvUY7XX^`u()g-i``NUcjeAfe=vK5L=l<0Q^kqU~v9X8bTDJL7kJKzmr zk-R4->Q|<_Xs_8ln&`I;nirbCSmGP_(`K+;b=(L*F`Z53jmINYax;Bo^AWTv{R@X- zY3Ttsh*VlYsf&8l&)}(4u@d7b_h;Hc(3u|db4(!gUfIW)%MrMWkcWsLdCiGQRp+Gf zgcEgA+nXKK9VF1>N$A}3PD~{o*mrf2O(TvrQ!S#aJHt4m_0nORXPx>yGaccbEl)k? 
zo#Qb1Pq^8w>U7|g?|2)%4#Lc0ko%6z6mU^X+xsN*{vxB}JM>=RCrtIs1RcaG(uJ8$ z?I|n?1L$w8@giYaVzsH&c#DdhtQ&IQfZDfRnbVL2`wdH7 zg6Yw97aFFI{iM%Ib;nDERcX)hT>9#=ihP}Yd*(c4=1g7!Po)pBq2GfGJgN$s;uBM90(^ndoyQKN5Gunz*#nX_0#VW1rn(`Dhjn`j9Op{Rh z+j2}7ntwRMY{5ArR-xI9Es^4N_*VKmkZl|?hUQi*>Jo=E9TmtUnsgcF` z?uWL8P&|OWlN24?MOkl)H>Ml+7DC^X>gG~%0bb?D=nPlkbvse2etp9d1<^~l)w|-F zuA#0dhqFQ4;#?dlLs2oqL`#PlPou5mPV)6T+d@t~bf4t3e6s_j3tjzc;~m#m>nS^2 z(LJ|wQy}aFAA+0n$(~v=>gSdYWC=aAM^7S=Ov`A~J_A&(K%gR`dV_ zulu2bLoa|#WGl!8#KDe=n)Mbt>oaXLC0$94IBNr0-XjR1NaUYQ1_0odT#zCQUgX#}~T&{46JKWpE zJSQMBalq?$;ghukAGO*rp9OEP$9{u<6mKuobOlh%JGy(YJ!tmZY-~jBibpxEv3vIJ z$c8c4vK^BP1J>DA=j^BHD2NeY;hykVB*39FxzS90h3V=r?p0{S?zZ!63`}Y5t7eiTuIGvegXMI`Cv(D$8r=52y3~i2muZF+GMPzx$*mWb+n8+vK zf8_eNY)@9(4GOfJ49zb=uwdaHfMo~^G(r6~a8T(WB|iRe0c z8$UznnsJERpICGERaM>w4Ucfp8FuQ0k!bqh^1d!lldC!jiTN}(F;vy)BQc)({Th}( zuy}BX=GV?bDbvlrp?E~ygi{*){Z}>2oOcNP8(H!_9)4syzgUnck{Y0<_$QWOEa#3z zd3xB`-*Wqsx~*!M^!&zIl4W|1bEzFoVr-;(C1y4UjM;HVA)Un+&zt-OAvlfE4BPNq zFwshd8i`8P6~&mti~9a8d*}766}p&6kxbLz;}WAku=teuswmaQo9`@C&akHiSD0?Z zvsu`_@s~wXVx~;TMWrL64Hz!7ri@hniPd2ofsCJoowh}sVL$tO1xLqlT^SixjE~ya zHP=CKt4PSDaVuNK-{1NZD`qn)DLLzL^ONrxw%25sAf4v!(AY|jP$tQBXQ2Xu0(3){ zu&lK-$DeZX94Ukdi!1JfH&)KD&r+X|$;s?$hO%MDoeg+gT4=CncGr#{ZCu zr)~ZYpOJ3p`6Guj>_XyI1sJ)X+~|eSlY)Xg?vwD!wcAn2HpZ$E0`q@j#R(;!%eB%) zeC<{(;43wy>qwz%7o0{Ehv1a1s0xZLLh-rmNKyk*XV4Kx!@@au=Vn zN$>W**vdK8>l+m~zc|SE?#vTBeRntKZ)yJ+OL5$iQYoN5)*^i- z(G!#(%t{~e15iZR;^@#Uaz`QNd0w{&|a=Q#OHLH|3} z2(Gop3;3L`^?$Gx;p(Rkn$7p5rNjPrxu%1V?i5VN@yP$79BI?o>GZ;|qLzyPyIkv_ zrq9+VeI3w@{~ZfQ%R1OHOtj&tuRTm;av~xcRuk zxUGWlaR(3*;p48q)cH9d*KrTGi-uoOD2-Kz9MU~VadoU@gT$><(KccMY2u4tI%zB)BqL|Ni8rw5 z?0D+?l{YJvRDyfcEtiDzNQVvPb81Nf2oY_s6Qg~Lh!9{eNBh>}5U$2xXqqmsh$DS7 z7o9y?j#5gFA|y6>_n1z&guJtb7g~a9=OZvXvm<`hrYcOT=bTa zWxcJ}!$d}M@;NW5?NFq9=0hK2VW}H^(R@`hi5Os68&?dGL<_nv75xhER3qLGJb|UM zL1I0eUwXVdR6>^Y!Jy7b6AvQtoIgq)a1~(%SYkfhQ;7+BQF7MRSwLd?K;JEmY11F%;7$5 zG2J?Hv<-r3j4k#oRGwt5LnIL6(u{e3{}E7kaCc-eq8JfI5td}#sL&<;VDd;?Ez^%r 
zgiNPzT}KIhJM}@B9;x1Jr`C;Q>L$MIWma^rOaHw4v1eQacP#ux$)hi5eRj9c8eEICP$behuW?%P35V-qF3o<3ZAx*Ka0-%`DbpIQR)&_0+c* z)lUb2g7}>i{m_pIIeklwuQ*kH3*o?j;Dp@K18zBuzfR{a?MTz`<(e@$Y&>Ts zD^JNhjAfIwna^#7XhcVUQMQjkqTh|sw+j;wyS2(nx6@)HmhV@-d$fyPh@y&*LHXPE zH+WFL(=tOSTx3f<7hLkK>n^;qf2&dhKnwN*r#L8mPw+RSdmx2RD}0w-V9Bf6jH z#Ro0bGCw)gNt2e;(CAvb57E3|`To&vHZ5Z=7-Mp~1WmUW2)3fSm!)?F^Ohl<3cV~J zySxpxeA-z_{q7Kvs)at{JYG^+QJt3Mbj@a&cF4m*cSLg_X-MsvYMY5~o;|;Y0Du~N zP5L(UqC96^*3=oGxon9llJ<&>$nHee|YfzMK}E; z^ylLz4CnfZ_yx;2wbu~H5ma~{bxC}4=2vZut92ok^ZH2$w@g{>vpN{T|H3hg^CKGw z75w&vQ4O?-(^L1w-D`n1*)Q&14{U<5L1eG_TzuHEqK!RgvX%elc-R>@TxxRe{l$k+ zOB8WGD2pJ-j(JgwSd}t#_C38iJk|WwiaOd?JNN<#2BQGugW)Lo^e}69!$D*$NZRNi zs(9=*(?dI?er%`xUJODjsxs|Sy_gshPz3e11t6jWC=5tx*=OH#sQUu@7o5R@f%a7M zx@n-dn_oqIWuDKz7feyE;_V3~0J<49k@-gY)@R|DiS*-?Mr!9oCbVfy`Qm1N6b^ip z=oP$9K~I4T@a$iUnCljw(ih?-OBsG>&^yIbbBO>ZSM!1sliFv6l#3)wp+nml(4I3e zO@0FQ@`#CcK;mL;uO}>8^r3;SRlru}3Vk`;k~d;;6VZ(0XchO0F+l`LSNsL#lBBtg zi#i!lT_LW|TjODpsoB`RUv=~D_H%o2q5|6B$gj{^5-F3V=VNuF(#n+(H5UI z#$Xsw0)$&0pw*O?x&tduuo=(_)O;bGuMTxe6M-473BHCrNM&eVl* zxbs%g|I^x60L9e=4PpTn7Pnp8ZE@EG7I)WR%c2SH@Zqx9;_hw<5FmI61eXPZg+Oq( zKmvgzkdWLa-+%YtRbAcvb#+y*>dl+k?bkii-EX>QrdtM(3{#Tmdk;r#q^b0h@@a-h z(8ew&2r2R;PCE#TcUPR!aT~5kj;lcFm)zWAGi^urRGj?uUQ**{01}{MdD*rrQ9kaH$;!0&IHJ9 ztYYgaHjoP;mUaS22$YOWd7e!%5Os3g@{&$1K8&m3R6EF28+xwn?0U6!Wri99@D= zVNGVyWUjnsE(zTZ$A3T(@iH;6OiUkH7$!<|!Ch)8(Ol6`{d)L{WNRaFmWB&3#I9wo ztz%qslBAb64g)W zgcsz_$c?@etaZM$PLm8UAa9BG&RE_>)U$M!e+G9c^70G00G{9{aqdFxSX2m@Hzs(; zD)%Z*6fc#5#+)AhrD!5`e~37QX&xwkAIl_v24HR;dJ>c0==XTpkFDC%=4tD}#h88x zWx2m`TWm^016FhnQAPtplW`WGCXrGjIR}#4A-jV06>|>mTl4&e@)Ujo`@wYUzx?sy zeBPl~PgWQ-AP?TY30#@`P1HhnTl^22}Xr_DdQb5MjD_8POH^1emmU%&-sd!ct zH)&2s8Vi0inuFhNY;VR0h#@sPIk01`IPiK9GP>3HO8y!4TEew)dFX5?l<)9|a7c^h z%%z8Z{RhZFStu)%hq6vTqml)voEPwsR-F_oE|#Km3^BynmrwwD$^ogyLMq_G; zrh|$b_3l_PPBy-Vl=>5WO=>yX(x)vY8=Kj7iG%j@Neh)cm=?81Ndq zwk`9%Gf+qH+N-hZ$e94>I6%e&$;0y&IW5`jD|Yy#BS>JB?H?fzFFz#llT`4WGNtnn*75W}Nv$5YuW{;yol)<7js5#uRZ>vKsu>1Mi 
z6y7o#t*=F3yq&;q(L7%CYRHd2X2{PlHkWP5EHc}!HE0X!UHn%(7~+%Jr*iCvzSmj( zfp;{*yv&Wx1V)~y+R6~Vv@YYsH?mGrTIUa5_r1=Yj07<8%?N_ z-q8LOn=Zjhon~7#lmFA7fBbJjWo{U<{fbRcW+lUqg|d*M3#9KJv`fZ(pxX=boVo0m z9Q(PI#+vy*TK^Pie|F@bCx@7_XB^K2pvxacV%PwqD^YWnzSaZ$PO>nNc?h>ZpVO;w z%jp1WAe2gfZf=sj<@)e%1t~(DwQ|gly!gE*$|7~7Qe&{S&D-4aUr6Sy2Ho_et1Fq^ zMHV87?54mh$rWd$XC3Lur~>-ue_(%Ecz*j-cqN)Wi2~?jWnw<;pj21|_7q>c zT84L$o9s|N=mAEqK$5l(tuo6;?O1$u+GYBr!FPY_-NmCF6sf2PjuX7br|nftp{A@Ztb*ZByAB%1 zVI6*2%pO7w_yKI1>tb2N8voH_bV;nrVj1{Bip09pX{*#=tOoes_qO7Sn=9!f$*lnR z9^BT2({yocDXM=UO@SU)7FvR`I?{8tCV>66PT3~s^x>5xANHAEckM@I5H<4 z&;@^k@&WqYFH(J-=ZCL4B{C|2(#f-YmXkYMg?g)UDf+DGNOf`_k}j_W!Lo znp4CrucqCYbZYY>oH*3z@WtB~@Bk+N@TOammLC)=J$F>1A4a8q%8rTlaQW!&mrTlL z2C`phtY`fnA2Yluo?TKHc^_&yykxS}DD1<$_BejMxSu73j^mU*MR9y$@`+KfCWTGA zHx|jqvcm=qvc~nUOO&&%TD0|+mL&394COp-e^EkHofS>6u5{cfTmNePLw;UGzu!O3 zDxJC|BElsC_3zI5odX9YQoakV*6(TMGLjmSnh}c`JS^bb(o)i@-Mt&Vb2U&HCw6Ua zfc<#1aIl-2S;ccr^}OPaT4uUlUwQq*)2nsJZolBwNop4@Tpog(#b1Tz@=Lujr?t0g z3kX@th+QHgBUu6j3wxQbm*7KewVDZLIo!v?A4d!J#LvtQ=43P9lioiN!$t6JtE*Zl zk*j#~9Xcku@OZguP@1<-*gtS_btX6a&~Ptc!`A@H0JsKQ(yQE|s9_6!@J|{x0c_^s zc4niYW(U!GGlXm}D2=?#Pu6Oet>^W7eTDPtImrPv6YfCzszp{sEPN|jMd4V&c*Zz9 z2W`b&v*3ah6Gi0V#*VQlr$B;JyolXvTBKXDFFGtsO@B>*9uH z9m)nFON5*wf8_~Mo0*W!6N&$zf#hm0ykh;Pe=q8})grmu54jo)c_faQmaB3&4ydP# z(H7I`D$pDmRBm(m7HkC?jq4e1k`IQAzjw+pC)L^rWmYJYRDm#0m7jM`zo;#CJe^j> z>a3_TUdsL?#?ywjosl4~8$We4W6Ky1Z)dA(-KYbwHfL#1WQidLu-l<

Dm`J#k8vSFayJ?mv!9sd-t>WMp5b5N_Uo6u}mYX{4Ayqx9Gv*5r}#&APh@cw*Q2Y@_6xsNrje+d$>KY()%7r{4FIPamf#e1GUVb zU@e@`al)8go8p|!y<;;o7TPsT9u>`eji^D5aimqSvp2p5P-r_wD47VgdTurbMoI$0 zU>g}UqmQU={qJkY6>t=j6SbI|C^En3Ve*RHCp!g+Nqw=?XNrk6da5%-To{#D6g#cs zZ>w<3=`D=E=e|maopAjLQm&XINzyi~9o-WA_z;qM%ggDS+moA;8U>4DvlIiF#5b5u zqX3o0RTRPr#^K9)Cq9?1JiI3=ns-`N9()GyN|(zIA55syFKVWZ)&b40Y$GUtnjIi+ zB#;cEtj67ute2`Lw@OtvxCVwVA7ma@oxQuvlRtUuRl_@v+^=Y~;y}kZXuLa_EFbM5 zmC&VPBQnW!e+_7e5l~SWvtrA}I!(v5k|ta1aZtpsGoMZdmfKpF5~#;J0fUZ1bvqfAt{j(ZgdUS!m)G)O?>=^$|D zSQW`BJH#p3&b#i(y%!KEb}mGnKE~O!W5B2_edJBVgNHDw)e?w4AborMFJ`=@0m*95c)N(^r|DDD~=EthGz7h7msaRY46?iB0KiY?#S|++nJB*H_+1 zgwsK(1*=c4`SW`>HIb9K?7m!2az%JAfW-;m#rGJ=qd!(*36(8q&fsvFwNAk4gne~E zYb%PyFtD&}eID}6HE&PA;?0|G-gGC8mH`n0%U~gGH@D2acOoNWi_`PHM%CY>9;tfv zPVcVgtGTsIeCA4e2)UZxZ)66A03F^OQ#-Q{E5aa(JH>E5m9{bCg!&j@udWu<5WMYe zaQu(rM6A~GE*}Z#aN1W@4&mT`^H|=-7A%PUS4{P*SjRBAp&e@ zyhr5DnmqW>+mD#6@Cdk3t$qS$s-7bg_sB@Bx?TT`;e2P+r&E;M_AdGO@i@I=b0;|F zEmi`RL<@rfkNtL6WXD!u3`8Z!Qy#3?feTCZeeq1N->s@h{$C1PtEPB)Bk0gw9C8+4 zNq*O`3 zhS-Y9Gs@>pso&g*yvZvYIUQlBcOPj2Bwl5#UFOiL32QCPlWT2&21X1ko^@vd1Sx%l z3fv`-M$yhLWLTq&UM?HTJqd78GD&|N+ndw_V*(dPmi*n;LTRJ_-t`*KXC)e3%(zh{ zeL9IupIN<{2N@O!Da^ti_I~yqMMsrb9_fpJWB=}BEL{Bv=+y9!oDT1Ra0zQvDnHI5 zK#QnYY4=6a&$bVi#E8~DC9wG*66BwoN~BeZgw(O%DOOh`qAE6)Wm7>20ZEVsODVQs z22qK0E|tOe`OYdyOnO0g^`HVOunI>>7HO)mAoq6FuzHY}AY+Ud6{YZFj9e^mG5#nK zme>fu&}7ozaUOJ^`=|yra>M8;z`aT{1qO2;W^Q5KQk1w5^P1p3PR)($&- zG2+1;BZnKA+zRXCvQ-y)oR=0$!^_mBQGW)Z_w{Q>w$X{oXxAt|OSdoh;e1Pj%3pIVBRtAlCI#XwT9L2JxXE;=( zCl;8nhwnyDP!<@&cH$qEe%U40u;Ss7o%8!Wg|;MM6W@au6;~+!k;#mG&I!xw7P(xD zCwljQ3j97Emql_3#rlY zOYk^1Vm5=e#*mzaSF>d&s%O@|^b@_g6J44)Akc}^T(z;|NVMW3=PZzE$>;Wupa?E@ zBK-}3jU1LKcd8mJqmxQuj5+%Kg{|o@?RXVLV9r0!ly)APS9 zC<`l$QF`4E-CPwYl_VuZaF$*s_X=f$1%GH)SO$tyD$k`%(sih1QB^kNDGnRhKCr0_ z_Wwt85!btC2XrDikc6IJj#jP(@!;$L{c#}(&L~WKmRjtxZD|Xfop)r?6-B5Gcq*s#0$axvy;;`6MircCfS~o$ z%+4!%lRTR4c)p{b#lZMr{XI}SjJ40onZ@xbjgXvKj=Q8^A2ztK`1*)W#Sf^;?k$O2 
zyBGOu7gYD?y9{7(e6fShFhPjtv|67>3!ff9n|D}h4qwu&swYr_)&;;fLpoHm6mmfN z1UwQzXI+2SL)^&mMnd-Kh98dp-P3mt_Cb!!cG z7WBuheR@ikZoVU=Kr6Pf+QWrk1>uh^$Y=rQtrxT4WV}_rV-IbMGu3Mqi8$9Ys1Big zyt+tYN=ctkh$0%6u_*y*l@PSNZ3h<(C&nM*v$QLfHa0yrlq%1APsDffOlDT9#|gdH z9O2im+Ktk z4`J7-(@??(r>xtTeA2@bFX~eZFNwXs3k>>9!lZ%@GlW_8bd}=|cMhpYcmPG>Y36>e z^4&PB!MSG9454bT^S+0_%E=>eRyAryM5q9~dD5S!oN16* zl9^9A`Y1aN_fa;##*rAs-ne?NSDEmY#GL8HfXmf<9c;z^IG>MN%dx3_lP~Wa|4jc! z=~qFAEzldJMO0Eq$$nD9JGwkE8ZD%)^YS#E3&eh%f;e=IIQ#0U5efMabk40 z_O_pkNj~}7-^xBKG&Hq`m0WI^^eTdfm)$5+i+W2?$T7wyO)H^EolRp9bvMHbi!!<@zC3HrhHGQh1 zDR|p%WUe@PH0TVJe@>LqI`Wo7En+_J4=I!fsgl}Xb66lhu`wlvZyr>8CM&;vwnLT~ z>GLr2Rrv>!`8R71jEU+8245GM;77T)n8p>{f66)@2i`<|fJQ z9Tb}U$A6Smvy?>I=!jDNiFoXFne?1ywKS=XU6p|n0UGftG_6pKR-$+kCJIFPh1ZvQ z;@TAWA4utM6^ko(P8B-76uuBZMScaQdGABJFdFhM)wLfV;I>BRJdpcQ&ocLg@x!4L zu|)E2fm807iiOVl@oP_e1{Z(9WPu2ZqJJkpRoY%Bl%Xqv1E)gort>xCgWfoF?OK{W zrRP{ti?EV0-FHd4g1aD47Gh)j82oH8e^$Qq@!EqFUd&qES-x`K9KM zbbdP~AFFe7!@i!-J|!Gm7B| zZq(e)(&1+{Mn9?RL;-z4XQ_`Cp1ou7Ub+-`m!kHBMofn)-m`Q9kG?zmFn+{7EVljV zxIFq0v)e<2Y=deE#Q9C4hEuwfp>Wxt^86r#W>m?ZCJ8BmpVaWu*g5p=_qM-k$pl3H zT8_)P@5KCyNZ<58G$aMdck zV4JK@-bna8i7eB%Kb(|*;&12bZl~^=QH0-B;RmWbQsHvqvEEIUXV}=gPQ9xtN2hHI zMXWrv!_i>;$QHX`&}n{1lx0;VlAg#(6p?CuJ=@v5yZEf;1F=oXJvdZ zDtQr>w)3y=L9h!m#l%*Y^h3_dpr=Xzz6*<0nLSgVt`zBf2scQ;yudYOau5% zc1__j<8#jPe&&yRa;jo_Q`{MUf-Fv)&&EF)QX$Gz`CjJZ8RaT6vt@R7)ojqBCNqUF_(W6US{T=8wD^ zL7hSFh*PZ8KpI9pL#dKE_ndhvd`PX|f;|vbO$-R5nG)`M70qo|RW<77OqG2hum~BW z=frG0z6B(sy_Y-9horCUiHSMsCve#vt%5zWvLDWKp+B-&s~AW_Os;$1@niS^^?&t1SeAK=6ZS#FaM}&x1}fStBl3Ynrbr8R?6Hw^JmtJ zO(S9* zL;sG=(nbCjq2DZ_rQM(D`$HfCZLocBooZVW3+CYCwm9Kqr?RxFD2B;Ph&Nh~R$ z^81oTR@H>tQ%fF zg**;G)e99Vwy!C0Cf((o_{35?HsceJKGs)?X@Z=q|M_$_JKCE*j2dwb_IeR^r2wxc zceXOQN%pxhdq)ud2bWytIHS5Ga4!}c(H>>$OTI&7cc!d;&zPq$T-V2&TqFty&M8>? 
zmU8>8q+Sf5ff^M>6+*Cv%}2TE)E6xra>t4dZqLr~gDB?DjaF1lJ`lfN4c+3$7jF<& zsv}^)cYQql8zbh?A9XB)8)IP0B*?AOO`5!(6`T}$v(vn#?(-!G?G9))E#P8GYxll& zGGd^SQeIy)$q{n&#g&(J=Eps7!U>vd9rw?xA|l!Jd0Q}cs|*J|DCI@r5=*-ozgoGP zbFF>ZPn3D5%r0mi9(5ed&HpqWm(sCDry2!WG}b=)A@{H6PH#F4@3<*S%}M{+f^>TA zH{i_ZJ!{VDI8-!#!S&0ZwFy4mZjEXGG;pVM4jXlgV`{#kTefb9+WdA>^0h?n2+;>u zoROk!o{L=!4&Mc48#l+k2!C${FnvU3$(9i z_N`bV>+A7VK#kR$o!J4(z8(FsIeue2z`Ps0bAKO!R=k2&BYyCu9O1FX>O^H&o4%{? z;yZLCu>vVvXtT3;3MP)j@^na4Vu&AJ;mp)Z=tOUdaD!DnObq#?A%WLYBNSt9F9v^R z%zlVZ3%4pJL7Wj^%W*K8Ec1(iS*?XjQukJEZ}?QvTZhfS#7(ZClHLEp4FEJg7U`SY z7cHO7lF>dEt)BX#8zTceLD#+FP?=e5Ea5cELO%-;1J4|Tg-glI|4U&1zBwg=r3$2j zBQgGu1lweivW=du{ZCH6T0c&|Lq8|MVQr8bAay%`1v(Q8r=eThV;F1f43#IOy%4j# z`IR>Ch%4^-#czxsrq;R7AxP-}{~zx0gn_i|PitR!{Lp(rqPMl~yv!>@oU|35Jm9(Y zu;bG>Nx1CEb=q%?`|v)z<)GIx0c*?j=5m~PoRi;0AK-#D++$$BG16uqbx8z(SDaxb zi2uS3s7+=^8>YtK&sm#7Q`6Z?1ebXDjPIREuE~7YK!e9wuvl_zB#-W}Ebyaf%k2OC zHr5RaArbxgV)%QF>ovJ!A0HsYvV`nn#b)T`(`P10c??-Wp-cbFQPc#QU9xvZH`nt&QCm z+C3vH^Gp5&UTl&?gAAZ?HR4zR$;~ag-9&v4zm$gf3jft`1<$Ynhl}%o6xFq=3^HQn z=3QlIVA6Aggpmr^CMr(f=slN^@&9ehqTd*`V#4;-E7e@mN_{<}kd3@RvnI!_v!7)QyX7^>UI=~{ z6*vAoNiG}d)aH_l1B5(r4l0AJa5X{6|9G{H-ZkR=0-9Q94UNasgIBEZRW%pdJd#Y- z;X$@2Gd^<^a(yGX6*RpDN<|2Pyw$Sa--n=a62C?3fmiCkF+QUgiQF4UBAU*y++Cb5 z9bZg2JF6wD@JCE>ygn(20QJX1g(KK>(nm&w-Xu%YZ~vqBZ}0nkd<7tzE0umd!BjYX z^o!c;rZHjo{r5^{4WcJj6$Jb>O9o$BE!^gOog}&*TGFi?Ki>9@{J%vtt{jWbU)HI8CU;o|bi|6q_lBq4?kZ^Jo_- z$wHI3uN*GDA0^vjmN;e2(kK61m_7yBX+Bdp)OkGdUpqH4f*E3UBZfi?FMUSrbLPFhtWBl2E8c7Y@a0+P9Y~%KpOY zc!({HwfeZqZe@%|-A!%ZyuS3w`Mx4nD!H&8?r4ZK*sMscNp?fb5q6_gh7LUYD_5;q z9(F_eYNXMBM^DkQZP%~o2I-U~HtrKQW3FuhfkYvjbw5~QR+@k9M$oyE#rHbJxKJuEAv z``$$&{1 z@Wn00XZkmzk*{T2t>fu^J`S3ZQF4A@DzNnbjp6r*hG_6LA1JDoOy1s}>jyqs%@|^n zKS zOIE%ON!}!ElC>>oFpijYisOj3afT_CqO5ac&O5Vc87 zOoG_CPHHU4s+e4&&4vaC~~V57E)}pzbDsv zh9$5gt*@8KXZsaYHy-o}OKs9u=oV7HTu zG^Dfu=^~GkxWwr_Wq(?|j;xA@r7{N%4m|=2irKIiJQySKu8nc8tjqRPxM>L#!B) zFIqI${p^LMc;?su3V4T}b$cFGJ}-SSI`}vqJU8&PSJa&9>KdN}ygvM96)kBk&l5YI 
ze^UcBW8G_SjnG~e{DF(+T-_@MYQ$y4@${IX<1<4OIw9RPn{eghspo^}G$YMn#DaLg zF}&*0d4=qYmuxy1)^XKD{ltq18F@s-NyhfH#xq_?^9y-j?Nac_eWuO{TdNx<*8aqy%`H)h$iWL121na8B%3z zA6h`oaX1mJW zJfq%dKW#lE8^HA}4ioz^@P6>2>_{ai^z*8dtx)#dWRaZwwo>_M>RgvQ=-|#a&7rFcfmcMCsT% zp@P3Jjn1rr+yy@q7qf8hGOLHukYwt);yshp7Va*$QD1!&_>;K|&F}sYPC9=U;|e+g zUTNO6Nr9#z5!*BL7e2A+v2u4OZ9HDNCbCv3gV?M~zY&q=c|Vc}(Ls_W{;J~MRtm%-G4TZp22UGGsiw|^ zMxsR3sRFAtd}l(Y>UllV-VGW&V#@b3bImxOnr7+a0RXc(FJF>wRC+jG4xBCS^{Go# zrezL_7F-pK3C74o4N@O4uG74IIEPn+8QFwF3H2{qJ62z=tS0M!uu_&B<{qcLKEjAe zRQi&Y3{opwwQ)9WBKDT1sQKt*#;`v%@P1n(4{V|i7(Uij5`Lc=NDeM zhGlu|4N~<9_~P&w$-S{29^P+xL9|DLjs{4#G<^BktU%CE0hdqxHo7pk55G}mE}JeM zF5b5Mv%+LN>toyFg<$w$*&mcONG5DB?4P=#qqT?)S8JibvrC;f`nv_2mZyNdv!PzdaOXq$zECHOW*9-ssjNn`Dp%By@Kg{%~$rj)Q)b zDmB8cUzj88(bhh(efIz*f2qxMwvRQx%H Date: Sat, 30 Sep 2023 22:53:11 +0300 Subject: [PATCH 13/53] Add README.md --- HW4_Voskoboinikov/README.md | 75 +++++++++++++++++++++++++++++++++++++ 1 file changed, 75 insertions(+) create mode 100644 HW4_Voskoboinikov/README.md diff --git a/HW4_Voskoboinikov/README.md b/HW4_Voskoboinikov/README.md new file mode 100644 index 0000000..fb38a4a --- /dev/null +++ b/HW4_Voskoboinikov/README.md @@ -0,0 +1,75 @@ +# Ultimate Protein Tools + +### Overview + +This project contains a `ultimate_protein_tools.py` program, which implements the `run_ultimate_protein_tools()` function, the input of which is protein sequences and the action that needs to be applied to them. + +### Installation + +TODO import from module!!!! + +### Usage + +To run the script, just call it from the directory where the program is located: + +``` +python ultimate_protein_tools.py +``` + +To exit the program, type `exit` in the console. + +While running the program, you can use next arguments for ***run_ultimate_protein_tools*** function (next – *main function*): + +- `get_length_of_protein`: Calculates the length of a protein. +- `count_aa`: Counts the number of amino acids in a protein sequence. 
+- `get_fracture_of_aa`: Returns the fracture or percentage of amino acids in a protein sequence. + +- `read_seq_from_fasta`: +- `find_sites`: +- `get_protein_rnas_number`: +- `get_protein_rnas`: +- `get_frameshift_proteins`: + +- `calculate_protein_mass`: Calculates the molecular mass of a protein based on its amino acid sequence and a dictionary of amino acid masses. +- `get_atomic_mass`: Calculates the molecular mass of a biological molecule, primarily an amino acid, based on a simple chemical formula. +- `convert_aa_name`: Converts a sequence of one-letter amino acid codes to three-letter designations. + +For additional information about arguments and output, please read the docstring for the desired function. + +### Input of data + +During each run of the main function, the user is required to enter a **protein sequence / sequences** that must be processed using the procedures listed above. + +The program involves the analysis of protein sequences consisting of **20 canonical amino acids**. + +If the data is entered incorrectly, an appropriate error will be displayed. + +```python +run_ultimate_protein_tools('AZAZA', get_length_of_protein) -> ValueError #TODO add error message +``` + +### Examples + +```python +run_ultimate_protein_tools('MAGDVLAGTTTSDRAAGALGTLGTAATLRAATDGLLQR', get_length_of_protein) -> 38 +run_ultimate_protein_tools('MAGDVLAGTTTSDRAAGALGTLGTAATLRAATDGLLQR', aminoacids='AT', count_aa) -> {'A': 9, 'T': 7} +run_ultimate_protein_tools('MAGDVLAGTTTSDRAAGALGTLGTAATLRAATDGLLQR', aminoacids='L', get_fracture_of_aa) -> {'L': 0.1579} +run_ultimate_protein_tools('MAGDVLAGTTTSDRAAGALGTLGTAATLRAATDGLLQR', aminoacids='DRG', get_fracture_of_aa, show_as_percentage=True) -> {'D': 7.89, 'R': 7.89, 'G': 15.79} + +#TODO examples for other functions +``` + +### Troubleshooting + +TODO change this + +If the program doesn't work – try to scream like opossum. 
+ +### Contacts +![Wonderful Team](https://github.com/ArtemVaska/HW4_Functions2/tree/HW4_Voskoboinikov/HW4_Voskoboinikov/Wonderful_Team.jpg) + +Aleksandr Voskoboinikov – Team Leader (wwoskie@gmail.com) + +Artem Vasilev (artem_vasilev_01@list.ru) + +Tatiana Lisitsa (ttnlsc@gmail.com) \ No newline at end of file From 27b38857d136a2df5ff44056ec79f7537344fe1a Mon Sep 17 00:00:00 2001 From: Artem Vasilev <135062527+ArtemVaska@users.noreply.github.com> Date: Sat, 30 Sep 2023 22:57:06 +0300 Subject: [PATCH 14/53] Update a link of the photo in README.md --- HW4_Voskoboinikov/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/HW4_Voskoboinikov/README.md b/HW4_Voskoboinikov/README.md index fb38a4a..3f23c7e 100644 --- a/HW4_Voskoboinikov/README.md +++ b/HW4_Voskoboinikov/README.md @@ -66,10 +66,10 @@ TODO change this If the program doesn't work – try to scream like opossum. ### Contacts -![Wonderful Team](https://github.com/ArtemVaska/HW4_Functions2/tree/HW4_Voskoboinikov/HW4_Voskoboinikov/Wonderful_Team.jpg) +![Wonderful Team](https://github.com/ArtemVaska/HW4_Functions2/blob/HW4_Vasilev/HW4_Voskoboinikov/Wonderful_team.jpg) Aleksandr Voskoboinikov – Team Leader (wwoskie@gmail.com) Artem Vasilev (artem_vasilev_01@list.ru) -Tatiana Lisitsa (ttnlsc@gmail.com) \ No newline at end of file +Tatiana Lisitsa (ttnlsc@gmail.com) From 4e56561a16b0e9bbae00a2ed3d38ccf1f27bbf4b Mon Sep 17 00:00:00 2001 From: Aleksandr Voskoboinikov <“wwoskie@gmail.com”> Date: Sun, 1 Oct 2023 02:54:23 +0300 Subject: [PATCH 15/53] Add RNA_AA_TABLE const --- HW4_Voskoboinikov/ultimate_protein_tools.py | 26 +++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/HW4_Voskoboinikov/ultimate_protein_tools.py b/HW4_Voskoboinikov/ultimate_protein_tools.py index 3fd222c..0618f8a 100644 --- a/HW4_Voskoboinikov/ultimate_protein_tools.py +++ b/HW4_Voskoboinikov/ultimate_protein_tools.py @@ -82,6 +82,32 @@ } +# TODO check if possible to rempve kwargs +RNA_AA_TABLE = { +'F': 
['UUU', 'UUC'], + 'L': ['UUA', 'UUG', 'CUU', 'CUC', 'CUA', 'CUG'], + 'S': ['UCU', 'UCC', 'UCA', 'UCG', 'AGU', 'AGC'], + 'Y': ['UAU', 'UAC'], + '*': ['UAA', 'UAG', 'UGA'], + 'C': ['UGU', 'UGC'], + 'W': ['UGG'], + 'P': ['CCU', 'CCC', 'CCA', 'CCG'], + 'H': ['CAU', 'CAC'], + 'Q': ['CAA', 'CAG'], + 'R': ['CGU', 'CGC', 'CGA', 'CGG', 'AGA', 'AGG'], + 'I': ['AUU', 'AUC', 'AUA'], + 'M': ['AUG'], + 'T': ['ACU', 'ACC', 'ACA', 'ACG'], + 'N': ['AAU', 'AAC'], + 'K': ['AAA', 'AAG'], + 'V': ['GUU', 'GUC', 'GUA', 'GUG'], + 'A': ['GCU', 'GCC', 'GCA', 'GCG'], + 'D': ['GAU', 'GAC'], + 'E': ['GAA', 'GAG'], + 'G': ['GGU', 'GGC', 'GGA', 'GGG'], +} + + def length_of_protein(seq: str) -> int: """ Calculates the length of a protein. From 55940d2665627956633b92612162412bee57fa12 Mon Sep 17 00:00:00 2001 From: Aleksandr Voskoboinikov <“wwoskie@gmail.com”> Date: Sun, 1 Oct 2023 02:55:09 +0300 Subject: [PATCH 16/53] Add RNA_CODON_TABLE const --- HW4_Voskoboinikov/ultimate_protein_tools.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/HW4_Voskoboinikov/ultimate_protein_tools.py b/HW4_Voskoboinikov/ultimate_protein_tools.py index 0618f8a..fc380d5 100644 --- a/HW4_Voskoboinikov/ultimate_protein_tools.py +++ b/HW4_Voskoboinikov/ultimate_protein_tools.py @@ -107,6 +107,25 @@ 'G': ['GGU', 'GGC', 'GGA', 'GGG'], } +RNA_CODON_TABLE = { + 'UUU': 'F', 'UUC': 'F', 'UUA': 'L', 'UUG': 'L', + 'UCU': 'S', 'UCC': 'S', 'UCA': 'S', 'UCG': 'S', + 'UAU': 'Y', 'UAC': 'Y', 'UAA': '*', 'UAG': '*', + 'UGU': 'C', 'UGC': 'C', 'UGA': '*', 'UGG': 'W', + 'CUU': 'L', 'CUC': 'L', 'CUA': 'L', 'CUG': 'L', + 'CCU': 'P', 'CCC': 'P', 'CCA': 'P', 'CCG': 'P', + 'CAU': 'H', 'CAC': 'H', 'CAA': 'Q', 'CAG': 'Q', + 'CGU': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R', + 'AUU': 'I', 'AUC': 'I', 'AUA': 'I', 'AUG': 'M', + 'ACU': 'T', 'ACC': 'T', 'ACA': 'T', 'ACG': 'T', + 'AAU': 'N', 'AAC': 'N', 'AAA': 'K', 'AAG': 'K', + 'AGU': 'S', 'AGC': 'S', 'AGA': 'R', 'AGG': 'R', + 'GUU': 'V', 'GUC': 'V', 'GUA': 'V', 'GUG': 'V', + 
'GCU': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', + 'GAU': 'D', 'GAC': 'D', 'GAA': 'E', 'GAG': 'E', + 'GGU': 'G', 'GGC': 'G', 'GGA': 'G', 'GGG': 'G', +} + def length_of_protein(seq: str) -> int: """ From ca02a056d75ac2bef7604e4ca16a05d3d67d93fc Mon Sep 17 00:00:00 2001 From: Aleksandr Voskoboinikov <“wwoskie@gmail.com”> Date: Sun, 1 Oct 2023 02:56:29 +0300 Subject: [PATCH 17/53] Add read_seq_from_fasta func --- HW4_Voskoboinikov/ultimate_protein_tools.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/HW4_Voskoboinikov/ultimate_protein_tools.py b/HW4_Voskoboinikov/ultimate_protein_tools.py index fc380d5..89f7659 100644 --- a/HW4_Voskoboinikov/ultimate_protein_tools.py +++ b/HW4_Voskoboinikov/ultimate_protein_tools.py @@ -107,6 +107,7 @@ 'G': ['GGU', 'GGC', 'GGA', 'GGG'], } + RNA_CODON_TABLE = { 'UUU': 'F', 'UUC': 'F', 'UUA': 'L', 'UUG': 'L', 'UCU': 'S', 'UCC': 'S', 'UCA': 'S', 'UCG': 'S', @@ -127,6 +128,21 @@ } +def read_seq_from_fasta(path_to_seq: str, use_full_name=False, **kwargs): + with open(path_to_seq) as f: + out_dct = {} + for line in f: + line = line.strip() + if line.startswith('>'): # check for first line in seq + if use_full_name: # check if user set full name in fasta + name = line[1:] # take whole fasta properties (e.g. if names not unique) + else: + name = line[1:].split()[0] + else: + out_dct[name] = out_dct.get(name, '') + line # get value from dict (return '' if empty) and append str + return out_dct + + def length_of_protein(seq: str) -> int: """ Calculates the length of a protein. 
From 6e1d3b9fc50b1e1a519fee7ed5cf401078b1ce3b Mon Sep 17 00:00:00 2001 From: Aleksandr Voskoboinikov <“wwoskie@gmail.com”> Date: Sun, 1 Oct 2023 02:57:18 +0300 Subject: [PATCH 18/53] Add get_sites_lengths func --- HW4_Voskoboinikov/ultimate_protein_tools.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/HW4_Voskoboinikov/ultimate_protein_tools.py b/HW4_Voskoboinikov/ultimate_protein_tools.py index 89f7659..69cccc3 100644 --- a/HW4_Voskoboinikov/ultimate_protein_tools.py +++ b/HW4_Voskoboinikov/ultimate_protein_tools.py @@ -143,6 +143,13 @@ def read_seq_from_fasta(path_to_seq: str, use_full_name=False, **kwargs): return out_dct +def get_sites_lengths(sites): + sites_length_dct = {} + for site in sites: + sites_length_dct[site] = len(site) + return sites_length_dct + + def length_of_protein(seq: str) -> int: """ Calculates the length of a protein. From 6e033723d1079d74e335ef3bf789c8dd6f1143f0 Mon Sep 17 00:00:00 2001 From: Aleksandr Voskoboinikov <“wwoskie@gmail.com”> Date: Sun, 1 Oct 2023 02:58:01 +0300 Subject: [PATCH 19/53] Add invert_dct func --- HW4_Voskoboinikov/ultimate_protein_tools.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/HW4_Voskoboinikov/ultimate_protein_tools.py b/HW4_Voskoboinikov/ultimate_protein_tools.py index 69cccc3..8b5b91f 100644 --- a/HW4_Voskoboinikov/ultimate_protein_tools.py +++ b/HW4_Voskoboinikov/ultimate_protein_tools.py @@ -150,6 +150,13 @@ def get_sites_lengths(sites): return sites_length_dct +def invert_dct(dct): + inv_dct = {} + for k, v in dct.items(): + inv_dct[v] = inv_dct.get(v, []) + [k] # get value from dict (return []) and append key + return inv_dct + + def length_of_protein(seq: str) -> int: """ Calculates the length of a protein. 
From ff0b3ab35f297bf7f659732be691f364205b101a Mon Sep 17 00:00:00 2001 From: Aleksandr Voskoboinikov <“wwoskie@gmail.com”> Date: Sun, 1 Oct 2023 02:59:54 +0300 Subject: [PATCH 20/53] Add find_sites func --- HW4_Voskoboinikov/ultimate_protein_tools.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/HW4_Voskoboinikov/ultimate_protein_tools.py b/HW4_Voskoboinikov/ultimate_protein_tools.py index 8b5b91f..ea4bc2b 100644 --- a/HW4_Voskoboinikov/ultimate_protein_tools.py +++ b/HW4_Voskoboinikov/ultimate_protein_tools.py @@ -157,6 +157,21 @@ def invert_dct(dct): return inv_dct +def find_sites(seq, *sites, is_one_based = False, **kwargs): + window_sizes = invert_dct(get_sites_lengths(sites)) # get lengths of all sites and stick them together to avoid passing through seq multiple times if possible + found_sites = {} + for window_size in window_sizes: # perform iteration for all given lengths of sites + for i in range(len(seq) - window_size + 1): # iterate through seq with step one and consider window of site length each iteration + scatter = seq[i:i + window_size] # get fragment of sequence with length of window i.e. scatter + for site in window_sizes[window_size]: + if scatter == site: # check if scatter is site + found_sites[site] = ( + found_sites.get(site, []) # get + + [i + is_one_based] + ) # append index to list in dict + return found_sites + + def length_of_protein(seq: str) -> int: """ Calculates the length of a protein. 
From c64046bcf2d6cbe3960a6907d5aa87e6c4d9e1ae Mon Sep 17 00:00:00 2001 From: Aleksandr Voskoboinikov <“wwoskie@gmail.com”> Date: Sun, 1 Oct 2023 03:00:39 +0300 Subject: [PATCH 21/53] Add get_protein_rnas func --- HW4_Voskoboinikov/ultimate_protein_tools.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/HW4_Voskoboinikov/ultimate_protein_tools.py b/HW4_Voskoboinikov/ultimate_protein_tools.py index ea4bc2b..882c833 100644 --- a/HW4_Voskoboinikov/ultimate_protein_tools.py +++ b/HW4_Voskoboinikov/ultimate_protein_tools.py @@ -172,6 +172,22 @@ def find_sites(seq, *sites, is_one_based = False, **kwargs): return found_sites +def get_protein_rnas(seq, i_absolutely_fucking_know_what_im_doing = False): + if i_absolutely_fucking_know_what_im_doing: + kmers = [''] # set initial kmers + for amino_acid in seq: # iterate AAs + current_kmers = [] + codons = RNA_AA_TABLE[amino_acid] # get list of codons for AA + for codon in codons: + for kmer in kmers: + current_kmers.append(kmer + codon) # append every codon to existing kmers + kmers = current_kmers # re-write k-mers for next iteration + + return kmers + + return "You don't fucking know what you're doing!" # politely ask user to reconsider their actions + + def length_of_protein(seq: str) -> int: """ Calculates the length of a protein. 
From 96a0ad712ab30b30888fa9b28f1475c7dd08b1da Mon Sep 17 00:00:00 2001 From: Aleksandr Voskoboinikov <“wwoskie@gmail.com”> Date: Sun, 1 Oct 2023 03:01:15 +0300 Subject: [PATCH 22/53] Add get_protein_rnas_number func --- HW4_Voskoboinikov/ultimate_protein_tools.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/HW4_Voskoboinikov/ultimate_protein_tools.py b/HW4_Voskoboinikov/ultimate_protein_tools.py index 882c833..9306893 100644 --- a/HW4_Voskoboinikov/ultimate_protein_tools.py +++ b/HW4_Voskoboinikov/ultimate_protein_tools.py @@ -188,6 +188,13 @@ def get_protein_rnas(seq, i_absolutely_fucking_know_what_im_doing = False): return "You don't fucking know what you're doing!" # politely ask user to reconsider their actions +def get_protein_rnas_number(seq): + rnas_num = 1 + for amino_acid in seq: + rnas_num *= len(RNA_AA_TABLE[amino_acid]) + return rnas_num + + def length_of_protein(seq: str) -> int: """ Calculates the length of a protein. From 6c8c71b0bc8e3c10b04479db9a03f2a631727c29 Mon Sep 17 00:00:00 2001 From: Aleksandr Voskoboinikov <“wwoskie@gmail.com”> Date: Sun, 1 Oct 2023 03:02:56 +0300 Subject: [PATCH 23/53] Add is_protein_valid func --- HW4_Voskoboinikov/ultimate_protein_tools.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/HW4_Voskoboinikov/ultimate_protein_tools.py b/HW4_Voskoboinikov/ultimate_protein_tools.py index 9306893..4838e38 100644 --- a/HW4_Voskoboinikov/ultimate_protein_tools.py +++ b/HW4_Voskoboinikov/ultimate_protein_tools.py @@ -157,6 +157,12 @@ def invert_dct(dct): return inv_dct +def is_protein_valid(seq): + if set(seq).issubset(RNA_AA_TABLE): + return True + return False + + def find_sites(seq, *sites, is_one_based = False, **kwargs): window_sizes = invert_dct(get_sites_lengths(sites)) # get lengths of all sites and stick them together to avoid passing through seq multiple times if possible found_sites = {} From 2c337053e522fd134a7cecd845013eb41bfc6335 Mon Sep 17 00:00:00 2001 From: Aleksandr Voskoboinikov 
<“wwoskie@gmail.com”> Date: Sun, 1 Oct 2023 03:04:10 +0300 Subject: [PATCH 24/53] Add get_frameshift_proteins func --- HW4_Voskoboinikov/ultimate_protein_tools.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/HW4_Voskoboinikov/ultimate_protein_tools.py b/HW4_Voskoboinikov/ultimate_protein_tools.py index 4838e38..c79b795 100644 --- a/HW4_Voskoboinikov/ultimate_protein_tools.py +++ b/HW4_Voskoboinikov/ultimate_protein_tools.py @@ -201,6 +201,25 @@ def get_protein_rnas_number(seq): return rnas_num +def get_frameshift_proteins(seq, i_absolutely_fucking_know_what_im_doing = False, is_stop_codon_termination_enabled=False): + if i_absolutely_fucking_know_what_im_doing: + frameshift_dct = {'frame_0': [seq]} # set current seq as frame_0 (protein from not-shifted frame) + rnas = get_protein_rnas(seq, i_absolutely_fucking_know_what_im_doing = i_absolutely_fucking_know_what_im_doing) + for frame_number in [1, 2]: + frames_list = [] + for rna in rnas: + frame = '' + for i in range(frame_number, len(rna) - (frame_number + 1), 3): # set frame-dependent range to iterate + frame_codon = rna[i:i+3] # extract codon + frame += RNA_CODON_TABLE[frame_codon] + if is_stop_codon_termination_enabled and RNA_CODON_TABLE[frame_codon] == '*': # stop writing if meet stop-codon + break + frames_list.append(frame) # append frame to frames list + frameshift_dct[f'frame_{frame_number}'] = list(set(frames_list)) # clean duplicates and write to dict + return frameshift_dct + return "You don't fucking know what you're doing!" # politely ask user to reconsider their actions + + def length_of_protein(seq: str) -> int: """ Calculates the length of a protein. 
From 7e64e8cdf48d155921a385ee00f0a1a09e6f28f7 Mon Sep 17 00:00:00 2001 From: Aleksandr Voskoboinikov <“wwoskie@gmail.com”> Date: Sun, 1 Oct 2023 03:05:06 +0300 Subject: [PATCH 25/53] Add COMMAND_DCT dictionary --- HW4_Voskoboinikov/ultimate_protein_tools.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/HW4_Voskoboinikov/ultimate_protein_tools.py b/HW4_Voskoboinikov/ultimate_protein_tools.py index c79b795..8554739 100644 --- a/HW4_Voskoboinikov/ultimate_protein_tools.py +++ b/HW4_Voskoboinikov/ultimate_protein_tools.py @@ -390,3 +390,12 @@ def convert_aa_name(sequence: str, name_dict: dict[str, str] = None, sep: str = else: raise ValueError(f'Unknown amino acid: {aa}') return new_name + + +COMMAND_DCT = { + 'find_sites': find_sites, + 'get_protein_rnas' : get_protein_rnas, + 'get_protein_rnas_number': get_protein_rnas_number, + 'get_frameshift_proteins': get_frameshift_proteins, + 'is_protein_valid': is_protein_valid, + } From af130031163562868c26e67a9bcf9496a3694b5c Mon Sep 17 00:00:00 2001 From: Aleksandr Voskoboinikov <“wwoskie@gmail.com”> Date: Sun, 1 Oct 2023 03:06:43 +0300 Subject: [PATCH 26/53] Add run_ultimate_protein_tools func --- HW4_Voskoboinikov/ultimate_protein_tools.py | 24 +++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/HW4_Voskoboinikov/ultimate_protein_tools.py b/HW4_Voskoboinikov/ultimate_protein_tools.py index 8554739..39dbe4e 100644 --- a/HW4_Voskoboinikov/ultimate_protein_tools.py +++ b/HW4_Voskoboinikov/ultimate_protein_tools.py @@ -399,3 +399,27 @@ def convert_aa_name(sequence: str, name_dict: dict[str, str] = None, sep: str = 'get_frameshift_proteins': get_frameshift_proteins, 'is_protein_valid': is_protein_valid, } + + +def run_ultimate_protein_tools(command, + *args, + input_path = None, + input_seq = None, + input_lst = None, + input_dct = None, + **kwargs): + output_dct = {} + if input_path: + input_dct = read_seq_from_fasta(input_path, **kwargs) + elif input_seq: # TODO possible name parsing + input_dct= 
{0: input_seq} + elif input_lst: + input_dct = {} + for i, seq in enumerate(input_lst): + input_dct |= {i: seq} + for name in input_dct: + if is_protein_valid(input_dct[name]): + output_dct[name] = COMMAND_DCT[command](input_dct[name], *args, **kwargs) + else: + output_dct[name] = is_protein_valid(input_dct[name]) + return output_dct From 02f8fde2658e919c828549b0d611e4c0a1d50277 Mon Sep 17 00:00:00 2001 From: Aleksandr Voskoboinikov <“wwoskie@gmail.com”> Date: Sun, 1 Oct 2023 03:25:16 +0300 Subject: [PATCH 27/53] Add docstrings for read_seq_from_fasta and get_sites_lengths --- HW4_Voskoboinikov/ultimate_protein_tools.py | 26 +++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/HW4_Voskoboinikov/ultimate_protein_tools.py b/HW4_Voskoboinikov/ultimate_protein_tools.py index 39dbe4e..074e3bc 100644 --- a/HW4_Voskoboinikov/ultimate_protein_tools.py +++ b/HW4_Voskoboinikov/ultimate_protein_tools.py @@ -128,7 +128,19 @@ } -def read_seq_from_fasta(path_to_seq: str, use_full_name=False, **kwargs): +def read_seq_from_fasta(path_to_seq: str, + use_full_name: bool = False, + **kwargs) -> dict: + """ + Reads sequences from fasta file and returns dictionary + + Argument: + - path_to_seq (str): path to file + + Return: + - dict: dict of sequences names as keys and sequences themselves as values {'seq_name': 'sequence',} + """ + with open(path_to_seq) as f: out_dct = {} for line in f: @@ -143,7 +155,17 @@ def read_seq_from_fasta(path_to_seq: str, use_full_name=False, **kwargs): return out_dct -def get_sites_lengths(sites): +def get_sites_lengths(sites: list) -> dict: + """ + Takes sites list and calculates their lengths + + Argument: + - sites (list): list of sites (str) + + Return: + - dict: dict of sites length {'site': 'length',} + """ + sites_length_dct = {} for site in sites: sites_length_dct[site] = len(site) From 2bf5db2b8f350bd0787314dfd31383b4512fc98f Mon Sep 17 00:00:00 2001 From: Aleksandr Voskoboinikov <“wwoskie@gmail.com”> Date: Sun, 1 
Oct 2023 03:27:07 +0300 Subject: [PATCH 28/53] Add docstrings for func invert_dct --- HW4_Voskoboinikov/ultimate_protein_tools.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/HW4_Voskoboinikov/ultimate_protein_tools.py b/HW4_Voskoboinikov/ultimate_protein_tools.py index 074e3bc..a079d16 100644 --- a/HW4_Voskoboinikov/ultimate_protein_tools.py +++ b/HW4_Voskoboinikov/ultimate_protein_tools.py @@ -140,7 +140,7 @@ def read_seq_from_fasta(path_to_seq: str, Return: - dict: dict of sequences names as keys and sequences themselves as values {'seq_name': 'sequence',} """ - + with open(path_to_seq) as f: out_dct = {} for line in f: @@ -172,7 +172,16 @@ def get_sites_lengths(sites: list) -> dict: return sites_length_dct -def invert_dct(dct): +def invert_dct(dct: dict) -> dict: + """ + Inverts a dict + + Argument: + - dct (dict): dict to be inverted + + Return: + - dict: inverted dict + """ inv_dct = {} for k, v in dct.items(): inv_dct[v] = inv_dct.get(v, []) + [k] # get value from dict (return []) and append key From 8477d9fd12c3551cd08dc89e0d99788e4f54d499 Mon Sep 17 00:00:00 2001 From: Aleksandr Voskoboinikov <“wwoskie@gmail.com”> Date: Sun, 1 Oct 2023 03:29:13 +0300 Subject: [PATCH 29/53] Add docstrings for func is_protein_valid --- HW4_Voskoboinikov/ultimate_protein_tools.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/HW4_Voskoboinikov/ultimate_protein_tools.py b/HW4_Voskoboinikov/ultimate_protein_tools.py index a079d16..7e67557 100644 --- a/HW4_Voskoboinikov/ultimate_protein_tools.py +++ b/HW4_Voskoboinikov/ultimate_protein_tools.py @@ -182,13 +182,24 @@ def invert_dct(dct: dict) -> dict: Return: - dict: inverted dict """ + inv_dct = {} for k, v in dct.items(): inv_dct[v] = inv_dct.get(v, []) + [k] # get value from dict (return []) and append key return inv_dct -def is_protein_valid(seq): +def is_protein_valid(seq: str) -> bool: + """ + Checks if protein is valid + + Argument: + - seq (str): seq to 
be checked + + Return: + - bool, the result of the check + """ + if set(seq).issubset(RNA_AA_TABLE): return True return False From dbdef00136f28bb6d9eca8436d40e535818c820e Mon Sep 17 00:00:00 2001 From: Aleksandr Voskoboinikov <“wwoskie@gmail.com”> Date: Sun, 1 Oct 2023 03:37:42 +0300 Subject: [PATCH 30/53] Add docstrings for func find_sites --- HW4_Voskoboinikov/ultimate_protein_tools.py | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/HW4_Voskoboinikov/ultimate_protein_tools.py b/HW4_Voskoboinikov/ultimate_protein_tools.py index 7e67557..bf746ad 100644 --- a/HW4_Voskoboinikov/ultimate_protein_tools.py +++ b/HW4_Voskoboinikov/ultimate_protein_tools.py @@ -182,7 +182,7 @@ def invert_dct(dct: dict) -> dict: Return: - dict: inverted dict """ - + inv_dct = {} for k, v in dct.items(): inv_dct[v] = inv_dct.get(v, []) + [k] # get value from dict (return []) and append key @@ -205,7 +205,22 @@ def is_protein_valid(seq: str) -> bool: return False -def find_sites(seq, *sites, is_one_based = False, **kwargs): +def find_sites(seq: str, + *sites: str, + is_one_based: bool = False, + **kwargs) -> dict: + """ + Finds indexes of given sites + + Argument: + - seq (str): seq to be checked + - *args (str): sites to be found + - is_one_based (bool): whether result should be 0- (False) or 1-indexed (True). 
Default False + + Return: + - dict: dictionary of sites as keys and lists of indexes for the site where it's been found + """ + window_sizes = invert_dct(get_sites_lengths(sites)) # get lengths of all sites and stick them together to avoid passing through seq multiple times if possible found_sites = {} for window_size in window_sizes: # perform iteration for all given lengths of sites From 5c8cd44fc55817d0ba9fc1a9ef0cf5625d133ef0 Mon Sep 17 00:00:00 2001 From: Aleksandr Voskoboinikov <“wwoskie@gmail.com”> Date: Sun, 1 Oct 2023 10:20:41 +0300 Subject: [PATCH 31/53] Add docstring to get_protein_rnas func and make code more polite --- HW4_Voskoboinikov/ultimate_protein_tools.py | 26 +++++++++++++++------ 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/HW4_Voskoboinikov/ultimate_protein_tools.py b/HW4_Voskoboinikov/ultimate_protein_tools.py index bf746ad..3c0a76a 100644 --- a/HW4_Voskoboinikov/ultimate_protein_tools.py +++ b/HW4_Voskoboinikov/ultimate_protein_tools.py @@ -224,7 +224,7 @@ def find_sites(seq: str, window_sizes = invert_dct(get_sites_lengths(sites)) # get lengths of all sites and stick them together to avoid passing through seq multiple times if possible found_sites = {} for window_size in window_sizes: # perform iteration for all given lengths of sites - for i in range(len(seq) - window_size + 1): # iterate through seq with step one and consider window of site length each iteration + for i in range(len(seq) - window_size + 1): # iterate through seq with step one and consider window of site length each step scatter = seq[i:i + window_size] # get fragment of sequence with length of window i.e. 
scatter for site in window_sizes[window_size]: if scatter == site: # check if scatter is site @@ -235,8 +235,20 @@ def find_sites(seq: str, return found_sites -def get_protein_rnas(seq, i_absolutely_fucking_know_what_im_doing = False): - if i_absolutely_fucking_know_what_im_doing: +def get_protein_rnas(seq: str, + check_if_user_conscious: bool = False) -> list: + """ + Returns list of all possible RNA's from which can serve as matrix for protein synthesis. WARNING: can be computationally intence on longer sequences, will NOT start unless check_if_user_conscious is True + + Argument: + - seq (str): seq to be checked + - check_if_user_conscious (bool): checks user's consciousness. Default False + + Return: + - list: list of possible RNA's as str + """ + + if check_if_user_conscious: kmers = [''] # set initial kmers for amino_acid in seq: # iterate AAs current_kmers = [] @@ -248,7 +260,7 @@ def get_protein_rnas(seq, i_absolutely_fucking_know_what_im_doing = False): return kmers - return "You don't fucking know what you're doing!" # politely ask user to reconsider their actions + return "You don't know what you're doing!" 
# politely ask user to reconsider their actions def get_protein_rnas_number(seq): @@ -258,10 +270,10 @@ def get_protein_rnas_number(seq): return rnas_num -def get_frameshift_proteins(seq, i_absolutely_fucking_know_what_im_doing = False, is_stop_codon_termination_enabled=False): - if i_absolutely_fucking_know_what_im_doing: +def get_frameshift_proteins(seq, check_if_user_conscious = False, is_stop_codon_termination_enabled=False): + if check_if_user_conscious: frameshift_dct = {'frame_0': [seq]} # set current seq as frame_0 (protein from not-shifted frame) - rnas = get_protein_rnas(seq, i_absolutely_fucking_know_what_im_doing = i_absolutely_fucking_know_what_im_doing) + rnas = get_protein_rnas(seq, check_if_user_conscious = check_if_user_conscious) for frame_number in [1, 2]: frames_list = [] for rna in rnas: From 9a4e4bedabf78ff29e93f150865c25cc3127cc56 Mon Sep 17 00:00:00 2001 From: Aleksandr Voskoboinikov <“wwoskie@gmail.com”> Date: Sun, 1 Oct 2023 10:23:49 +0300 Subject: [PATCH 32/53] Add docstring to get_protein_rnas_number func --- HW4_Voskoboinikov/ultimate_protein_tools.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/HW4_Voskoboinikov/ultimate_protein_tools.py b/HW4_Voskoboinikov/ultimate_protein_tools.py index 3c0a76a..91cd407 100644 --- a/HW4_Voskoboinikov/ultimate_protein_tools.py +++ b/HW4_Voskoboinikov/ultimate_protein_tools.py @@ -263,7 +263,17 @@ def get_protein_rnas(seq: str, return "You don't know what you're doing!" 
# politely ask user to reconsider their actions -def get_protein_rnas_number(seq): +def get_protein_rnas_number(seq: int) -> int: + """ + Get number of all possible RNA's for a given protein + + Argument: + - seq (str): seq to be checked + + Return: + - int: number of possible RNA's for seq + """ + rnas_num = 1 for amino_acid in seq: rnas_num *= len(RNA_AA_TABLE[amino_acid]) From 43836925b9a6b7de3f2c29457aeafd25fc791bdb Mon Sep 17 00:00:00 2001 From: Aleksandr Voskoboinikov <“wwoskie@gmail.com”> Date: Sun, 1 Oct 2023 10:52:06 +0300 Subject: [PATCH 33/53] Add docstring to get_frameshift_proteins func --- HW4_Voskoboinikov/ultimate_protein_tools.py | 30 +++++++++++++++------ 1 file changed, 22 insertions(+), 8 deletions(-) diff --git a/HW4_Voskoboinikov/ultimate_protein_tools.py b/HW4_Voskoboinikov/ultimate_protein_tools.py index 91cd407..fa903fd 100644 --- a/HW4_Voskoboinikov/ultimate_protein_tools.py +++ b/HW4_Voskoboinikov/ultimate_protein_tools.py @@ -134,7 +134,7 @@ def read_seq_from_fasta(path_to_seq: str, """ Reads sequences from fasta file and returns dictionary - Argument: + Arguments: - path_to_seq (str): path to file Return: @@ -159,7 +159,7 @@ def get_sites_lengths(sites: list) -> dict: """ Takes sites list and calculates their lengths - Argument: + Arguments: - sites (list): list of sites (str) Return: @@ -176,7 +176,7 @@ def invert_dct(dct: dict) -> dict: """ Inverts a dict - Argument: + Arguments: - dct (dict): dict to be inverted Return: @@ -193,7 +193,7 @@ def is_protein_valid(seq: str) -> bool: """ Checks if protein is valid - Argument: + Arguments: - seq (str): seq to be checked Return: @@ -212,7 +212,7 @@ def find_sites(seq: str, """ Finds indexes of given sites - Argument: + Arguments: - seq (str): seq to be checked - *args (str): sites to be found - is_one_based (bool): whether result should be 0- (False) or 1-indexed (True). 
Default False @@ -240,7 +240,7 @@ def get_protein_rnas(seq: str, """ Returns list of all possible RNA's from which can serve as matrix for protein synthesis. WARNING: can be computationally intence on longer sequences, will NOT start unless check_if_user_conscious is True - Argument: + Arguments: - seq (str): seq to be checked - check_if_user_conscious (bool): checks user's consciousness. Default False @@ -267,7 +267,7 @@ def get_protein_rnas_number(seq: int) -> int: """ Get number of all possible RNA's for a given protein - Argument: + Arguments: - seq (str): seq to be checked Return: @@ -280,7 +280,21 @@ def get_protein_rnas_number(seq: int) -> int: return rnas_num -def get_frameshift_proteins(seq, check_if_user_conscious = False, is_stop_codon_termination_enabled=False): +def get_frameshift_proteins(seq: int, + check_if_user_conscious: bool = False, + is_stop_codon_termination_enabled: bool = False) -> dict: + """ + Returns list of all possible proteins from all possible frames in peptide. WARNING: can be computationally intence on longer sequences, will NOT start unless check_if_user_conscious is True + + Arguments: + - seq (str): seq to be checked + - check_if_user_conscious (bool): checks user's consciousness. Default False + - is_stop_codon_termination_enabled (bool): terminate translation when reached stop-codon. Default False. 
+ + Return: + - dict: dict of lists of all possible frames proteins {frame_0: ['protein_seqs'], frame_1: ['protein_seqs'], frame_2: ['protein_seqs']} + """ + if check_if_user_conscious: frameshift_dct = {'frame_0': [seq]} # set current seq as frame_0 (protein from not-shifted frame) rnas = get_protein_rnas(seq, check_if_user_conscious = check_if_user_conscious) From c9d484c47ce5c08750243b90f57daa18f4a5a0c3 Mon Sep 17 00:00:00 2001 From: Aleksandr Voskoboinikov <“wwoskie@gmail.com”> Date: Sun, 1 Oct 2023 11:40:55 +0300 Subject: [PATCH 34/53] Add parse_input func and update run_ultimate_protein_tools --- HW4_Voskoboinikov/ultimate_protein_tools.py | 51 ++++++++++++++------- 1 file changed, 35 insertions(+), 16 deletions(-) diff --git a/HW4_Voskoboinikov/ultimate_protein_tools.py b/HW4_Voskoboinikov/ultimate_protein_tools.py index fa903fd..cf0aea9 100644 --- a/HW4_Voskoboinikov/ultimate_protein_tools.py +++ b/HW4_Voskoboinikov/ultimate_protein_tools.py @@ -130,7 +130,7 @@ def read_seq_from_fasta(path_to_seq: str, use_full_name: bool = False, - **kwargs) -> dict: + **_) -> dict: """ Reads sequences from fasta file and returns dictionary @@ -185,7 +185,7 @@ def invert_dct(dct: dict) -> dict: inv_dct = {} for k, v in dct.items(): - inv_dct[v] = inv_dct.get(v, []) + [k] # get value from dict (return []) and append key + inv_dct[v] = inv_dct.get(v, []) + [k] # get value from dict (return [] if empty) and append key return inv_dct @@ -208,7 +208,7 @@ def is_protein_valid(seq: str) -> bool: def find_sites(seq: str, *sites: str, is_one_based: bool = False, - **kwargs) -> dict: + **_) -> dict: """ Finds indexes of given sites @@ -282,7 +282,8 @@ def get_protein_rnas_number(seq: int) -> int: def get_frameshift_proteins(seq: int, check_if_user_conscious: bool = False, - is_stop_codon_termination_enabled: bool = False) -> dict: + is_stop_codon_termination_enabled: bool = False, + **_) -> dict: """ Returns list of all possible proteins from all possible frames in peptide. 
WARNING: can be computationally intence on longer sequences, will NOT start unless check_if_user_conscious is True @@ -494,22 +495,40 @@ def convert_aa_name(sequence: str, name_dict: dict[str, str] = None, sep: str = } +def parse_input(inp: str, **kwargs) -> dict: + """ + Parses input and returns dict of seqs + + Arguments: + - inp (str): Input path or seq or dict of seqs or list of seqs + - **kwargs: Additional keyword arguments to be passed to input reader (e.g. ) + + Return: + - parsed_dct (dict): dict where keys are number or name of seq and value of seq + """ + parsed_dct = {} + inp_type = type(inp) + if inp_type == list: + for i, seq in enumerate(inp): + parsed_dct |= {i: seq} + elif inp_type == dict: + parsed_dct = inp + elif inp_type == str and '.' in inp: + parsed_dct = input_dct = read_seq_from_fasta(inp, **kwargs) + elif inp_type == str: + parsed_dct = {0: inp} + + return parsed_dct + + + + def run_ultimate_protein_tools(command, + inp, *args, - input_path = None, - input_seq = None, - input_lst = None, - input_dct = None, **kwargs): output_dct = {} - if input_path: - input_dct = read_seq_from_fasta(input_path, **kwargs) - elif input_seq: # TODO possible name parsing - input_dct= {0: input_seq} - elif input_lst: - input_dct = {} - for i, seq in enumerate(input_lst): - input_dct |= {i: seq} + input_dct = parse_input(inp) for name in input_dct: if is_protein_valid(input_dct[name]): output_dct[name] = COMMAND_DCT[command](input_dct[name], *args, **kwargs) From 96bccd75fe61a7583f58acb803897a437c2d93df Mon Sep 17 00:00:00 2001 From: Aleksandr Voskoboinikov <“wwoskie@gmail.com”> Date: Sun, 1 Oct 2023 12:04:28 +0300 Subject: [PATCH 35/53] Update RNA_AA_TABLE and RNA_CODON_TABLE consts with lowercase --- HW4_Voskoboinikov/ultimate_protein_tools.py | 213 ++++++++++++++++---- 1 file changed, 172 insertions(+), 41 deletions(-) diff --git a/HW4_Voskoboinikov/ultimate_protein_tools.py b/HW4_Voskoboinikov/ultimate_protein_tools.py index cf0aea9..390ed42 100644 --- 
a/HW4_Voskoboinikov/ultimate_protein_tools.py +++ b/HW4_Voskoboinikov/ultimate_protein_tools.py @@ -82,49 +82,180 @@ } -# TODO check if possible to rempve kwargs RNA_AA_TABLE = { -'F': ['UUU', 'UUC'], - 'L': ['UUA', 'UUG', 'CUU', 'CUC', 'CUA', 'CUG'], - 'S': ['UCU', 'UCC', 'UCA', 'UCG', 'AGU', 'AGC'], - 'Y': ['UAU', 'UAC'], - '*': ['UAA', 'UAG', 'UGA'], - 'C': ['UGU', 'UGC'], - 'W': ['UGG'], - 'P': ['CCU', 'CCC', 'CCA', 'CCG'], - 'H': ['CAU', 'CAC'], - 'Q': ['CAA', 'CAG'], - 'R': ['CGU', 'CGC', 'CGA', 'CGG', 'AGA', 'AGG'], - 'I': ['AUU', 'AUC', 'AUA'], - 'M': ['AUG'], - 'T': ['ACU', 'ACC', 'ACA', 'ACG'], - 'N': ['AAU', 'AAC'], - 'K': ['AAA', 'AAG'], - 'V': ['GUU', 'GUC', 'GUA', 'GUG'], - 'A': ['GCU', 'GCC', 'GCA', 'GCG'], - 'D': ['GAU', 'GAC'], - 'E': ['GAA', 'GAG'], - 'G': ['GGU', 'GGC', 'GGA', 'GGG'], -} + 'F': ['UUU', 'UUC'], + 'L': ['UUA', 'UUG', 'CUU', 'CUC', 'CUA', 'CUG'], + 'S': ['UCU', 'UCC', 'UCA', 'UCG', 'AGU', 'AGC'], + 'Y': ['UAU', 'UAC'], + '*': ['uaa', 'uag', 'uga'], + 'C': ['UGU', 'UGC'], + 'W': ['UGG'], + 'P': ['CCU', 'CCC', 'CCA', 'CCG'], + 'H': ['CAU', 'CAC'], + 'Q': ['CAA', 'CAG'], + 'R': ['CGU', 'CGC', 'CGA', 'CGG', 'AGA', 'AGG'], + 'I': ['AUU', 'AUC', 'AUA'], + 'M': ['AUG'], + 'T': ['ACU', 'ACC', 'ACA', 'ACG'], + 'N': ['AAU', 'AAC'], + 'K': ['AAA', 'AAG'], + 'V': ['GUU', 'GUC', 'GUA', 'GUG'], + 'A': ['GCU', 'GCC', 'GCA', 'GCG'], + 'D': ['GAU', 'GAC'], + 'E': ['GAA', 'GAG'], + 'G': ['GGU', 'GGC', 'GGA', 'GGG'], + 'f': ['uuu', 'uuc'], + 'l': ['uua', 'uug', 'cuu', 'cuc', 'cua', 'cug'], + 's': ['ucu', 'ucc', 'uca', 'ucg', 'agu', 'agc'], + 'y': ['uau', 'uac'], + 'c': ['ugu', 'ugc'], + 'w': ['ugg'], + 'p': ['ccu', 'ccc', 'cca', 'ccg'], + 'h': ['cau', 'cac'], + 'q': ['caa', 'cag'], + 'r': ['cgu', 'cgc', 'cga', 'cgg', 'aga', 'agg'], + 'i': ['auu', 'auc', 'aua'], + 'm': ['aug'], + 't': ['acu', 'acc', 'aca', 'acg'], + 'n': ['aau', 'aac'], + 'k': ['aaa', 'aag'], + 'v': ['guu', 'guc', 'gua', 'gug'], + 'a': ['gcu', 'gcc', 'gca', 'gcg'], + 'd': ['gau', 
'gac'], + 'e': ['gaa', 'gag'], + 'g': ['ggu', 'ggc', 'gga', 'ggg'] + } RNA_CODON_TABLE = { - 'UUU': 'F', 'UUC': 'F', 'UUA': 'L', 'UUG': 'L', - 'UCU': 'S', 'UCC': 'S', 'UCA': 'S', 'UCG': 'S', - 'UAU': 'Y', 'UAC': 'Y', 'UAA': '*', 'UAG': '*', - 'UGU': 'C', 'UGC': 'C', 'UGA': '*', 'UGG': 'W', - 'CUU': 'L', 'CUC': 'L', 'CUA': 'L', 'CUG': 'L', - 'CCU': 'P', 'CCC': 'P', 'CCA': 'P', 'CCG': 'P', - 'CAU': 'H', 'CAC': 'H', 'CAA': 'Q', 'CAG': 'Q', - 'CGU': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R', - 'AUU': 'I', 'AUC': 'I', 'AUA': 'I', 'AUG': 'M', - 'ACU': 'T', 'ACC': 'T', 'ACA': 'T', 'ACG': 'T', - 'AAU': 'N', 'AAC': 'N', 'AAA': 'K', 'AAG': 'K', - 'AGU': 'S', 'AGC': 'S', 'AGA': 'R', 'AGG': 'R', - 'GUU': 'V', 'GUC': 'V', 'GUA': 'V', 'GUG': 'V', - 'GCU': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', - 'GAU': 'D', 'GAC': 'D', 'GAA': 'E', 'GAG': 'E', - 'GGU': 'G', 'GGC': 'G', 'GGA': 'G', 'GGG': 'G', + 'UUU': 'F', + 'UUC': 'F', + 'UUA': 'L', + 'UUG': 'L', + 'UCU': 'S', + 'UCC': 'S', + 'UCA': 'S', + 'UCG': 'S', + 'UAU': 'Y', + 'UAC': 'Y', + 'UAA': '*', + 'UAG': '*', + 'UGU': 'C', + 'UGC': 'C', + 'UGA': '*', + 'UGG': 'W', + 'CUU': 'L', + 'CUC': 'L', + 'CUA': 'L', + 'CUG': 'L', + 'CCU': 'P', + 'CCC': 'P', + 'CCA': 'P', + 'CCG': 'P', + 'CAU': 'H', + 'CAC': 'H', + 'CAA': 'Q', + 'CAG': 'Q', + 'CGU': 'R', + 'CGC': 'R', + 'CGA': 'R', + 'CGG': 'R', + 'AUU': 'I', + 'AUC': 'I', + 'AUA': 'I', + 'AUG': 'M', + 'ACU': 'T', + 'ACC': 'T', + 'ACA': 'T', + 'ACG': 'T', + 'AAU': 'N', + 'AAC': 'N', + 'AAA': 'K', + 'AAG': 'K', + 'AGU': 'S', + 'AGC': 'S', + 'AGA': 'R', + 'AGG': 'R', + 'GUU': 'V', + 'GUC': 'V', + 'GUA': 'V', + 'GUG': 'V', + 'GCU': 'A', + 'GCC': 'A', + 'GCA': 'A', + 'GCG': 'A', + 'GAU': 'D', + 'GAC': 'D', + 'GAA': 'E', + 'GAG': 'E', + 'GGU': 'G', + 'GGC': 'G', + 'GGA': 'G', + 'GGG': 'G', + 'uuu': 'F', + 'uuc': 'F', + 'uua': 'L', + 'uug': 'L', + 'ucu': 'S', + 'ucc': 'S', + 'uca': 'S', + 'ucg': 'S', + 'uau': 'Y', + 'uac': 'Y', + 'uaa': '*', + 'uag': '*', + 'ugu': 'C', + 'ugc': 'C', + 'uga': '*', + 
'ugg': 'W', + 'cuu': 'L', + 'cuc': 'L', + 'cua': 'L', + 'cug': 'L', + 'ccu': 'P', + 'ccc': 'P', + 'cca': 'P', + 'ccg': 'P', + 'cau': 'H', + 'cac': 'H', + 'caa': 'Q', + 'cag': 'Q', + 'cgu': 'R', + 'cgc': 'R', + 'cga': 'R', + 'cgg': 'R', + 'auu': 'I', + 'auc': 'I', + 'aua': 'I', + 'aug': 'M', + 'acu': 'T', + 'acc': 'T', + 'aca': 'T', + 'acg': 'T', + 'aau': 'N', + 'aac': 'N', + 'aaa': 'K', + 'aag': 'K', + 'agu': 'S', + 'agc': 'S', + 'aga': 'R', + 'agg': 'R', + 'guu': 'V', + 'guc': 'V', + 'gua': 'V', + 'gug': 'V', + 'gcu': 'A', + 'gcc': 'A', + 'gca': 'A', + 'gcg': 'A', + 'gau': 'D', + 'gac': 'D', + 'gaa': 'E', + 'gag': 'E', + 'ggu': 'G', + 'ggc': 'G', + 'gga': 'G', + 'ggg': 'G' } @@ -507,13 +638,13 @@ def parse_input(inp: str, **kwargs) -> dict: - parsed_dct (dict): dict where keys are number or name of seq and value of seq """ parsed_dct = {} - inp_type = type(inp) + inp_type = type(inp) # get input type if inp_type == list: for i, seq in enumerate(inp): parsed_dct |= {i: seq} elif inp_type == dict: parsed_dct = inp - elif inp_type == str and '.' in inp: + elif inp_type == str and '.' 
in inp: # check whether input has file extension symbols parsed_dct = input_dct = read_seq_from_fasta(inp, **kwargs) elif inp_type == str: parsed_dct = {0: inp} From 05362377c0bd335711b629fa7eefdef1d8f848a4 Mon Sep 17 00:00:00 2001 From: Artem Vasilev Date: Sun, 1 Oct 2023 12:11:58 +0300 Subject: [PATCH 36/53] Update docstrings and comments --- HW4_Voskoboinikov/ultimate_protein_tools.py | 176 ++++++++++---------- 1 file changed, 89 insertions(+), 87 deletions(-) diff --git a/HW4_Voskoboinikov/ultimate_protein_tools.py b/HW4_Voskoboinikov/ultimate_protein_tools.py index cf0aea9..20bcea1 100644 --- a/HW4_Voskoboinikov/ultimate_protein_tools.py +++ b/HW4_Voskoboinikov/ultimate_protein_tools.py @@ -14,17 +14,15 @@ 'P': 'Proline', 'p': 'proline', 'Q': 'Glutamine', 'q': 'glutamine', 'R': 'Arginine', 'r': 'arginine', - 'S': 'Serine', 's': 'serine', + 'S': 'Serine', 's': 'serine', 'T': 'Threonine', 't': 'threonine', 'V': 'Valine', 'v': 'valine', 'W': 'Tryptophan', 'w': 'tryptophan', 'Y': 'Tyrosine', 'y': 'tyrosine' - } - +} H2O_WEIGHT: float = 18.01468 - AA_MASS_DICT: dict[str, float] = { 'G': 75.0659, 'g': 75.0659, 'L': 131.17262, 'l': 131.17262, @@ -46,8 +44,7 @@ 'I': 131.17262, 'i': 131.17262, 'M': 149.21094, 'm': 149.21094, 'T': 119.11826, 't': 119.11826, - } - +} ATOMIC_MASS: dict[str, float] = { 'C': 12.011, @@ -55,8 +52,7 @@ 'O': 15.999, 'N': 14.0067, 'S': 32.065 - } - +} AA_NAME_DICT: dict[str, str] = { 'G': 'Gly', 'g': 'Gly', @@ -79,35 +75,33 @@ 'I': 'Ile', 'i': 'Ile', 'M': 'Met', 'm': 'Met', 'T': 'Thr', 't': 'Thr' - } - +} -# TODO check if possible to rempve kwargs +# TODO check if possible to remove kwargs RNA_AA_TABLE = { -'F': ['UUU', 'UUC'], - 'L': ['UUA', 'UUG', 'CUU', 'CUC', 'CUA', 'CUG'], - 'S': ['UCU', 'UCC', 'UCA', 'UCG', 'AGU', 'AGC'], - 'Y': ['UAU', 'UAC'], - '*': ['UAA', 'UAG', 'UGA'], - 'C': ['UGU', 'UGC'], - 'W': ['UGG'], - 'P': ['CCU', 'CCC', 'CCA', 'CCG'], - 'H': ['CAU', 'CAC'], - 'Q': ['CAA', 'CAG'], - 'R': ['CGU', 'CGC', 'CGA', 'CGG', 'AGA', 
'AGG'], - 'I': ['AUU', 'AUC', 'AUA'], - 'M': ['AUG'], - 'T': ['ACU', 'ACC', 'ACA', 'ACG'], - 'N': ['AAU', 'AAC'], - 'K': ['AAA', 'AAG'], - 'V': ['GUU', 'GUC', 'GUA', 'GUG'], - 'A': ['GCU', 'GCC', 'GCA', 'GCG'], - 'D': ['GAU', 'GAC'], - 'E': ['GAA', 'GAG'], - 'G': ['GGU', 'GGC', 'GGA', 'GGG'], + 'F': ['UUU', 'UUC'], + 'L': ['UUA', 'UUG', 'CUU', 'CUC', 'CUA', 'CUG'], + 'S': ['UCU', 'UCC', 'UCA', 'UCG', 'AGU', 'AGC'], + 'Y': ['UAU', 'UAC'], + '*': ['UAA', 'UAG', 'UGA'], + 'C': ['UGU', 'UGC'], + 'W': ['UGG'], + 'P': ['CCU', 'CCC', 'CCA', 'CCG'], + 'H': ['CAU', 'CAC'], + 'Q': ['CAA', 'CAG'], + 'R': ['CGU', 'CGC', 'CGA', 'CGG', 'AGA', 'AGG'], + 'I': ['AUU', 'AUC', 'AUA'], + 'M': ['AUG'], + 'T': ['ACU', 'ACC', 'ACA', 'ACG'], + 'N': ['AAU', 'AAC'], + 'K': ['AAA', 'AAG'], + 'V': ['GUU', 'GUC', 'GUA', 'GUG'], + 'A': ['GCU', 'GCC', 'GCA', 'GCG'], + 'D': ['GAU', 'GAC'], + 'E': ['GAA', 'GAG'], + 'G': ['GGU', 'GGC', 'GGA', 'GGG'], } - RNA_CODON_TABLE = { 'UUU': 'F', 'UUC': 'F', 'UUA': 'L', 'UUG': 'L', 'UCU': 'S', 'UCC': 'S', 'UCA': 'S', 'UCG': 'S', @@ -128,11 +122,11 @@ } -def read_seq_from_fasta(path_to_seq: str, - use_full_name: bool = False, +def read_seq_from_fasta(path_to_seq: str, + use_full_name: bool = False, **_) -> dict: """ - Reads sequences from fasta file and returns dictionary + Reads sequences from fasta file and returns dictionary. Arguments: - path_to_seq (str): path to file @@ -145,19 +139,19 @@ def read_seq_from_fasta(path_to_seq: str, out_dct = {} for line in f: line = line.strip() - if line.startswith('>'): # check for first line in seq - if use_full_name: # check if user set full name in fasta - name = line[1:] # take whole fasta properties (e.g. if names not unique) + if line.startswith('>'): # check for first line in seq + if use_full_name: # check if user set full name in fasta + name = line[1:] # take whole fasta properties (e.g. 
if names not unique) else: name = line[1:].split()[0] else: - out_dct[name] = out_dct.get(name, '') + line # get value from dict (return '' if empty) and append str + out_dct[name] = out_dct.get(name, '') + line # get value from dict (return '' if empty) and append str return out_dct def get_sites_lengths(sites: list) -> dict: """ - Takes sites list and calculates their lengths + Takes sites list and calculates their lengths. Arguments: - sites (list): list of sites (str) @@ -174,7 +168,7 @@ def get_sites_lengths(sites: list) -> dict: def invert_dct(dct: dict) -> dict: """ - Inverts a dict + Inverts a dict. Arguments: - dct (dict): dict to be inverted @@ -185,13 +179,13 @@ def invert_dct(dct: dict) -> dict: inv_dct = {} for k, v in dct.items(): - inv_dct[v] = inv_dct.get(v, []) + [k] # get value from dict (return [] if empty) and append key + inv_dct[v] = inv_dct.get(v, []) + [k] # get value from dict (return [] if empty) and append key return inv_dct def is_protein_valid(seq: str) -> bool: """ - Checks if protein is valid + Checks if protein is valid. Arguments: - seq (str): seq to be checked @@ -205,12 +199,12 @@ def is_protein_valid(seq: str) -> bool: return False -def find_sites(seq: str, - *sites: str, - is_one_based: bool = False, +def find_sites(seq: str, + *sites: str, + is_one_based: bool = False, **_) -> dict: """ - Finds indexes of given sites + Finds indexes of given sites. 
Arguments: - seq (str): seq to be checked @@ -221,24 +215,31 @@ def find_sites(seq: str, - dict: dictionary of sites as keys and lists of indexes for the site where it's been found """ - window_sizes = invert_dct(get_sites_lengths(sites)) # get lengths of all sites and stick them together to avoid passing through seq multiple times if possible - found_sites = {} - for window_size in window_sizes: # perform iteration for all given lengths of sites - for i in range(len(seq) - window_size + 1): # iterate through seq with step one and consider window of site length each step - scatter = seq[i:i + window_size] # get fragment of sequence with length of window i.e. scatter + window_sizes = invert_dct(get_sites_lengths( + sites)) # get lengths of all sites and stick them together to avoid passing through seq multiple times if + # possible + found_sites = {} + for window_size in window_sizes: # perform iteration for all given lengths of sites + for i in range( + len(seq) - window_size + 1): # iterate through seq with step one and consider window + # of site length each step + scatter = seq[i:i + window_size] # get fragment of sequence with length of window i.e. scatter for site in window_sizes[window_size]: - if scatter == site: # check if scatter is site + if scatter == site: # check if scatter is site found_sites[site] = ( - found_sites.get(site, []) # get - + [i + is_one_based] - ) # append index to list in dict + found_sites.get(site, []) # get + + [i + is_one_based] + ) # append index to list in dict return found_sites def get_protein_rnas(seq: str, check_if_user_conscious: bool = False) -> list: """ - Returns list of all possible RNA's from which can serve as matrix for protein synthesis. WARNING: can be computationally intence on longer sequences, will NOT start unless check_if_user_conscious is True + Returns list of all possible RNA's from which can serve as matrix for protein synthesis. 
+ + WARNING: can be computationally intensive on longer sequences, + will NOT start unless check_if_user_conscious is True! Arguments: - seq (str): seq to be checked @@ -249,23 +250,23 @@ def get_protein_rnas(seq: str, """ if check_if_user_conscious: - kmers = [''] # set initial kmers - for amino_acid in seq: # iterate AAs + kmers = [''] # set initial kmers + for amino_acid in seq: # iterate AAs current_kmers = [] - codons = RNA_AA_TABLE[amino_acid] # get list of codons for AA + codons = RNA_AA_TABLE[amino_acid] # get list of codons for AA for codon in codons: for kmer in kmers: - current_kmers.append(kmer + codon) # append every codon to existing kmers - kmers = current_kmers # re-write k-mers for next iteration + current_kmers.append(kmer + codon) # append every codon to existing kmers + kmers = current_kmers # re-write k-mers for next iteration return kmers - return "You don't know what you're doing!" # politely ask user to reconsider their actions + return "You don't know what you're doing!" # politely ask user to reconsider their actions def get_protein_rnas_number(seq: int) -> int: """ - Get number of all possible RNA's for a given protein + Get number of all possible RNA's for a given protein. Arguments: - seq (str): seq to be checked @@ -280,12 +281,15 @@ def get_protein_rnas_number(seq: int) -> int: return rnas_num -def get_frameshift_proteins(seq: int, - check_if_user_conscious: bool = False, +def get_frameshift_proteins(seq: int, + check_if_user_conscious: bool = False, is_stop_codon_termination_enabled: bool = False, **_) -> dict: """ - Returns list of all possible proteins from all possible frames in peptide. WARNING: can be computationally intence on longer sequences, will NOT start unless check_if_user_conscious is True + Returns list of all possible proteins from all possible frames in peptide. + + WARNING: can be computationally intensive on longer sequences, + will NOT start unless check_if_user_conscious is True! 
Arguments: - seq (str): seq to be checked @@ -293,25 +297,27 @@ def get_frameshift_proteins(seq: int, - is_stop_codon_termination_enabled (bool): terminate translation when reached stop-codon. Default False. Return: - - dict: dict of lists of all possible frames proteins {frame_0: ['protein_seqs'], frame_1: ['protein_seqs'], frame_2: ['protein_seqs']} + - dict: dict of lists of all possible frames proteins: + {frame_0: ['protein_seqs'], frame_1: ['protein_seqs'], frame_2: ['protein_seqs']} """ if check_if_user_conscious: - frameshift_dct = {'frame_0': [seq]} # set current seq as frame_0 (protein from not-shifted frame) - rnas = get_protein_rnas(seq, check_if_user_conscious = check_if_user_conscious) + frameshift_dct = {'frame_0': [seq]} # set current seq as frame_0 (protein from not-shifted frame) + rnas = get_protein_rnas(seq, check_if_user_conscious=check_if_user_conscious) for frame_number in [1, 2]: frames_list = [] for rna in rnas: frame = '' - for i in range(frame_number, len(rna) - (frame_number + 1), 3): # set frame-dependent range to iterate - frame_codon = rna[i:i+3] # extract codon + for i in range(frame_number, len(rna) - (frame_number + 1), 3): # set frame-dependent range to iterate + frame_codon = rna[i:i + 3] # extract codon frame += RNA_CODON_TABLE[frame_codon] - if is_stop_codon_termination_enabled and RNA_CODON_TABLE[frame_codon] == '*': # stop writing if meet stop-codon + if is_stop_codon_termination_enabled and RNA_CODON_TABLE[ + frame_codon] == '*': # stop writing if meet stop-codon break - frames_list.append(frame) # append frame to frames list - frameshift_dct[f'frame_{frame_number}'] = list(set(frames_list)) # clean duplicates and write to dict + frames_list.append(frame) # append frame to frames list + frameshift_dct[f'frame_{frame_number}'] = list(set(frames_list)) # clean duplicates and write to dict return frameshift_dct - return "You don't fucking know what you're doing!" 
# politely ask user to reconsider their actions + return "You don't fucking know what you're doing!" # politely ask user to reconsider their actions def length_of_protein(seq: str) -> int: @@ -382,7 +388,6 @@ def get_fracture_of_aa(seq: str, *, show_as_percentage: bool = False, aminoacids def calculate_protein_mass(sequence: str, aa_atomic_mass: dict[str, float] = None) -> float: """ - Calculates the molecular mass of a protein based on its amino acid sequence and a dictionary of amino acid masses. Arguments / Args: @@ -409,7 +414,6 @@ def calculate_protein_mass(sequence: str, aa_atomic_mass: dict[str, float] = Non def get_atomic_mass(chem: str, atomic_mass: dict[str, float] = None) -> float: """ - Calculates the molecular mass of a biological molecule, primarily an amino acid, based on a simple chemical formula. Arguments / Args: @@ -446,7 +450,6 @@ def get_atomic_mass(chem: str, atomic_mass: dict[str, float] = None) -> float: def convert_aa_name(sequence: str, name_dict: dict[str, str] = None, sep: str = '', use_default_register: bool = True) -> str: """ - Converts a sequence of one-letter amino acid codes to three-letter designations. Arguments / Args: @@ -461,7 +464,7 @@ def convert_aa_name(sequence: str, name_dict: dict[str, str] = None, sep: str = Return: - str: A string of three-letter amino acid designations separated by the specified delimiter. """ - + new_name = '' if name_dict is None: name_dict = AA_NAME_DICT @@ -488,16 +491,16 @@ def convert_aa_name(sequence: str, name_dict: dict[str, str] = None, sep: str = COMMAND_DCT = { 'find_sites': find_sites, - 'get_protein_rnas' : get_protein_rnas, + 'get_protein_rnas': get_protein_rnas, 'get_protein_rnas_number': get_protein_rnas_number, 'get_frameshift_proteins': get_frameshift_proteins, 'is_protein_valid': is_protein_valid, - } +} def parse_input(inp: str, **kwargs) -> dict: """ - Parses input and returns dict of seqs + Parses input and returns dict of seqs. 
Arguments: - inp (str): Input path or seq or dict of seqs or list of seqs @@ -506,6 +509,7 @@ def parse_input(inp: str, **kwargs) -> dict: Return: - parsed_dct (dict): dict where keys are number or name of seq and value of seq """ + parsed_dct = {} inp_type = type(inp) if inp_type == list: @@ -517,10 +521,8 @@ def parse_input(inp: str, **kwargs) -> dict: parsed_dct = input_dct = read_seq_from_fasta(inp, **kwargs) elif inp_type == str: parsed_dct = {0: inp} - - return parsed_dct - + return parsed_dct def run_ultimate_protein_tools(command, From af4d81908161ffb36d2b0a2a07018c3cee9b2888 Mon Sep 17 00:00:00 2001 From: Aleksandr Voskoboinikov <“wwoskie@gmail.com”> Date: Sun, 1 Oct 2023 12:21:12 +0300 Subject: [PATCH 37/53] Add check_all_upper finc and update get_frameshift_proteins --- HW4_Voskoboinikov/ultimate_protein_tools.py | 22 ++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/HW4_Voskoboinikov/ultimate_protein_tools.py b/HW4_Voskoboinikov/ultimate_protein_tools.py index 390ed42..ead92c2 100644 --- a/HW4_Voskoboinikov/ultimate_protein_tools.py +++ b/HW4_Voskoboinikov/ultimate_protein_tools.py @@ -402,7 +402,7 @@ def get_protein_rnas_number(seq: int) -> int: - seq (str): seq to be checked Return: - - int: number of possible RNA's for seq + - rnas_num (int): number of possible RNA's for seq """ rnas_num = 1 @@ -411,6 +411,24 @@ def get_protein_rnas_number(seq: int) -> int: return rnas_num +def check_all_upper(codon: str) -> bool: + """ + Checks whether all letters in colon are upper + + Arguments: + - codon (str): codon to be checked + + Return: + - check_upper (bool): if all letters are uppercase + """ + + check_upper = True + for letter in (set(codon)): + letter.isupper() and check_upper + return check_upper + + + def get_frameshift_proteins(seq: int, check_if_user_conscious: bool = False, is_stop_codon_termination_enabled: bool = False, @@ -436,6 +454,8 @@ def get_frameshift_proteins(seq: int, frame = '' for i in 
range(frame_number, len(rna) - (frame_number + 1), 3): # set frame-dependent range to iterate frame_codon = rna[i:i+3] # extract codon + if not check_all_upper(frame_codon): # check if all letters in codon uppercase + frame_codon = frame_codon.tolower() # if not change all to lowercase frame += RNA_CODON_TABLE[frame_codon] if is_stop_codon_termination_enabled and RNA_CODON_TABLE[frame_codon] == '*': # stop writing if meet stop-codon break From f03b11153b6812fd8d9fef67c1cf7c048034b77e Mon Sep 17 00:00:00 2001 From: Artem Vasilev Date: Sun, 1 Oct 2023 12:35:42 +0300 Subject: [PATCH 38/53] Change name of function length_of_protein to get_length_of_protein --- HW4_Voskoboinikov/ultimate_protein_tools.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/HW4_Voskoboinikov/ultimate_protein_tools.py b/HW4_Voskoboinikov/ultimate_protein_tools.py index 20bcea1..fcd6fb0 100644 --- a/HW4_Voskoboinikov/ultimate_protein_tools.py +++ b/HW4_Voskoboinikov/ultimate_protein_tools.py @@ -320,7 +320,7 @@ def get_frameshift_proteins(seq: int, return "You don't fucking know what you're doing!" # politely ask user to reconsider their actions -def length_of_protein(seq: str) -> int: +def get_length_of_protein(seq: str) -> int: """ Calculates the length of a protein. 
@@ -380,7 +380,7 @@ def get_fracture_of_aa(seq: str, *, show_as_percentage: bool = False, aminoacids round_var = 4 aa_dict_count = count_aa(seq, aminoacids=aminoacids) aa_dict_percent = {} - len_of_protein = length_of_protein(seq) + len_of_protein = get_length_of_protein(seq) for aa, count in aa_dict_count.items(): aa_dict_percent[aa] = round(count / len_of_protein * mult, round_var) return aa_dict_percent From 0f0cbcc760ab4c434a0ac9a59541f67a2d4cc808 Mon Sep 17 00:00:00 2001 From: Aleksandr Voskoboinikov <“wwoskie@gmail.com”> Date: Sun, 1 Oct 2023 12:39:31 +0300 Subject: [PATCH 39/53] Fix lowercase recognition --- HW4_Voskoboinikov/ultimate_protein_tools.py | 131 ++++++++++---------- 1 file changed, 66 insertions(+), 65 deletions(-) diff --git a/HW4_Voskoboinikov/ultimate_protein_tools.py b/HW4_Voskoboinikov/ultimate_protein_tools.py index ead92c2..77dc636 100644 --- a/HW4_Voskoboinikov/ultimate_protein_tools.py +++ b/HW4_Voskoboinikov/ultimate_protein_tools.py @@ -192,71 +192,71 @@ 'GGC': 'G', 'GGA': 'G', 'GGG': 'G', - 'uuu': 'F', - 'uuc': 'F', - 'uua': 'L', - 'uug': 'L', - 'ucu': 'S', - 'ucc': 'S', - 'uca': 'S', - 'ucg': 'S', - 'uau': 'Y', - 'uac': 'Y', + 'uuu': 'f', + 'uuc': 'f', + 'uua': 'l', + 'uug': 'l', + 'ucu': 's', + 'ucc': 's', + 'uca': 's', + 'ucg': 's', + 'uau': 'y', + 'uac': 'y', 'uaa': '*', 'uag': '*', - 'ugu': 'C', - 'ugc': 'C', + 'ugu': 'c', + 'ugc': 'c', 'uga': '*', - 'ugg': 'W', - 'cuu': 'L', - 'cuc': 'L', - 'cua': 'L', - 'cug': 'L', - 'ccu': 'P', - 'ccc': 'P', - 'cca': 'P', - 'ccg': 'P', - 'cau': 'H', - 'cac': 'H', - 'caa': 'Q', - 'cag': 'Q', - 'cgu': 'R', - 'cgc': 'R', - 'cga': 'R', - 'cgg': 'R', - 'auu': 'I', - 'auc': 'I', - 'aua': 'I', - 'aug': 'M', - 'acu': 'T', - 'acc': 'T', - 'aca': 'T', - 'acg': 'T', - 'aau': 'N', - 'aac': 'N', - 'aaa': 'K', - 'aag': 'K', - 'agu': 'S', - 'agc': 'S', - 'aga': 'R', - 'agg': 'R', - 'guu': 'V', - 'guc': 'V', - 'gua': 'V', - 'gug': 'V', - 'gcu': 'A', - 'gcc': 'A', - 'gca': 'A', - 'gcg': 'A', - 'gau': 'D', - 
'gac': 'D', - 'gaa': 'E', - 'gag': 'E', - 'ggu': 'G', - 'ggc': 'G', - 'gga': 'G', - 'ggg': 'G' -} + 'ugg': 'w', + 'cuu': 'l', + 'cuc': 'l', + 'cua': 'l', + 'cug': 'l', + 'ccu': 'p', + 'ccc': 'p', + 'cca': 'p', + 'ccg': 'p', + 'cau': 'h', + 'cac': 'h', + 'caa': 'q', + 'cag': 'q', + 'cgu': 'r', + 'cgc': 'r', + 'cga': 'r', + 'cgg': 'r', + 'auu': 'i', + 'auc': 'i', + 'aua': 'i', + 'aug': 'm', + 'acu': 't', + 'acc': 't', + 'aca': 't', + 'acg': 't', + 'aau': 'n', + 'aac': 'n', + 'aaa': 'k', + 'aag': 'k', + 'agu': 's', + 'agc': 's', + 'aga': 'r', + 'agg': 'r', + 'guu': 'v', + 'guc': 'v', + 'gua': 'v', + 'gug': 'v', + 'gcu': 'a', + 'gcc': 'a', + 'gca': 'a', + 'gcg': 'a', + 'gau': 'd', + 'gac': 'd', + 'gaa': 'e', + 'gag': 'e', + 'ggu': 'g', + 'ggc': 'g', + 'gga': 'g', + 'ggg': 'g' + } def read_seq_from_fasta(path_to_seq: str, @@ -367,7 +367,8 @@ def find_sites(seq: str, def get_protein_rnas(seq: str, - check_if_user_conscious: bool = False) -> list: + check_if_user_conscious: bool = False, + **_) -> list: """ Returns list of all possible RNA's from which can serve as matrix for protein synthesis. 
WARNING: can be computationally intence on longer sequences, will NOT start unless check_if_user_conscious is True @@ -424,7 +425,7 @@ def check_all_upper(codon: str) -> bool: check_upper = True for letter in (set(codon)): - letter.isupper() and check_upper + check_upper = letter.isupper() and check_upper return check_upper @@ -455,7 +456,7 @@ def get_frameshift_proteins(seq: int, for i in range(frame_number, len(rna) - (frame_number + 1), 3): # set frame-dependent range to iterate frame_codon = rna[i:i+3] # extract codon if not check_all_upper(frame_codon): # check if all letters in codon uppercase - frame_codon = frame_codon.tolower() # if not change all to lowercase + frame_codon = frame_codon.lower() # if not change all to lowercase frame += RNA_CODON_TABLE[frame_codon] if is_stop_codon_termination_enabled and RNA_CODON_TABLE[frame_codon] == '*': # stop writing if meet stop-codon break From f373a164c84cb0fd66248fddb0dcf924f6378eb7 Mon Sep 17 00:00:00 2001 From: Artem Vasilev Date: Sun, 1 Oct 2023 12:39:33 +0300 Subject: [PATCH 40/53] Update the docstring of function get_fracture_of_aa --- HW4_Voskoboinikov/ultimate_protein_tools.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/HW4_Voskoboinikov/ultimate_protein_tools.py b/HW4_Voskoboinikov/ultimate_protein_tools.py index fcd6fb0..6a3d545 100644 --- a/HW4_Voskoboinikov/ultimate_protein_tools.py +++ b/HW4_Voskoboinikov/ultimate_protein_tools.py @@ -361,7 +361,7 @@ def count_aa(seq: str, *, aminoacids: str = None) -> dict: def get_fracture_of_aa(seq: str, *, show_as_percentage: bool = False, aminoacids: str = None) -> dict: """ - Returns the fracture or percentage of amino acids in a protein sequence. + Calculates the fracture or percentage of amino acids in a protein sequence. 
Arguments: - seq (str): sequence in which you need to calculate the fracture of amino acids From 274a69517a0eda763fe8000451cc563b8f6c83af Mon Sep 17 00:00:00 2001 From: Artem Vasilev Date: Sun, 1 Oct 2023 12:42:23 +0300 Subject: [PATCH 41/53] Update docstrings of functions calculate_protein_mass, get_atomic_mass, convert_aa_name Remove '/ Args' in all 3 functions --- HW4_Voskoboinikov/ultimate_protein_tools.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/HW4_Voskoboinikov/ultimate_protein_tools.py b/HW4_Voskoboinikov/ultimate_protein_tools.py index 6a3d545..f72253b 100644 --- a/HW4_Voskoboinikov/ultimate_protein_tools.py +++ b/HW4_Voskoboinikov/ultimate_protein_tools.py @@ -390,7 +390,7 @@ def calculate_protein_mass(sequence: str, aa_atomic_mass: dict[str, float] = Non """ Calculates the molecular mass of a protein based on its amino acid sequence and a dictionary of amino acid masses. - Arguments / Args: + Arguments: - sequence(str or list): A string or list of characters representing the amino acid sequence. - aa_atomic_mass(dict): A dictionary linking amino acids to their masses in atomic mass units. @@ -416,7 +416,7 @@ def get_atomic_mass(chem: str, atomic_mass: dict[str, float] = None) -> float: """ Calculates the molecular mass of a biological molecule, primarily an amino acid, based on a simple chemical formula. - Arguments / Args: + Arguments: - chem (str): String representing a simple chemical formula, e.g. C2H5OH - atomic_mass (dict[str, float], optional): A dictionary linking the chemical elements Carbon, Hydrogen, Oxygen, Nitrogen, and Sulfur with their masses in atomic mass units. @@ -452,7 +452,7 @@ def convert_aa_name(sequence: str, name_dict: dict[str, str] = None, sep: str = """ Converts a sequence of one-letter amino acid codes to three-letter designations. - Arguments / Args: + Arguments: - sequence (str): String with one-letter amino acid codes. 
- name_dict (dict[str, str], optional): A dictionary linking one-letter codes to three-letter designations. If not provided, the standard AA_NAME_DICT dictionary is used. From fbad6416abca3f959beea4b41f4c79f7bb3e5b14 Mon Sep 17 00:00:00 2001 From: Aleksandr Voskoboinikov <“wwoskie@gmail.com”> Date: Sun, 1 Oct 2023 13:17:38 +0300 Subject: [PATCH 42/53] Update command dct and fix *args in count_aa and get_fracture_of_aa --- HW4_Voskoboinikov/ultimate_protein_tools.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/HW4_Voskoboinikov/ultimate_protein_tools.py b/HW4_Voskoboinikov/ultimate_protein_tools.py index 77dc636..d2c4f65 100644 --- a/HW4_Voskoboinikov/ultimate_protein_tools.py +++ b/HW4_Voskoboinikov/ultimate_protein_tools.py @@ -466,7 +466,7 @@ def get_frameshift_proteins(seq: int, return "You don't fucking know what you're doing!" # politely ask user to reconsider their actions -def length_of_protein(seq: str) -> int: +def length_of_protein(seq: str, **_) -> int: """ Calculates the length of a protein. @@ -480,7 +480,7 @@ def length_of_protein(seq: str) -> int: return len(seq) -def count_aa(seq: str, *, aminoacids: str = None) -> dict: +def count_aa(seq: str, aminoacids: str = None, **_) -> dict: """ Counts the number of given or all amino acids in a protein sequence. @@ -505,7 +505,7 @@ def count_aa(seq: str, *, aminoacids: str = None) -> dict: return aa_dict_count -def get_fracture_of_aa(seq: str, *, show_as_percentage: bool = False, aminoacids: str = None) -> dict: +def get_fracture_of_aa(seq: str, show_as_percentage: bool = False, aminoacids: str = None, **_) -> dict: """ Returns the fracture or percentage of amino acids in a protein sequence. 
@@ -637,13 +637,16 @@ def convert_aa_name(sequence: str, name_dict: dict[str, str] = None, sep: str = raise ValueError(f'Unknown amino acid: {aa}') return new_name - -COMMAND_DCT = { +# defined later to let all funcs be initialized before passed here +command_dct = { 'find_sites': find_sites, 'get_protein_rnas' : get_protein_rnas, 'get_protein_rnas_number': get_protein_rnas_number, 'get_frameshift_proteins': get_frameshift_proteins, 'is_protein_valid': is_protein_valid, + 'length_of_protein': length_of_protein, + 'count_aa': count_aa, + 'get_fracture_of_aa': get_fracture_of_aa, } @@ -683,7 +686,7 @@ def run_ultimate_protein_tools(command, input_dct = parse_input(inp) for name in input_dct: if is_protein_valid(input_dct[name]): - output_dct[name] = COMMAND_DCT[command](input_dct[name], *args, **kwargs) + output_dct[name] = command_dct[command](input_dct[name], *args, **kwargs) else: output_dct[name] = is_protein_valid(input_dct[name]) return output_dct From 71090d138060ace9e7538218d825e43f587a57d1 Mon Sep 17 00:00:00 2001 From: Artem Vasilev Date: Sun, 1 Oct 2023 13:23:01 +0300 Subject: [PATCH 43/53] Add 2 spaces in some comments instead of 1 --- HW4_Voskoboinikov/ultimate_protein_tools.py | 23 +++++++++------------ 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/HW4_Voskoboinikov/ultimate_protein_tools.py b/HW4_Voskoboinikov/ultimate_protein_tools.py index 3b8ed9d..92f0cef 100644 --- a/HW4_Voskoboinikov/ultimate_protein_tools.py +++ b/HW4_Voskoboinikov/ultimate_protein_tools.py @@ -77,7 +77,6 @@ 'T': 'Thr', 't': 'Thr' } - RNA_AA_TABLE = { 'F': ['UUU', 'UUC'], 'L': ['UUA', 'UUG', 'CUU', 'CUC', 'CUA', 'CUG'], @@ -120,8 +119,7 @@ 'd': ['gau', 'gac'], 'e': ['gaa', 'gag'], 'g': ['ggu', 'ggc', 'gga', 'ggg'] - } - +} RNA_CODON_TABLE = { 'UUU': 'F', @@ -252,7 +250,7 @@ 'ggc': 'g', 'gga': 'g', 'ggg': 'g' - } +} def read_seq_from_fasta(path_to_seq: str, @@ -432,9 +430,8 @@ def check_all_upper(codon: str) -> bool: return check_upper - -def 
get_frameshift_proteins(seq: int, - check_if_user_conscious: bool = False, +def get_frameshift_proteins(seq: int, + check_if_user_conscious: bool = False, is_stop_codon_termination_enabled: bool = False, **_) -> dict: """ @@ -460,10 +457,10 @@ def get_frameshift_proteins(seq: int, frames_list = [] for rna in rnas: frame = '' - for i in range(frame_number, len(rna) - (frame_number + 1), 3): # set frame-dependent range to iterate - frame_codon = rna[i:i+3] # extract codon - if not check_all_upper(frame_codon): # check if all letters in codon uppercase - frame_codon = frame_codon.lower() # if not change all to lowercase + for i in range(frame_number, len(rna) - (frame_number + 1), 3): # set frame-dependent range to iterate + frame_codon = rna[i:i + 3] # extract codon + if not check_all_upper(frame_codon): # check if all letters in codon uppercase + frame_codon = frame_codon.lower() # if not change all to lowercase frame += RNA_CODON_TABLE[frame_codon] if is_stop_codon_termination_enabled and RNA_CODON_TABLE[ frame_codon] == '*': # stop writing if meet stop-codon @@ -665,13 +662,13 @@ def parse_input(inp: str, **kwargs) -> dict: """ parsed_dct = {} - inp_type = type(inp) # get input type + inp_type = type(inp) # get input type if inp_type == list: for i, seq in enumerate(inp): parsed_dct |= {i: seq} elif inp_type == dict: parsed_dct = inp - elif inp_type == str and '.' in inp: # check whether input has file extension symbols + elif inp_type == str and '.' 
in inp: # check whether input has file extension symbols parsed_dct = input_dct = read_seq_from_fasta(inp, **kwargs) elif inp_type == str: parsed_dct = {0: inp} From d0e4c2e89269a41bbcddd36ba905ae941d3952a7 Mon Sep 17 00:00:00 2001 From: Artem Vasilev Date: Sun, 1 Oct 2023 13:24:52 +0300 Subject: [PATCH 44/53] Update README.md --- HW4_Voskoboinikov/README.md | 124 ++++++++++++++++++++++++++++++++---- 1 file changed, 111 insertions(+), 13 deletions(-) diff --git a/HW4_Voskoboinikov/README.md b/HW4_Voskoboinikov/README.md index 3f23c7e..df21014 100644 --- a/HW4_Voskoboinikov/README.md +++ b/HW4_Voskoboinikov/README.md @@ -6,7 +6,11 @@ This project contains a `ultimate_protein_tools.py` program, which implements th ### Installation -TODO import from module!!!! +```python +import ultimate_protein_tools as upt +``` + +Make sure the path to the directory with `ultimate_protein_tools.py` is added to the PATH so that Python can find it when importing. ### Usage @@ -20,21 +24,115 @@ To exit the program, type `exit` in the console. While running the program, you can use next arguments for ***run_ultimate_protein_tools*** function (next – *main function*): -- `get_length_of_protein`: Calculates the length of a protein. -- `count_aa`: Counts the number of amino acids in a protein sequence. -- `get_fracture_of_aa`: Returns the fracture or percentage of amino acids in a protein sequence. +1. `read_seq_from_fasta`: Reads sequences from fasta file and returns dictionary. + + Arguments: + - path_to_seq (str): path to file + + Return: + - dict: dict of sequences names as keys and sequences themselves as values {'seq_name': 'sequence',} + +2. `find_sites`: Finds indexes of given sites. + + Arguments: + - seq (str): seq to be checked + - *args (str): sites to be found + - is_one_based (bool): whether result should be 0- (False) or 1-indexed (True). Default False + + Return: + - dict: dictionary of sites as keys and lists of indexes for the site where it's been found + +3. 
`get_protein_rnas`: Returns list of all possible RNA's from which can serve as matrix for protein synthesis. + + WARNING: can be computationally intensive on longer sequences, will NOT start unless check_if_user_conscious is True! + + Arguments: + - seq (str): seq to be checked + - check_if_user_conscious (bool): checks user's consciousness. Default False + + Return: + - list: list of possible RNA's as str + +4. `get_protein_rnas_number`: Get number of all possible RNA's for a given protein. + + Arguments: + - seq (str): seq to be checked + + Return: + - int: number of possible RNA's for seq + +5. `get_frameshift_proteins`: Returns list of all possible proteins from all possible frames in peptide. + + WARNING: can be computationally intensive on longer sequences, will NOT start unless check_if_user_conscious is True! + + Arguments: + - seq (str): seq to be checked + - check_if_user_conscious (bool): checks user's consciousness. Default False + - is_stop_codon_termination_enabled (bool): terminate translation when reached stop-codon. Default False. + + Return: + - dict: dict of lists of all possible frames proteins: + {frame_0: ['protein_seqs'], frame_1: ['protein_seqs'], frame_2: ['protein_seqs']} + +6. `get_length_of_protein`: Calculates the length of a protein. + + Argument: + - seq (str): sequence to calculate the length + + Return: + - int: sequence length + +7. `count_aa`: Counts the number of given or all amino acids in a protein sequence. + + Arguments: + - seq (str): sequence to count amino acids + - aminoacids (str): which amino acids to count in sequence. If you want to count all amino acids in the whole sequence, you can provide empty string to this argument or just don't provide this keyword + + Return: + - dict: a dictionary with amino acids and its count + +8. `get_fracture_of_aa`: Calculates the fracture or percentage of amino acids in a protein sequence. 
+ + Arguments: + - seq (str): sequence in which you need to calculate the fracture of amino acids + - show_as_percentage (bool): change it to True, if you want to get results with percentages + - aminoacids (str): the fracture of which amino acids to count in the sequence + + Return: + - dict: a dictionary with amino acids and its fracture or percentage + +9. `calculate_protein_mass`: Calculates the molecular mass of a protein based on its amino acid sequence and a dictionary of amino acid masses. + + Arguments: + - sequence(str or list): A string or list of characters representing the amino acid sequence. + - aa_atomic_mass(dict): A dictionary linking amino acids to their masses in atomic mass units. + + Return: + - float: The molecular mass of a protein in atomic mass units, rounded to the third decimal place. + +10. `get_atomic_mass`: Calculates the molecular mass of a biological molecule, primarily an amino acid, based on a simple chemical formula. + + Arguments: + - chem (str): String representing a simple chemical formula, e.g. C2H5OH + - atomic_mass (dict[str, float], optional): A dictionary linking the chemical elements Carbon, Hydrogen, Oxygen, + Nitrogen, and Sulfur with their masses in atomic mass units. + + Return: + - float: Molecular mass of a biological molecule in atomic mass units. -- `read_seq_from_fasta`: -- `find_sites`: -- `get_protein_rnas_number`: -- `get_protein_rnas`: -- `get_frameshift_proteins`: +11. `convert_aa_name`: Converts a sequence of one-letter amino acid codes to three-letter designations. -- `calculate_protein_mass`: Calculates the molecular mass of a protein based on its amino acid sequence and a dictionary of amino acid masses. -- `get_atomic_mass`: Calculates the molecular mass of a biological molecule, primarily an amino acid, based on a simple chemical formula. -- `convert_aa_name`: Converts a sequence of one-letter amino acid codes to three-letter designations. 
+ Arguments: + - sequence (str): String with one-letter amino acid codes. + - name_dict (dict[str, str], optional): A dictionary linking one-letter codes to three-letter designations. + If not provided, the standard AA_NAME_DICT dictionary is used. + - sep (str, optional): Separator between three-letter amino acid designations. There is no delimiter by default. + - use_default_register(bool, optional): Determines whether to preserve letter case in three-letter designations. + If True, the letters will be converted to upper or lower case depending on the case of the depending + on the case of the one-letter code. The default is False. -For additional information about arguments and output, please read the docstring for the desired function. + Return: + - str: A string of three-letter amino acid designations separated by the specified delimiter. ### Input of data From 37cc9b24c8f5f4070117ab0a14d8bc7474eda0be Mon Sep 17 00:00:00 2001 From: Aleksandr Voskoboinikov <“wwoskie@gmail.com”> Date: Sun, 1 Oct 2023 13:43:29 +0300 Subject: [PATCH 45/53] Update run_ultimate_protein_tools and command_dct --- HW4_Voskoboinikov/ultimate_protein_tools.py | 26 +++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/HW4_Voskoboinikov/ultimate_protein_tools.py b/HW4_Voskoboinikov/ultimate_protein_tools.py index 0a65e50..7c0bc3a 100644 --- a/HW4_Voskoboinikov/ultimate_protein_tools.py +++ b/HW4_Voskoboinikov/ultimate_protein_tools.py @@ -474,7 +474,7 @@ def get_frameshift_proteins(seq: int, return "You don't fucking know what you're doing!" # politely ask user to reconsider their actions -def length_of_protein(seq: str, **_) -> int: +def get_length_of_protein(seq: str, **_) -> int: """ Calculates the length of a protein. 
@@ -649,9 +649,12 @@ def convert_aa_name(sequence: str, name_dict: dict[str, str] = None, sep: str = 'get_protein_rnas_number': get_protein_rnas_number, 'get_frameshift_proteins': get_frameshift_proteins, 'is_protein_valid': is_protein_valid, - 'length_of_protein': length_of_protein, + 'get_length_of_protein': get_length_of_protein, 'count_aa': count_aa, 'get_fracture_of_aa': get_fracture_of_aa, + 'calculate_protein_mass': calculate_protein_mass, + 'get_atomic_mass': get_atomic_mass, + 'convert_aa_name': convert_aa_name, } @@ -687,11 +690,26 @@ def run_ultimate_protein_tools(command, inp, *args, **kwargs): + """ + Accepts command and runs it on input data with params + + Arguments: + - command (str): Valid command from command_dct + - inp (str): Input in form of path, seq, seq list or seq dct + + Return: + - output_dct (dict): dict where keys are number or name of seq and values are results of command run + """ output_dct = {} input_dct = parse_input(inp) for name in input_dct: - if is_protein_valid(input_dct[name]): + if command in command_dct and command != 'get_atomic_mass': + if is_protein_valid(input_dct[name]): + output_dct[name] = command_dct[command](input_dct[name], *args, **kwargs) + else: + output_dct[name] = is_protein_valid(input_dct[name]) + elif command == 'get_atomic_mass': output_dct[name] = command_dct[command](input_dct[name], *args, **kwargs) else: - output_dct[name] = is_protein_valid(input_dct[name]) + print('Command invalid') return output_dct From aa60f8bfea2dbc5604cf7ac6de886f8158d1821f Mon Sep 17 00:00:00 2001 From: Aleksandr Voskoboinikov <“wwoskie@gmail.com”> Date: Sun, 1 Oct 2023 13:53:44 +0300 Subject: [PATCH 46/53] Update typing in consts and some func --- HW4_Voskoboinikov/ultimate_protein_tools.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/HW4_Voskoboinikov/ultimate_protein_tools.py b/HW4_Voskoboinikov/ultimate_protein_tools.py index 7c0bc3a..1ff58b4 100644 --- 
a/HW4_Voskoboinikov/ultimate_protein_tools.py +++ b/HW4_Voskoboinikov/ultimate_protein_tools.py @@ -23,7 +23,7 @@ H2O_WEIGHT: float = 18.01468 -AA_MASS_DICT: dict[str, float] = { +AA_MASS_DICT = { 'G': 75.0659, 'g': 75.0659, 'L': 131.17262, 'l': 131.17262, 'Y': 181.18894, 'y': 181.18894, @@ -46,7 +46,7 @@ 'T': 119.11826, 't': 119.11826, } -ATOMIC_MASS: dict[str, float] = { +ATOMIC_MASS = { 'C': 12.011, 'H': 1.00784, 'O': 15.999, @@ -54,7 +54,7 @@ 'S': 32.065 } -AA_NAME_DICT: dict[str, str] = { +AA_NAME_DICT = { 'G': 'Gly', 'g': 'Gly', 'L': 'Leu', 'l': 'Leu', 'Y': 'Tyr', 'y': 'Tyr', @@ -540,7 +540,7 @@ def get_fracture_of_aa(seq: str, show_as_percentage: bool = False, aminoacids: s return aa_dict_percent -def calculate_protein_mass(sequence: str, aa_atomic_mass: dict[str, float] = None) -> float: +def calculate_protein_mass(sequence: str, aa_atomic_mass: dict = None) -> float: """ Calculates the molecular mass of a protein based on its amino acid sequence and a dictionary of amino acid masses. @@ -566,7 +566,7 @@ def calculate_protein_mass(sequence: str, aa_atomic_mass: dict[str, float] = Non return round(total_mass, 3) -def get_atomic_mass(chem: str, atomic_mass: dict[str, float] = None) -> float: +def get_atomic_mass(chem: str, atomic_mass: dict = None) -> float: """ Calculates the molecular mass of a biological molecule, primarily an amino acid, based on a simple chemical formula. @@ -601,7 +601,7 @@ def get_atomic_mass(chem: str, atomic_mass: dict[str, float] = None) -> float: return total_mass -def convert_aa_name(sequence: str, name_dict: dict[str, str] = None, sep: str = '', +def convert_aa_name(sequence: str, name_dict: dict = None, sep: str = '', use_default_register: bool = True) -> str: """ Converts a sequence of one-letter amino acid codes to three-letter designations. 
From 5bbd64bd86b98d0839fbba48ac0d6abd823c8062 Mon Sep 17 00:00:00 2001 From: Aleksandr Voskoboinikov <“wwoskie@gmail.com”> Date: Sun, 1 Oct 2023 13:56:30 +0300 Subject: [PATCH 47/53] Remove unused AMINOACID_DICT const --- HW4_Voskoboinikov/ultimate_protein_tools.py | 23 --------------------- 1 file changed, 23 deletions(-) diff --git a/HW4_Voskoboinikov/ultimate_protein_tools.py b/HW4_Voskoboinikov/ultimate_protein_tools.py index 5572755..ca2c368 100644 --- a/HW4_Voskoboinikov/ultimate_protein_tools.py +++ b/HW4_Voskoboinikov/ultimate_protein_tools.py @@ -1,26 +1,3 @@ -AMINOACID_DICT = { - 'A': 'Alanine', 'a': 'alanine', - 'C': 'Cysteine', 'c': 'cysteine', - 'D': 'Aspartic acid', 'd': 'aspartic acid', - 'E': 'Glutamic acid', 'e': 'glutamic acid', - 'F': 'Phenylalanine', 'f': 'Phenylalanine', - 'G': 'Glycine', 'g': 'glycine', - 'H': 'Histidine', 'h': 'histidine', - 'I': 'Isoleucine', 'i': 'isoleucine', - 'K': 'Lysine', 'k': 'lysine', - 'L': 'Leucine', 'l': 'leucine', - 'M': 'Methionine', 'm': 'methionine', - 'N': 'Asparagine', 'n': 'asparagine', - 'P': 'Proline', 'p': 'proline', - 'Q': 'Glutamine', 'q': 'glutamine', - 'R': 'Arginine', 'r': 'arginine', - 'S': 'Serine', 's': 'serine', - 'T': 'Threonine', 't': 'threonine', - 'V': 'Valine', 'v': 'valine', - 'W': 'Tryptophan', 'w': 'tryptophan', - 'Y': 'Tyrosine', 'y': 'tyrosine' -} - H2O_WEIGHT: float = 18.01468 AA_MASS_DICT = { From b71c6656ae79268b5eed45eab9fe33f05cc45937 Mon Sep 17 00:00:00 2001 From: Aleksandr Voskoboinikov <“wwoskie@gmail.com”> Date: Sun, 1 Oct 2023 14:00:04 +0300 Subject: [PATCH 48/53] Update parse_input func --- HW4_Voskoboinikov/ultimate_protein_tools.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/HW4_Voskoboinikov/ultimate_protein_tools.py b/HW4_Voskoboinikov/ultimate_protein_tools.py index ca2c368..0069452 100644 --- a/HW4_Voskoboinikov/ultimate_protein_tools.py +++ b/HW4_Voskoboinikov/ultimate_protein_tools.py @@ -653,7 +653,7 @@ def parse_input(inp: str, **kwargs) -> 
dict: elif inp_type == dict: parsed_dct = inp elif inp_type == str and '.' in inp: # check whether input has file extension symbols - parsed_dct = input_dct = read_seq_from_fasta(inp, **kwargs) + parsed_dct = read_seq_from_fasta(inp, **kwargs) elif inp_type == str: parsed_dct = {0: inp} From f38ceb1b8944f8a7ce4ef3414f45c16577d42ad1 Mon Sep 17 00:00:00 2001 From: Aleksandr Voskoboinikov <“wwoskie@gmail.com”> Date: Sun, 1 Oct 2023 14:06:23 +0300 Subject: [PATCH 49/53] Update get_protein_rnas_number --- HW4_Voskoboinikov/ultimate_protein_tools.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/HW4_Voskoboinikov/ultimate_protein_tools.py b/HW4_Voskoboinikov/ultimate_protein_tools.py index 0069452..5748e1d 100644 --- a/HW4_Voskoboinikov/ultimate_protein_tools.py +++ b/HW4_Voskoboinikov/ultimate_protein_tools.py @@ -373,7 +373,7 @@ def get_protein_rnas(seq: str, return "You don't know what you're doing!" # politely ask user to reconsider their actions -def get_protein_rnas_number(seq: int) -> int: +def get_protein_rnas_number(seq: int, **_) -> int: """ Get number of all possible RNA's for a given protein. 
From 73094a178854acff7afc27a7b8e72deecdba9f1b Mon Sep 17 00:00:00 2001 From: Aleksandr Voskoboinikov <“wwoskie@gmail.com”> Date: Sun, 1 Oct 2023 14:24:41 +0300 Subject: [PATCH 50/53] Update kwargs in input --- HW4_Voskoboinikov/ultimate_protein_tools.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/HW4_Voskoboinikov/ultimate_protein_tools.py b/HW4_Voskoboinikov/ultimate_protein_tools.py index 5748e1d..e09cda6 100644 --- a/HW4_Voskoboinikov/ultimate_protein_tools.py +++ b/HW4_Voskoboinikov/ultimate_protein_tools.py @@ -675,7 +675,7 @@ def run_ultimate_protein_tools(command, - output_dct (dict): dict where keys are number or name of seq and values are results of command run """ output_dct = {} - input_dct = parse_input(inp) + input_dct = parse_input(inp, **kwargs) for name in input_dct: if command in command_dct and command != 'get_atomic_mass': if is_protein_valid(input_dct[name]): From 7b0299c54acb3391485ff54df43fe75b80ff3c32 Mon Sep 17 00:00:00 2001 From: Aleksandr Voskoboinikov <“wwoskie@gmail.com”> Date: Sun, 1 Oct 2023 14:37:05 +0300 Subject: [PATCH 51/53] Update run_ultimate_protein_tools --- HW4_Voskoboinikov/ultimate_protein_tools.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/HW4_Voskoboinikov/ultimate_protein_tools.py b/HW4_Voskoboinikov/ultimate_protein_tools.py index e09cda6..5084e75 100644 --- a/HW4_Voskoboinikov/ultimate_protein_tools.py +++ b/HW4_Voskoboinikov/ultimate_protein_tools.py @@ -686,4 +686,6 @@ def run_ultimate_protein_tools(command, output_dct[name] = command_dct[command](input_dct[name], *args, **kwargs) else: print('Command invalid') + if len(output_dct) == 1: + return output_dct[list(output_dct.keys())[0]] return output_dct From d9aaaea4065aa37314d18febae68f0808654ccb5 Mon Sep 17 00:00:00 2001 From: Artem Vasilev <135062527+ArtemVaska@users.noreply.github.com> Date: Sun, 1 Oct 2023 14:54:31 +0300 Subject: [PATCH 52/53] Update README.md --- HW4_Voskoboinikov/README.md | 171 +++++++++++++++++++++++++++++++----- 
1 file changed, 148 insertions(+), 23 deletions(-) diff --git a/HW4_Voskoboinikov/README.md b/HW4_Voskoboinikov/README.md index df21014..63fdb38 100644 --- a/HW4_Voskoboinikov/README.md +++ b/HW4_Voskoboinikov/README.md @@ -14,15 +14,7 @@ Make sure the path to the directory with `ultimate_protein_tools.py` is added to ### Usage -To run the script, just call it from the directory where the program is located: - -``` -python ultimate_protein_tools.py -``` - -To exit the program, type `exit` in the console. - -While running the program, you can use next arguments for ***run_ultimate_protein_tools*** function (next – *main function*): +You can use next arguments for ***run_ultimate_protein_tools*** function: 1. `read_seq_from_fasta`: Reads sequences from fasta file and returns dictionary. @@ -136,33 +128,166 @@ While running the program, you can use next arguments for ***run_ultimate_protei ### Input of data -During each run of the main function, the user is required to enter a **protein sequence / sequences** that must be processed using the procedures listed above. +During each run of the main function, the user is required to enter a **protein sequence / sequences / fasta-file** that must be processed using the procedures listed above. The program involves the analysis of protein sequences consisting of **20 canonical amino acids**. If the data is entered incorrectly, an appropriate error will be displayed. 
```python -run_ultimate_protein_tools('AZAZA', get_length_of_protein) -> ValueError #TODO add error message +run_ultimate_protein_tools('get_length_of_protein', 'AZAZA') +False ``` ### Examples ```python -run_ultimate_protein_tools('MAGDVLAGTTTSDRAAGALGTLGTAATLRAATDGLLQR', get_length_of_protein) -> 38 -run_ultimate_protein_tools('MAGDVLAGTTTSDRAAGALGTLGTAATLRAATDGLLQR', aminoacids='AT', count_aa) -> {'A': 9, 'T': 7} -run_ultimate_protein_tools('MAGDVLAGTTTSDRAAGALGTLGTAATLRAATDGLLQR', aminoacids='L', get_fracture_of_aa) -> {'L': 0.1579} -run_ultimate_protein_tools('MAGDVLAGTTTSDRAAGALGTLGTAATLRAATDGLLQR', aminoacids='DRG', get_fracture_of_aa, show_as_percentage=True) -> {'D': 7.89, 'R': 7.89, 'G': 15.79} - -#TODO examples for other functions +read_seq_from_fasta('/content/testdata.fasta', use_full_name=True) + +{'crab_anapl ALPHA CRYSTALLIN B CHAIN (ALPHA(B)-CRYSTALLIN).': 'MDITIHNPLIRRPLFSWLAPSRIFDQIFGEHLQESELLPASPSLSPFLMRSPIFRMPSWLETGLSEMRLEKDKFSVNLDVKHFSPEELKVKVLGDMVEIHGKHEERQDEHGFIAREFNRKYRIPADVDPLTITSSLSLDGVLTVSAPRKQSDVPERSIPITREEKPAIAGAQRK', + 'crab_bovin ALPHA CRYSTALLIN B CHAIN (ALPHA(B)-CRYSTALLIN).': 'MDIAIHHPWIRRPFFPFHSPSRLFDQFFGEHLLESDLFPASTSLSPFYLRPPSFLRAPSWIDTGLSEMRLEKDRFSVNLDVKHFSPEELKVKVLGDVIEVHGKHEERQDEHGFISREFHRKYRIPADVDPLAITSSLSSDGVLTVNGPRKQASGPERTIPITREEKPAVTAAPKK', + 'crab_chick ALPHA CRYSTALLIN B CHAIN (ALPHA(B)-CRYSTALLIN).': 'MDITIHNPLVRRPLFSWLTPSRIFDQIFGEHLQESELLPTSPSLSPFLMRSPFFRMPSWLETGLSEMRLEKDKFSVNLDVKHFSPEELKVKVLGDMIEIHGKHEERQDEHGFIAREFSRKYRIPADVDPLTITSSLSLDGVLTVSAPRKQSDVPERSIPITREEKPAIAGSQRK', + 'crab_human ALPHA CRYSTALLIN B CHAIN (ALPHA(B)-CRYSTALLIN).': 'MDIAIHHPWIRRPFFPFHSPSRLFDQFFGEHLLESDLFPTSTSLSPFYLRPPSFLRAPSWFDTGLSEMRLEKDRFSVNLDVKHFSPEELKVKVLGDVIEVHGKHEERQDEHGFISREFHRKYRIPADVDPLTITSSLSSDGVLTVNGPRKQVSGPERTIPITREEKPAVTAAPKK', + 'crab_mesau ALPHA CRYSTALLIN B CHAIN (ALPHA(B)-CRYSTALLIN).': 
'MDIAIHHPWIRRPFFPFHSPSRLFDQFFGEHLLESDLFSTATSLSPFYLRPPSFLRAPSWIDTGLSEMRMEKDRFSVNLDVKHFSPEELKVKVLGDVVEVHGKHEERQDEHGFISREFHRKYRIPADVDPLTITSSLSSDGVLTVNGPRKQASGPERTIPITREEKPAVTAAPKK', + 'crab_mouse ALPHA CRYSTALLIN B CHAIN (ALPHA(B)-CRYSTALLIN) (P23).': 'MDIAIHHPWIRRPFFPFHSPSRLFDQFFGEHLLESDLFSTATSLSPFYLRPPSFLRAPSWIDTGLSEMRLEKDRFSVNLDVKHFSPEELKVKVLGDVIEVHGKHEERQDEHGFISREFHRKYRIPADVDPLAITSSLSSDGVLTVNGPRKQVSGPERTIPITREEKPAVAAAPKK', + 'crab_rabit ALPHA CRYSTALLIN B CHAIN (ALPHA(B)-CRYSTALLIN).': 'MDIAIHHPWIRRPFFPFHSPSRLFDQFFGEHLLESDLFPTSTSLSPFYLRPPSFLRAPSWIDTGLSEMRLEKDRFSVNLDVKHFSPEELKVKVLGDVIEVHGKHEERQDEHGFISREFHRKYRIPADVDPLTITSSLSSDGVLTVNGPRKQAPGPERTIPITREEKPAVTAAPKK', + 'crab_rat ALPHA CRYSTALLIN B CHAIN (ALPHA(B)-CRYSTALLIN).': 'MDIAIHHPWIRRPFFPFHSPSRLFDQFFGEHLLESDLFSTATSLSPFYLRPPSFLRAPSWIDTGLSEMRMEKDRFSVNLDVKHFSPEELKVKVLGDVIEVHGKHEERQDEHGFISREFHRKYRIPADVDPLTITSSLSSDGVLTVNGPRKQASGPERTIPITREEKPAVTAAPKK', + 'crab_squac ALPHA CRYSTALLIN B CHAIN (ALPHA(B)-CRYSTALLIN).': 'MDIAIQHPWLRRPLFPSSIFPSRIFDQNFGEHFDPDLFPSFSSMLSPFYWRMGAPMARMPSWAQTGLSELRLDKDKFAIHLDVKHFTPEELRVKILGDFIEVQAQHEERQDEHGYVSREFHRKYKVPAGVDPLVITCSLSADGVLTITGPRKVADVPERSVPISRDEKPAVAGPQQK'} + +find_sites('FSWLTPSRIFDQIFGEHLQESELLPTSPSLSPFLMRSPFFRMPSWLETGLS', 'M') +{'M': [34, 41]} + +run_ultimate_protein_tools('find_sites', '/content/testdata.fasta', 'M') +{'crab_anapl': {'M': [0, 48, 55, 66, 95]}, + 'crab_bovin': {'M': [0, 67]}, + 'crab_chick': {'M': [0, 48, 55, 66, 95]}, + 'crab_human': {'M': [0, 67]}, + 'crab_mesau': {'M': [0, 67, 69]}, + 'crab_mouse': {'M': [0, 67]}, + 'crab_rabit': {'M': [0, 67]}, + 'crab_rat': {'M': [0, 67, 69]}, + 'crab_squac': {'M': [0, 43, 51, 55, 58]}} + +run_ultimate_protein_tools('find_sites', 'FSWLTPSRIFDQIFGEHLQESELLPTSPSLSPFLMRSPFFRMPSWLETGLS', 'M') +{'M': [34, 41]} + +run_ultimate_protein_tools('get_protein_rnas', 'NnnN', check_if_user_conscious=True, use_full_name=True) +['AAUaauaauAAU', + 'AACaauaauAAU', + 'AAUaacaauAAU', + 'AACaacaauAAU', + 'AAUaauaacAAU', + 'AACaauaacAAU', + 
'AAUaacaacAAU', + 'AACaacaacAAU', + 'AAUaauaauAAC', + 'AACaauaauAAC', + 'AAUaacaauAAC', + 'AACaacaauAAC', + 'AAUaauaacAAC', + 'AACaauaacAAC', + 'AAUaacaacAAC', + 'AACaacaacAAC'] + +run_ultimate_protein_tools('get_protein_rnas', 'NnnN', use_full_name=True) +'You don't know what you're doing!' + +run_ultimate_protein_tools('get_protein_rnas_number', '/content/testdata.fasta', use_full_name=True) +{'crab_anapl ALPHA CRYSTALLIN B CHAIN (ALPHA(B)-CRYSTALLIN).': 306539842376921568815733271183477097188669192775870536614881550927197241900538112709230592, + 'crab_bovin ALPHA CRYSTALLIN B CHAIN (ALPHA(B)-CRYSTALLIN).': 3444683660839151566170764002105685020808749008532141887447338361036488073592878215794262016, + 'crab_chick ALPHA CRYSTALLIN B CHAIN (ALPHA(B)-CRYSTALLIN).': 919619527130764706447199813550431291566007578327611609844644652781591725701614338127691776, + 'crab_human ALPHA CRYSTALLIN B CHAIN (ALPHA(B)-CRYSTALLIN).': 2296455773892767710780509334737123347205832672354761258298225574024325382395252143862841344, + 'crab_mesau ALPHA CRYSTALLIN B CHAIN (ALPHA(B)-CRYSTALLIN).': 765485257964255903593503111579041115735277557451587086099408524674775127465084047954280448, + 'crab_mouse ALPHA CRYSTALLIN B CHAIN (ALPHA(B)-CRYSTALLIN) (P23).': 3444683660839151566170764002105685020808749008532141887447338361036488073592878215794262016, + 'crab_rabit ALPHA CRYSTALLIN B CHAIN (ALPHA(B)-CRYSTALLIN).': 2296455773892767710780509334737123347205832672354761258298225574024325382395252143862841344, + 'crab_rat ALPHA CRYSTALLIN B CHAIN (ALPHA(B)-CRYSTALLIN).': 574113943473191927695127333684280836801458168088690314574556393506081345598813035965710336, + 'crab_squac ALPHA CRYSTALLIN B CHAIN (ALPHA(B)-CRYSTALLIN).': 9450435283509332143129668044185692786855278487056630692585290428083643548951654913015808} + +run_ultimate_protein_tools('get_frameshift_proteins', 'NnnN', check_if_user_conscious=True) +{'frame_0': ['NnnN'], + 'frame_1': ['iit', 'tii', 'tti', 'iti', 'tit', 'itt', 'ttt', 'iii'], + 
'frame_2': ['**q', '*qq', '***', '*q*', 'q**', 'qqq', 'qq*', 'q*q']} + +run_ultimate_protein_tools('get_frameshift_proteins', 'NnnN', check_if_user_conscious=True, is_stop_codon_termination_enabled=True) +{'frame_0': ['NnnN'], + 'frame_1': ['iit', 'tii', 'tti', 'iti', 'tit', 'itt', 'ttt', 'iii'], + 'frame_2': ['qqq', '*', 'q*', 'qq*']} + +run_ultimate_protein_tools('get_length_of_protein', '/content/testdata.fasta') +{'crab_anapl': 174, + 'crab_bovin': 175, + 'crab_chick': 174, + 'crab_human': 175, + 'crab_mesau': 175, + 'crab_mouse': 175, + 'crab_rabit': 175, + 'crab_rat': 175, + 'crab_squac': 177} + +run_ultimate_protein_tools('count_aa', '/content/testdata.fasta', 'MLK', use_full_name=True) +{'crab_anapl ALPHA CRYSTALLIN B CHAIN (ALPHA(B)-CRYSTALLIN).': {'M': 5, + 'L': 18, + 'K': 10}, + 'crab_bovin ALPHA CRYSTALLIN B CHAIN (ALPHA(B)-CRYSTALLIN).': {'M': 2, + 'L': 15, + 'K': 10}, + 'crab_chick ALPHA CRYSTALLIN B CHAIN (ALPHA(B)-CRYSTALLIN).': {'M': 5, + 'L': 18, + 'K': 10}, + 'crab_human ALPHA CRYSTALLIN B CHAIN (ALPHA(B)-CRYSTALLIN).': {'M': 2, + 'L': 15, + 'K': 10}, + 'crab_mesau ALPHA CRYSTALLIN B CHAIN (ALPHA(B)-CRYSTALLIN).': {'M': 3, + 'L': 14, + 'K': 10}, + 'crab_mouse ALPHA CRYSTALLIN B CHAIN (ALPHA(B)-CRYSTALLIN) (P23).': {'M': 2, + 'L': 15, + 'K': 10}, + 'crab_rabit ALPHA CRYSTALLIN B CHAIN (ALPHA(B)-CRYSTALLIN).': {'M': 2, + 'L': 15, + 'K': 10}, + 'crab_rat ALPHA CRYSTALLIN B CHAIN (ALPHA(B)-CRYSTALLIN).': {'M': 3, + 'L': 14, + 'K': 10}, + 'crab_squac ALPHA CRYSTALLIN B CHAIN (ALPHA(B)-CRYSTALLIN).': {'M': 5, + 'L': 13, + 'K': 9}} + +run_ultimate_protein_tools('count_aa', '/content/testdata.fasta', 'MLK') +{'crab_anapl': {'M': 5, 'L': 18, 'K': 10}, + 'crab_bovin': {'M': 2, 'L': 15, 'K': 10}, + 'crab_chick': {'M': 5, 'L': 18, 'K': 10}, + 'crab_human': {'M': 2, 'L': 15, 'K': 10}, + 'crab_mesau': {'M': 3, 'L': 14, 'K': 10}, + 'crab_mouse': {'M': 2, 'L': 15, 'K': 10}, + 'crab_rabit': {'M': 2, 'L': 15, 'K': 10}, + 'crab_rat': {'M': 3, 'L': 14, 'K': 
10}, + 'crab_squac': {'M': 5, 'L': 13, 'K': 9}} + +run_ultimate_protein_tools('get_fracture_of_aa', 'NnnN') +{'n': 0.5, 'N': 0.5} + +run_ultimate_protein_tools('calculate_protein_mass', 'NnnN') +474.424 + +run_ultimate_protein_tools('calculate_protein_mass', '/content/testdata.fasta') +{'crab_anapl': 19936.699, + 'crab_bovin': 20036.552, + 'crab_chick': 20019.741, + 'crab_human': 20158.674, + 'crab_mesau': 20074.577, + 'crab_mouse': 20038.568, + 'crab_rabit': 20106.642, + 'crab_rat': 20088.604, + 'crab_squac': 20253.948} + +run_ultimate_protein_tools('get_atomic_mass', 'C2H5OH') +46.06804 + +run_ultimate_protein_tools('convert_aa_name', 'LTPSRIFDQIFGEHLQESELLP', use_default_register=False, sep='-') +Leu-Thr-Pro-Ser-Arg-Ile-Phe-Asp-Gln-Ile-Phe-Gly-Glu-His-Leu-Gln-Glu-Ser-Glu-Leu-Leu-Pro ``` -### Troubleshooting - -TODO change this - -If the program doesn't work – try to scream like opossum. - ### Contacts ![Wonderful Team](https://github.com/ArtemVaska/HW4_Functions2/blob/HW4_Vasilev/HW4_Voskoboinikov/Wonderful_team.jpg) From c7db4dd2fd16ec35ab423b3b896530aa1c7469b2 Mon Sep 17 00:00:00 2001 From: wwoskie <109810573+wwoskie@users.noreply.github.com> Date: Sun, 1 Oct 2023 14:59:49 +0300 Subject: [PATCH 53/53] Update README.md --- HW4_Voskoboinikov/README.md | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/HW4_Voskoboinikov/README.md b/HW4_Voskoboinikov/README.md index 63fdb38..261df98 100644 --- a/HW4_Voskoboinikov/README.md +++ b/HW4_Voskoboinikov/README.md @@ -16,6 +16,16 @@ Make sure the path to the directory with `ultimate_protein_tools.py` is added to You can use next arguments for ***run_ultimate_protein_tools*** function: +0. 
`run_ultimate_protein_tools` + Accepts command and runs it on input data with params + + Arguments: + - command (str): Valid command from command_dct + - inp (str): Input in form of path, seq, seq list or seq dct + + Return: + - output_dct (dict): dict where keys are number or name of seq and values are results of command run + 1. `read_seq_from_fasta`: Reads sequences from fasta file and returns dictionary. Arguments: