From b60aeb6e4dc5ce059025fecb218654dc767d924d Mon Sep 17 00:00:00 2001 From: OtterLawyer <130027058+OtterLawyer@users.noreply.github.com> Date: Sun, 1 Oct 2023 13:46:57 +0900 Subject: [PATCH 01/13] Create protein_tools.py --- HW4_Sivtsev/protein_tools.py | 44 ++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 HW4_Sivtsev/protein_tools.py diff --git a/HW4_Sivtsev/protein_tools.py b/HW4_Sivtsev/protein_tools.py new file mode 100644 index 0000000..24a8e61 --- /dev/null +++ b/HW4_Sivtsev/protein_tools.py @@ -0,0 +1,44 @@ +aminoacid_alphabet_1to3 = {'A': 'Ala', 'R': 'Arg', 'N': 'Asn', 'D': 'Asp', 'C': 'Cys', + 'Q': 'Gln', 'E': 'Glu', 'G': 'Gly', 'H': 'His', 'I': 'Ile', + 'L': 'Leu', 'K': 'Lys', 'M': 'Met', 'F': 'Phe', 'P': 'Pro', + 'S': 'Ser', 'T': 'Thr', 'W': 'Trp', 'Y': 'Tyr', 'V': 'Val'} + +molecular_mass = {'A': 89.094, 'R': 174.203, 'N': 132.119, 'D': 133.104, 'C': 121.154, + 'E': 147.131, 'Q': 146.146, 'G': 75.067, 'H': 155.156, 'I': 131.175, + 'L': 131.175, 'K': 146.189, 'M': 149.208, 'F': 165.192, 'P': 115.132, + 'S': 105.093, 'T': 119.119, 'W': 204.228, 'Y': 181.191, 'V': 117.148} + + +def convert_1to3(prot: str) -> str: + """ + Converts 1-symbol aminoacid sequence into 3-symbol aminoacid sequence. + Arguments: + -prot (str) - aminoacid sequence in uppercase 1-symbol format + Return: + -output (str) - aminoacid sequence in 3-symbol format. + """ + output = '' + if len(prot) > 0: + for i in prot: + if i in aminoacid_alphabet_1to3: + output += aminoacid_alphabet_1to3[i] + else: + raise ValueError('Input format: aminoacids in uppercase 1-letter symbols') + return output + +def calculate_mm(prot: str) -> float: + """ + Calculates molecular mass of protein. + Argumets: + -prot (str) - aminoacid sequence in uppercase 1-symbol format. + Return: + -output (float) - molecular mass in float format with 2 digits after dot. 
+ """ + prot_seq = set(prot) + output = 0 + if len(prot) == 1: + output = molecular_mass[prot] + else: + for i in prot_seq: + output += prot.count(i) * molecular_mass[i] - (18.0153*(len(prot)-1)) + return round(output,3) \ No newline at end of file From 7a688d6d937ca70397578171714c056a328a8d68 Mon Sep 17 00:00:00 2001 From: OtterLawyer <130027058+OtterLawyer@users.noreply.github.com> Date: Sun, 1 Oct 2023 13:53:07 +0900 Subject: [PATCH 02/13] Restart --- HW4_Sivtsev/protein_tools.py | 44 ------------------------------------ 1 file changed, 44 deletions(-) delete mode 100644 HW4_Sivtsev/protein_tools.py diff --git a/HW4_Sivtsev/protein_tools.py b/HW4_Sivtsev/protein_tools.py deleted file mode 100644 index 24a8e61..0000000 --- a/HW4_Sivtsev/protein_tools.py +++ /dev/null @@ -1,44 +0,0 @@ -aminoacid_alphabet_1to3 = {'A': 'Ala', 'R': 'Arg', 'N': 'Asn', 'D': 'Asp', 'C': 'Cys', - 'Q': 'Gln', 'E': 'Glu', 'G': 'Gly', 'H': 'His', 'I': 'Ile', - 'L': 'Leu', 'K': 'Lys', 'M': 'Met', 'F': 'Phe', 'P': 'Pro', - 'S': 'Ser', 'T': 'Thr', 'W': 'Trp', 'Y': 'Tyr', 'V': 'Val'} - -molecular_mass = {'A': 89.094, 'R': 174.203, 'N': 132.119, 'D': 133.104, 'C': 121.154, - 'E': 147.131, 'Q': 146.146, 'G': 75.067, 'H': 155.156, 'I': 131.175, - 'L': 131.175, 'K': 146.189, 'M': 149.208, 'F': 165.192, 'P': 115.132, - 'S': 105.093, 'T': 119.119, 'W': 204.228, 'Y': 181.191, 'V': 117.148} - - -def convert_1to3(prot: str) -> str: - """ - Converts 1-symbol aminoacid sequence into 3-symbol aminoacid sequence. - Arguments: - -prot (str) - aminoacid sequence in uppercase 1-symbol format - Return: - -output (str) - aminoacid sequence in 3-symbol format. - """ - output = '' - if len(prot) > 0: - for i in prot: - if i in aminoacid_alphabet_1to3: - output += aminoacid_alphabet_1to3[i] - else: - raise ValueError('Input format: aminoacids in uppercase 1-letter symbols') - return output - -def calculate_mm(prot: str) -> float: - """ - Calculates molecular mass of protein. 
- Argumets: - -prot (str) - aminoacid sequence in uppercase 1-symbol format. - Return: - -output (float) - molecular mass in float format with 2 digits after dot. - """ - prot_seq = set(prot) - output = 0 - if len(prot) == 1: - output = molecular_mass[prot] - else: - for i in prot_seq: - output += prot.count(i) * molecular_mass[i] - (18.0153*(len(prot)-1)) - return round(output,3) \ No newline at end of file From 087f75a954db99926ab0c7d8bb1710bc16d1e7d9 Mon Sep 17 00:00:00 2001 From: OtterLawyer <130027058+OtterLawyer@users.noreply.github.com> Date: Sun, 1 Oct 2023 13:54:29 +0900 Subject: [PATCH 03/13] Add convert_1to3 function --- HW4_Sivtsev/protein_tools.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 HW4_Sivtsev/protein_tools.py diff --git a/HW4_Sivtsev/protein_tools.py b/HW4_Sivtsev/protein_tools.py new file mode 100644 index 0000000..eccc2fa --- /dev/null +++ b/HW4_Sivtsev/protein_tools.py @@ -0,0 +1,21 @@ +aminoacid_alphabet_1to3 = {'A': 'Ala', 'R': 'Arg', 'N': 'Asn', 'D': 'Asp', 'C': 'Cys', + 'Q': 'Gln', 'E': 'Glu', 'G': 'Gly', 'H': 'His', 'I': 'Ile', + 'L': 'Leu', 'K': 'Lys', 'M': 'Met', 'F': 'Phe', 'P': 'Pro', + 'S': 'Ser', 'T': 'Thr', 'W': 'Trp', 'Y': 'Tyr', 'V': 'Val'} + +def convert_1to3(prot: str) -> str: + """ + Converts 1-symbol aminoacid sequence into 3-symbol aminoacid sequence. + Arguments: + -prot (str) - aminoacid sequence in uppercase 1-symbol format + Return: + -output (str) - aminoacid sequence in 3-symbol format. 
+ """ + output = '' + if len(prot) > 0: + for i in prot: + if i in aminoacid_alphabet_1to3: + output += aminoacid_alphabet_1to3[i] + else: + raise ValueError('Input format: aminoacids in uppercase 1-letter symbols') + return output From dca28bf2cc90f74a337a226b83e1c129ed93eeec Mon Sep 17 00:00:00 2001 From: OtterLawyer <130027058+OtterLawyer@users.noreply.github.com> Date: Sun, 1 Oct 2023 13:55:38 +0900 Subject: [PATCH 04/13] Add calculate molecular mass function --- HW4_Sivtsev/protein_tools.py | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/HW4_Sivtsev/protein_tools.py b/HW4_Sivtsev/protein_tools.py index eccc2fa..9077439 100644 --- a/HW4_Sivtsev/protein_tools.py +++ b/HW4_Sivtsev/protein_tools.py @@ -2,7 +2,12 @@ 'Q': 'Gln', 'E': 'Glu', 'G': 'Gly', 'H': 'His', 'I': 'Ile', 'L': 'Leu', 'K': 'Lys', 'M': 'Met', 'F': 'Phe', 'P': 'Pro', 'S': 'Ser', 'T': 'Thr', 'W': 'Trp', 'Y': 'Tyr', 'V': 'Val'} - + +molecular_mass = {'A': 89.094, 'R': 174.203, 'N': 132.119, 'D': 133.104, 'C': 121.154, + 'E': 147.131, 'Q': 146.146, 'G': 75.067, 'H': 155.156, 'I': 131.175, + 'L': 131.175, 'K': 146.189, 'M': 149.208, 'F': 165.192, 'P': 115.132, + 'S': 105.093, 'T': 119.119, 'W': 204.228, 'Y': 181.191, 'V': 117.148} + def convert_1to3(prot: str) -> str: """ Converts 1-symbol aminoacid sequence into 3-symbol aminoacid sequence. @@ -19,3 +24,20 @@ def convert_1to3(prot: str) -> str: else: raise ValueError('Input format: aminoacids in uppercase 1-letter symbols') return output + +def calculate_mm(prot: str) -> float: + """ + Calculates molecular mass of protein. + Argumets: + -prot (str) - aminoacid sequence in uppercase 1-symbol format. + Return: + -output (float) - molecular mass in float format with 2 digits after dot. 
+ """ + prot_seq = set(prot) + output = 0 + if len(prot) == 1: + output = molecular_mass[prot] + else: + for i in prot_seq: + output += prot.count(i) * molecular_mass[i] - (18.0153*(len(prot)-1)) + return round(output,3) \ No newline at end of file From 0bd2682b2a39ae6cfa7c7591cad01771f720e3c3 Mon Sep 17 00:00:00 2001 From: Liza Date: Sun, 1 Oct 2023 12:23:38 +0700 Subject: [PATCH 05/13] Add count_aa_length function --- HW4_Sivtsev/protein_tools.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/HW4_Sivtsev/protein_tools.py b/HW4_Sivtsev/protein_tools.py index 9077439..1d6541d 100644 --- a/HW4_Sivtsev/protein_tools.py +++ b/HW4_Sivtsev/protein_tools.py @@ -40,4 +40,15 @@ def calculate_mm(prot: str) -> float: else: for i in prot_seq: output += prot.count(i) * molecular_mass[i] - (18.0153*(len(prot)-1)) - return round(output,3) \ No newline at end of file + return round(output,3) + + +def count_aa_length (prot: str) -> int: + """ + Counts the length of the sequence + Arguments: + -prot (str) - the sequence, which length should be counted + Return: + -int - the result of the count + """ + return len(prot) From b9796a37cc5a6e04b58cbbfcb47a6b213a50ff68 Mon Sep 17 00:00:00 2001 From: Liza Date: Sun, 1 Oct 2023 12:24:41 +0700 Subject: [PATCH 06/13] Add count_nucl_length function --- HW4_Sivtsev/protein_tools.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/HW4_Sivtsev/protein_tools.py b/HW4_Sivtsev/protein_tools.py index 1d6541d..e2a2764 100644 --- a/HW4_Sivtsev/protein_tools.py +++ b/HW4_Sivtsev/protein_tools.py @@ -52,3 +52,14 @@ def count_aa_length (prot: str) -> int: -int - the result of the count """ return len(prot) + + +def count_nucl_length (prot: str) -> int: + """ + Counts the length of the nucleotide sequence that codes the inputted aminoacid sequence + Arguments: + -prot (str) - the sequence, which coding nucleotide sequence length should be counted + Return: + -int - the result of the count + """ + return 
len(prot)*3 \ No newline at end of file From 2f36bfc0342cb5ac98d9c42034be9eefc45dc450 Mon Sep 17 00:00:00 2001 From: Liza Date: Sun, 1 Oct 2023 12:25:46 +0700 Subject: [PATCH 07/13] Add main protein_tools function --- HW4_Sivtsev/protein_tools.py | 32 +++++++++++++++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/HW4_Sivtsev/protein_tools.py b/HW4_Sivtsev/protein_tools.py index e2a2764..43a9545 100644 --- a/HW4_Sivtsev/protein_tools.py +++ b/HW4_Sivtsev/protein_tools.py @@ -62,4 +62,34 @@ def count_nucl_length (prot: str) -> int: Return: -int - the result of the count """ - return len(prot)*3 \ No newline at end of file + return len(prot)*3 + + +def protein_tools (function : str, *prots : str) -> (int, list, str): + """ + Consists of several functions, is able to: + -check whether the inputted sequence is a peptide + -count the length of the sequence + -count the length of the coding nucleotide sequence of the inputted sequence + -count the molecular mass of the sequence + -convert 1-letter input style into 3-letter and vice versa + -show the aminoacid content of the sequence + Arguments: + -function (str) - the name of the action, the user wants to do on the sequence(s) + -prots (str) - the sequence(s) that should be manipulated + Return: + -int - results of counts + -list or str - result of convertation or showing the content + + """ + functions = {'count_length':count_aa_length, 'count_nucleotide_length':count_nucl_length, + 'count_molecular_mass':calculate_mm, 'show_content':count_aa_content, 'convert_1_to_3':convert_1to3, + 'count_extinction_280nm':count_extinction_280nm } + protein = [] + for prot in prots: + is_prot(prot) + protein.append(functions[function](prot)) + if len(protein) == 1: + return protein[0] + else: + return protein \ No newline at end of file From 30b1dac4aacd30af6be614edfd2aded4037aee56 Mon Sep 17 00:00:00 2001 From: Albina Khairetdinova Date: Sun, 1 Oct 2023 12:10:29 +0600 Subject: [PATCH 08/13] Add count 
protein content function --- HW4_Sivtsev/protein_tools.py | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/HW4_Sivtsev/protein_tools.py b/HW4_Sivtsev/protein_tools.py index 43a9545..66ddfe1 100644 --- a/HW4_Sivtsev/protein_tools.py +++ b/HW4_Sivtsev/protein_tools.py @@ -65,6 +65,25 @@ def count_nucl_length (prot: str) -> int: return len(prot)*3 +def count_aa_content(prot: str) -> dict: + """ + Counts each aminoacid in protein and returns thire quantity + + Arguments: prot (str) - one of the input protein sequences was given by protein_tools + Return: aa_content (dict) - dict of aminoacids and their quantity in protein + """ + + aas = 'ACDEFGHIKLMNPQRSTVWY' + prot = prot.upper() + aa_counter = [0] * 20 + for i in range(len(prot)): + n = aas.index(prot[i]) + aa_counter[n] += 1 + + aa_content = dict(zip(list(aas), aa_counter)) + return aa_content + + def protein_tools (function : str, *prots : str) -> (int, list, str): """ Consists of several functions, is able to: @@ -92,4 +111,4 @@ def protein_tools (function : str, *prots : str) -> (int, list, str): if len(protein) == 1: return protein[0] else: - return protein \ No newline at end of file + return protein From e1441786792b3da0a6fc89e2960e34f63d2bfb5a Mon Sep 17 00:00:00 2001 From: Albina Khairetdinova Date: Sun, 1 Oct 2023 12:20:06 +0600 Subject: [PATCH 09/13] Add count_extinstion_280nm function --- HW4_Sivtsev/protein_tools.py | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/HW4_Sivtsev/protein_tools.py b/HW4_Sivtsev/protein_tools.py index 66ddfe1..d9d6028 100644 --- a/HW4_Sivtsev/protein_tools.py +++ b/HW4_Sivtsev/protein_tools.py @@ -84,6 +84,31 @@ def count_aa_content(prot: str) -> dict: return aa_content +def count_extinstion_280nm(prot: str) -> int: + """ + Counts extinction in 280nm according to W, Y, C (cystine) number. + + Transforms prot sequence into dictionary using count_aa_content(prot) function. 
+ Uses the formula: e = 5500 * W + 1490 * Y + 125 * C + Cystine number is counted roughly. + + Arguments: prot (str) - one of the input protein sequences + Return: e (int) - result of counts: extinction coefficient at 280 nm + + """ + aa_cont_dict = count_aa_content(prot) + + W_number = aa_cont_dict.get('W') + Y_number = aa_cont_dict.get('Y') + C_number = aa_cont_dict.get('C') + + if C_number == 0: + e = 5500 * W_number + 1490 * Y_number + else: + e = 5500 * W_number + 1490 * Y_number + 125*(C_number//2) + return e + + def protein_tools (function : str, *prots : str) -> (int, list, str): """ Consists of several functions, is able to: From 308432d57ba4fc4d107a4d773398b77deb3955e4 Mon Sep 17 00:00:00 2001 From: Albina Khairetdinova Date: Sun, 1 Oct 2023 12:37:06 +0600 Subject: [PATCH 10/13] Add is_prot function --- HW4_Sivtsev/protein_tools.py | 26 +++++++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) diff --git a/HW4_Sivtsev/protein_tools.py b/HW4_Sivtsev/protein_tools.py index d9d6028..5bda871 100644 --- a/HW4_Sivtsev/protein_tools.py +++ b/HW4_Sivtsev/protein_tools.py @@ -7,7 +7,26 @@ 'E': 147.131, 'Q': 146.146, 'G': 75.067, 'H': 155.156, 'I': 131.175, 'L': 131.175, 'K': 146.189, 'M': 149.208, 'F': 165.192, 'P': 115.132, 'S': 105.093, 'T': 119.119, 'W': 204.228, 'Y': 181.191, 'V': 117.148} - + + +def is_prot(prot: str) -> bool: + """ + Checks is given sequence a protein + Arguments: + prot (str) - aminoacid sequence of protein + Return: + bool if sequence is correct + ValueError('Please check proteins sequences') if there were wrong symbols + """ + aas = {'A', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'K', 'L', 'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'V', 'W', 'Y'} + prot = prot.upper() + uniq_aas = set(prot) + aa_test = (uniq_aas <= aas) + if aa_test == 0: + raise ValueError('Please check proteins sequences') + return True + + def convert_1to3(prot: str) -> str: """ Converts 1-symbol aminoacid sequence into 3-symbol aminoacid sequence. 
@@ -25,6 +44,7 @@ def convert_1to3(prot: str) -> str: raise ValueError('Input format: aminoacids in uppercase 1-letter symbols') return output + def calculate_mm(prot: str) -> float: """ Calculates molecular mass of protein. @@ -43,7 +63,7 @@ def calculate_mm(prot: str) -> float: return round(output,3) -def count_aa_length (prot: str) -> int: +def count_aa_length(prot: str) -> int: """ Counts the length of the sequence Arguments: @@ -84,7 +104,7 @@ def count_aa_content(prot: str) -> dict: return aa_content -def count_extinstion_280nm(prot: str) -> int: +def count_extinction_280nm(prot: str) -> int: """ Counts extinction in 280nm according to W, Y, C (cystine) number. From 215b45c35f2e98d24b509f997bc9cb2f018e3f4e Mon Sep 17 00:00:00 2001 From: OtterLawyer <130027058+OtterLawyer@users.noreply.github.com> Date: Sun, 1 Oct 2023 15:42:43 +0900 Subject: [PATCH 11/13] Fix calculate_mm function --- HW4_Sivtsev/protein_tools.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/HW4_Sivtsev/protein_tools.py b/HW4_Sivtsev/protein_tools.py index 5bda871..41a4a9b 100644 --- a/HW4_Sivtsev/protein_tools.py +++ b/HW4_Sivtsev/protein_tools.py @@ -59,7 +59,8 @@ def calculate_mm(prot: str) -> float: output = molecular_mass[prot] else: for i in prot_seq: - output += prot.count(i) * molecular_mass[i] - (18.0153*(len(prot)-1)) + output += prot.count(i) * molecular_mass[i] + output -= 18.0153*(len(prot)-1) return round(output,3) From 4403856d5d7892cae2c752d35156fd82e4574cda Mon Sep 17 00:00:00 2001 From: Zoea1 <143959084+Zoea1@users.noreply.github.com> Date: Sun, 1 Oct 2023 13:48:41 +0700 Subject: [PATCH 12/13] Update README.md --- README.md | 144 ++++++++++++++++++++++++++++++------------------------ 1 file changed, 79 insertions(+), 65 deletions(-) diff --git a/README.md b/README.md index f918170..01a1f20 100644 --- a/README.md +++ b/README.md @@ -1,65 +1,79 @@ -# HW 4. 
Functions 2 -> *This is the repo for the fourth homework of the BI Python 2023 course* - -### Homework description - -На прошлой неделе вы делали утилиту для работы с последовательностями нуклеиновых кислот (с весьма строгим ТЗ). Пришло время для чего-то более самостоятельного. - -#### Основное задание - - -Напишите утилиту для работы с последовательностями белков. Там должно быть минимум 5 различных операций, должна быть какая-то точка входа через которую пользователь будет всё это дело использовать. На этом, по сути, всё. Всё целиком зависит от вашей фантазии и креативности. Можете опираться на ДЗ №2 и №3. - -Самая главная часть задания - это файл `README.md`. Сделайте краткое введение, напишите описание тула, приведите документацию по использованию со списком аргументов. Добавьте примеры использования. Возможно, вы захотите сделать секцию Troubleshooting. ***Почему это нужно?*** В этот раз проверяющий не будет знать того, как должен работать ваш тул. Это ваш авторский код. Даже самая прекрасная функциональность, не будучи отраженной в README, скорее всего останется незамеченной. README - это ваш способ познакомить пользователя с тулом, показать всё лучше и обосновать, почему именно ваша команда должна получить наивысший балл. - -Есть люди которые, любят писать документации, а есть те - кто не любит. Найдите в вашей команде того, кто любит. И в будущем в своих рабочих проектах всегда держите рядом такого человек (или будьте им). - -Примеры некоторых README, которыми можно вдохновляться: - -- [MetaFX](https://github.com/ctlab/metafx), тул Артёма Иванова. Там еще и [wiki](https://github.com/ctlab/metafx/wiki) крутое. -- [samovar](https://github.com/nvaulin/samovar) -- [MetaGEM](https://github.com/franciscozorrilla/metaGEM) -- [Pharokka](https://github.com/gbouras13/pharokka) - -Типовые секции, на которые стоит обратить внимание: Title, Overview, Usage, Options, Examples, Troubleshooting, Contacts. 
- -**Tехническое требование к заданию.** - -Это задание будет выполняться в командах по 3 человека. Каждый из членов команды должен внести ***как минимум*** 2 функции. Каждое внесение функции должно сопровождаться коммитом с осмысленным описанием коммита. Ниже приведена последовательность действий для успешного выполнения задания (аналогично ДЗ №2): - -1. Посмотрите состав своей команды здесь ([**ССЫЛКА**](https://docs.google.com/spreadsheets/d/1KMBBBu8LqauRpDJb0v1ldPwpvzNn8-KakcHexAcqLsE/edit?usp=sharing)). -2. Тимлид делает форк данного репозитория. **В форке создает ветку `HW4_`, в ветке создает папку `HW4_`, в этой папке вы всё делаете.** -3. Члены команды могут либо делать свои форки, либо работать в репозитории тимлида в качестве колабораторов ("contributors"). В любом случае делаете клоны => пишите код локально => пушите. -4. В конце тимлид делайет pull-request из `HW4_` своего репозитория в `main` этого. - - -А также: -- Сопроводите программу лучшим `README.md` файлом в вашей жизни (на английском языке). -- В этом ДЗ проблемы с качеством кода (нейминги, пустые строки, анноатции типов, док.стринги, пробелы) могут привести к снижению балла. Воспользуйтесь линтерами чтобы себя обезопасить. IDE по типу PyCharm или VSCode имеют фунцонал по авто-исправлению многих проблем такого рода. - -Автотестов на GitHub в этом ДЗ нет, но вы можете прогнать линтеры на качество кода локально (как в ДЗ №3, подробнее читайте [тут](https://plausible-cannon-091.notion.site/Code-auto-checks-02b2ea69c1d545fca07b50ce5933ed5f?pvs=4)). - -- Программа должна сохранять регистр символов. -- Программа должна работать только с последовательностями белков. -- Запрещается использование сторонних модулей. - - -### Форма сдачи - -Прикрепите ссылку на pull-request тимлида в Google Class (можете сделать от лица каждого члена команды, но это не обязательно). 
- - -### Pазбалловка - -- За каждую из 5 операций - максимум **1.5 балла** -- За README - максимум **2.5 балла** -- Если вы не внесли как минимум 2 функции от себя, вы получаете 0 баллов (на баллы остальных членов команды это не влияет). -- За фото созвона в README можно получить 0.2 доп. балла (но не более 10 баллов суммарно) - - - -### **Предполагаемый учебный результат** - -Это задание позволит вам проявить креативность и учиться быть не только кодером, но и автором. Также это задание поможет окончательно закрепить материал по функциям который мы прошли. - -Удачи! ✨✨ +# protein_tools.py +There is a tool, written in Python, for working with protein sequences. It contains several functions, described below in the section "Usage". + +## Installation +Download protein_tools.py, adapt it to your code and relax. + +## Usage +Provide a tool with the sequence(s) of the protein(s) in 1-letter format (for example, DYKDDDDK) and the function needed. If you +occasionally write down a non-peptide sequence, the programm will return an error. 
+ +Here is the catalogue of actions the user can choose: + +- count_length: gives the length(s) of the protein sequence(s) +- count_nucleotide_length: counts the length(s) of the coding nucleotide sequence(s) of the protein sequence(s) +- count_molecular_mass: calculates molecular mass of the input (the algorithm takes into consideration water mass and subtracts it) +- show_content: shows the aminoacid content of the protein(s) +- convert_1_to_3: converts 1-letter format into 3-letter one +- count_extinction_280nm: counts the molar extinction coefficient (this function counts cystine contribution to extinction coefficient as two cysteins give 1 SS-bond) + +## Examples: +Examples for some of the protein_tools.py functions: +``` +function = 'count_length' +prot1 = 'DYKDDDDK' +prot2 = 'DYKDDdDk' +``` +The result would be: +``` +[8, 8] +``` +With 'count_nucleotide_length' the same input gives values three times larger: [24, 24] + +Count molecular mass: +``` +Count molecular mass: +function = 'count_molecular_mass' +prot1 = 'DYKDDDDK' +``` +The result of the program's work: +``` +1012.982 +``` +Converting into 3-letter format: +``` +function = 'convert_1_to_3' +prot1 = 'DYKDDDDK' +``` +The result: +``` +'AspTyrLysAspAspAspAspLys' +``` +Showing the content: +``` +function = 'show_content' +prot1 = 'DYKDDDDK' +``` +The user gets this: +``` +{'A': 0, 'C': 0, 'D': 5, 'E': 0, 'F': 0, 'G': 0, 'H': 0, 'I': 0, 'K': 2, 'L': 0, 'M': 0, 'N': 0, 'P': 0, 'Q': 0, 'R': 0, 'S': 0, 'T': 0, 'V': 0, 'W': 0, 'Y': 1} +``` +Count extinction coefficient 280nm: +``` +function = 'count_extinction_280nm' +prot1 = 'DYKDDDDK' +prot2 = 'AADDRR' +``` +The result: +``` +[1490, 0] +``` +## Troubleshooting +If the user sees ValueError, the user may inputted a non-protein sequence. The programm works with protein sequences in 1-letter format only. Please, check the sequence. 
+## Authors' contribution: +- Alexei Sivtsev: calculate_mm, convert_1to3 (team leader) +- Albina Khairetdinova: count_aa_content, count_extinction_280nm, is_prot (it is the inner function, that appears only when the sequence is non-protein and returns ValueError) +- Elizaveta Zolotenkova: main function protein_tools, function count_aa_length, function count_nucl_length and Read.me + +## Additional information (a photo of the authors) +![authors](https://github.com/Zoea1/HW4_Functions2/assets/143959084/114d6852-8fb8-4bcc-baf7-873eb3d85a5e) From 09005fd585886a85c9229c7127d5ed3a3f7e32b6 Mon Sep 17 00:00:00 2001 From: OtterLawyer <130027058+OtterLawyer@users.noreply.github.com> Date: Sun, 1 Oct 2023 16:16:34 +0900 Subject: [PATCH 13/13] Update README.md Edit READ.md add coursive --- README.md | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 01a1f20..5b8b3bc 100644 --- a/README.md +++ b/README.md @@ -10,12 +10,12 @@ occasionally write down a non-peptide sequence, the programm will return an erro Here is the catalogue of actions the user can choose: -- count_length: gives the length(s) of the protein sequence(s) -- count_nucleotide_length: counts the length(s) of the coding nucleotide sequence(s) of the protein sequence(s) -- count_molecular_mass: calculates molecular mass of the input (the algorithm takes into consideration water mass and subtracts it) -- show_content: shows the aminoacid content of the protein(s) -- convert_1_to_3: converts 1-letter format into 3-letter one -- count_extinction_280nm: counts the molar extinction coefficient (this function counts cystine contribution to extinction coefficient as two cysteins give 1 SS-bond) +- *count_length*: gives the length(s) of the protein sequence(s) +- *count_nucleotide_length*: counts the length(s) of the coding nucleotide sequence(s) of the protein sequence(s) +- *count_molecular_mass*: calculates molecular mass of the input (the algorithm takes into 
consideration water mass and subtracts it) +- *show_content*: shows the aminoacid content of the protein(s) +- *convert_1_to_3*: converts 1-letter format into 3-letter one +- *count_extinction_280nm*: counts the molar extinction coefficient (this function counts cystine contribution to extinction coefficient as two cysteins give 1 SS-bond) ## Examples: Examples for some of the protein_tools.py functions: @@ -71,9 +71,9 @@ The result: ## Troubleshooting If the user sees ValueError, the user may inputted a non-protein sequence. The programm works with protein sequences in 1-letter format only. Please, check the sequence. ## Authors' contribution: -- Alexei Sivtsev: calculate_mm, convert_1to3 (team leader) -- Albina Khairetdinova: count_aa_content, count_extinction_280nm, is_prot (it is the inner function, that appears only when the sequence is non-protein and returns ValueError) -- Elizaveta Zolotenkova: main function protein_tools, function count_aa_length, function count_nucl_length and Read.me +- Alexei Sivtsev: *calculate_mm*, *convert_1to3* (team leader) +- Albina Khairetdinova: *count_aa_content*, *count_extinction_280nm*, *is_prot* (it is the inner function, that appears only when the sequence is non-protein and returns ValueError) +- Elizaveta Zolotenkova: main function *protein_tools*, *count_aa_length*, *count_nucl_length* and Read.me ## Additional information (a photo of the authors) ![authors](https://github.com/Zoea1/HW4_Functions2/assets/143959084/114d6852-8fb8-4bcc-baf7-873eb3d85a5e)