From b652eb818366991969689f26b7ce1df66637bfe3 Mon Sep 17 00:00:00 2001 From: Maria Uzun Date: Tue, 26 Sep 2023 19:14:01 +0300 Subject: [PATCH 01/31] Create code file --- AAmigo.py | 1 + 1 file changed, 1 insertion(+) create mode 100644 AAmigo.py diff --git a/AAmigo.py b/AAmigo.py new file mode 100644 index 0000000..50b0b52 --- /dev/null +++ b/AAmigo.py @@ -0,0 +1 @@ +Write your code here \ No newline at end of file From 00a477db3c45751f11f1b82b6a8638a17acd8b64 Mon Sep 17 00:00:00 2001 From: Maria Uzun Date: Wed, 27 Sep 2023 23:20:14 +0300 Subject: [PATCH 02/31] Add code --- AAmigo.py | 123 +++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 122 insertions(+), 1 deletion(-) diff --git a/AAmigo.py b/AAmigo.py index 50b0b52..580bfe7 100644 --- a/AAmigo.py +++ b/AAmigo.py @@ -1 +1,122 @@ -Write your code here \ No newline at end of file +def protein_mass(seq: str): + """ + + Calculate the mass (Da) of a protein based on its amino acids sequence. + Takes a string of amino acids, returns the molecular weight in Da. + Amino acids in the string should be indicated as one-letter symbols. + + """ + aa_seq = list(seq) + mass_dictionary = dict({'A': 89, 'R': 174, 'N': 132, 'D': 133, 'C': 121, 'Q': 146, 'E': 147, 'Z': 147, + 'G': 75, 'H': 155, 'I': 131, 'L': 131, 'K': 146, 'M': 149, 'F': 165, 'P': 115, 'S': 105, + 'T': 119, 'W': 204, 'Y': 181, 'V': 117}) + mass = 0 + for aa in aa_seq: + mass += mass_dictionary[aa] + + return mass +#print(protein_mass('MARY')) + +def aa_profile(seq: str): + """ + + Displays the proportion of hydrophobic, polar, negatively and positively charged amino acids in the protein. + Takes a string of amino acids, returns a dictionary. + Amino acids in the string should be indicated as one-letter symbols. 
+ + """ + aa_seq = list(seq) + aa_biochemistry = dict( + {'hydrophobic': ['G', 'A', 'V', 'L', 'I', 'P', 'F', 'M', 'W'], 'polar': ['S', 'T', 'C', 'N', 'Q', 'Y'], + '- charged': ['E', 'D'], '+ charged': ['K', 'H', 'R']}) + profile = dict({'hydrophobic': 0, 'polar': 0, '- charged': 0, '+ charged': 0}) + for aa in aa_seq: + for group_name, group_list in aa_biochemistry.items(): + if aa in group_list: + profile[group_name] += 1 + + for group, count in profile.items(): + profile[group] = round((count/len(seq)), 2) + return profile + + +#print(aa_profile('EEKFG', 'EEKFG')) + +def aa_substring(*seq: str): + """ + + Searches for a substring of amino acids in the entire amino acid sequence. + Takes a string of amino acids and a substring, which should be found. + Returns the position in the original sequence where the searched one was found for the first time. + Amino acids in the string should be indicated as one-letter symbols. + + """ + aa_seq = list(seq) + aa_seq_upper = [] + for sequences in aa_seq: + up = sequences.upper() + aa_seq_upper.append(up) + amino_acids = aa_seq_upper[:-1] + substring = aa_seq_upper[-1] + results = [] + for sequences in amino_acids: + subst = sequences.find(substring) + results.append(subst) + return results +#aa_substring('RNDCEQEZGHeILKMFPESTWYa', 'A') + +def aa_count(*seq: str): + """ + + Finds how many times a particular amino acid or sequence of several amino acids occurs in the original sequence. + Takes a string of amino acids and a substring, which should be counted. + Returns the count of searched amino acids. + Amino acids in the string should be indicated as one-letter symbols. 
+ + """ + aa_seq = list(seq) + aa_seq_upper = [] + for sequences in aa_seq: + up = sequences.upper() + aa_seq_upper.append(up) + amino_acids = aa_seq_upper[:-1] + substring = aa_seq_upper[-1] + results = [] + for sequences in amino_acids: + aa_count = sequences.count(substring) + results.append(aa_count) + return results +#aa_count('ARNDCQEeEZGHILKMFPSTWY','NDCQZGHILKMFPS','HI') + +def aa_tools(*args): + seq = args[:-1] + operation = args[-1] + non_aa_chars = set('BJOUXbjoux') + contains_non_aa = False + for sequence in seq: + contains_non_aa = False + for aa in sequence: + if aa in non_aa_chars: + contains_non_aa = True + break + if contains_non_aa: + break + if contains_non_aa: + return None + + if operation == "protein_mass": + protein_mass_result = protein_mass(*seq) + return protein_mass_result + + if operation == "aa_profile": + aa_profile_result = aa_profile(*seq) + return aa_profile_result + + if operation == "aa_substring": + aa_substring_result = aa_substring(*seq) + return aa_substring_result + + if operation == "aa_count": + aa_count_result = aa_count(*seq) + return aa_count_result +aa_tools(*args)) \ No newline at end of file From f32cf2bffe47d718dbe885096119f357f076e282 Mon Sep 17 00:00:00 2001 From: zhurkv Date: Fri, 29 Sep 2023 16:57:41 +0300 Subject: [PATCH 03/31] Add protein_length function --- HW4_Uzun/AAmigo.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/HW4_Uzun/AAmigo.py b/HW4_Uzun/AAmigo.py index 4eaac70..73ef9fc 100644 --- a/HW4_Uzun/AAmigo.py +++ b/HW4_Uzun/AAmigo.py @@ -1 +1,15 @@ -Add your code here \ No newline at end of file +Add your code here +def protein_length(*seqs: str): + """ + + Calculate the length (number of amino acids) of a protein. + Takes a string of amino acids, returns the number. + Amino acids in the string should be indicated as one-letter symbols. 
+ + """ + lengths = [] + + for seq in seqs: + lengths.append(len(seq)) + + return lengths \ No newline at end of file From a6a3033e2587995e295684f709c49fcd90946493 Mon Sep 17 00:00:00 2001 From: zhurkv Date: Fri, 29 Sep 2023 17:01:48 +0300 Subject: [PATCH 04/31] Add def essential_amino_acids function --- HW4_Uzun/AAmigo.py | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/HW4_Uzun/AAmigo.py b/HW4_Uzun/AAmigo.py index 73ef9fc..a37837b 100644 --- a/HW4_Uzun/AAmigo.py +++ b/HW4_Uzun/AAmigo.py @@ -12,4 +12,25 @@ def protein_length(*seqs: str): for seq in seqs: lengths.append(len(seq)) - return lengths \ No newline at end of file + return lengths + + + def essential_amino_acids(*seqs: str): + """ + + Calculate the number of essential amino acids based on its amino acids sequence. + Takes a string of amino acids, returns only the essential amino acids. + Amino acids in the string should be indicated as one-letter symbols. + + """ + eaa_dictionary = ['H', 'I', 'K', 'L', 'M', 'F', 'T', 'W', 'V', 'h', 'i', 'k', 'l', 'm', 'f', 't', 'w', 'v'] + eaa_list = [] + + for seq in seqs: + eaa_seq = [] + for amino_acid in seq: + if amino_acid in eaa_dictionary: + eaa_seq.append(amino_acid) + eaa_list.append(eaa_seq) + + return eaa_list \ No newline at end of file From 5a1ae4f9f2b884c68121a6e61ef2469c865d468f Mon Sep 17 00:00:00 2001 From: Maria Uzun Date: Fri, 29 Sep 2023 18:50:49 +0300 Subject: [PATCH 05/31] Move AAmigo.py to directory --- AAmigo.py => HW4_Uzun/AAmigo.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename AAmigo.py => HW4_Uzun/AAmigo.py (100%) diff --git a/AAmigo.py b/HW4_Uzun/AAmigo.py similarity index 100% rename from AAmigo.py rename to HW4_Uzun/AAmigo.py From 86f3bb28fa1c27a58220e25525c34d1ac5255073 Mon Sep 17 00:00:00 2001 From: Maria Uzun Date: Fri, 29 Sep 2023 19:12:00 +0300 Subject: [PATCH 06/31] Add new file AAmigo.py --- HW4_Uzun/AAmigo.py | 123 +-------------------------------------------- 1 file changed, 1 
insertion(+), 122 deletions(-) diff --git a/HW4_Uzun/AAmigo.py b/HW4_Uzun/AAmigo.py index 580bfe7..4eaac70 100644 --- a/HW4_Uzun/AAmigo.py +++ b/HW4_Uzun/AAmigo.py @@ -1,122 +1 @@ -def protein_mass(seq: str): - """ - - Calculate the mass (Da) of a protein based on its amino acids sequence. - Takes a string of amino acids, returns the molecular weight in Da. - Amino acids in the string should be indicated as one-letter symbols. - - """ - aa_seq = list(seq) - mass_dictionary = dict({'A': 89, 'R': 174, 'N': 132, 'D': 133, 'C': 121, 'Q': 146, 'E': 147, 'Z': 147, - 'G': 75, 'H': 155, 'I': 131, 'L': 131, 'K': 146, 'M': 149, 'F': 165, 'P': 115, 'S': 105, - 'T': 119, 'W': 204, 'Y': 181, 'V': 117}) - mass = 0 - for aa in aa_seq: - mass += mass_dictionary[aa] - - return mass -#print(protein_mass('MARY')) - -def aa_profile(seq: str): - """ - - Displays the proportion of hydrophobic, polar, negatively and positively charged amino acids in the protein. - Takes a string of amino acids, returns a dictionary. - Amino acids in the string should be indicated as one-letter symbols. - - """ - aa_seq = list(seq) - aa_biochemistry = dict( - {'hydrophobic': ['G', 'A', 'V', 'L', 'I', 'P', 'F', 'M', 'W'], 'polar': ['S', 'T', 'C', 'N', 'Q', 'Y'], - '- charged': ['E', 'D'], '+ charged': ['K', 'H', 'R']}) - profile = dict({'hydrophobic': 0, 'polar': 0, '- charged': 0, '+ charged': 0}) - for aa in aa_seq: - for group_name, group_list in aa_biochemistry.items(): - if aa in group_list: - profile[group_name] += 1 - - for group, count in profile.items(): - profile[group] = round((count/len(seq)), 2) - return profile - - -#print(aa_profile('EEKFG', 'EEKFG')) - -def aa_substring(*seq: str): - """ - - Searches for a substring of amino acids in the entire amino acid sequence. - Takes a string of amino acids and a substring, which should be found. - Returns the position in the original sequence where the searched one was found for the first time. 
- Amino acids in the string should be indicated as one-letter symbols. - - """ - aa_seq = list(seq) - aa_seq_upper = [] - for sequences in aa_seq: - up = sequences.upper() - aa_seq_upper.append(up) - amino_acids = aa_seq_upper[:-1] - substring = aa_seq_upper[-1] - results = [] - for sequences in amino_acids: - subst = sequences.find(substring) - results.append(subst) - return results -#aa_substring('RNDCEQEZGHeILKMFPESTWYa', 'A') - -def aa_count(*seq: str): - """ - - Finds how many times a particular amino acid or sequence of several amino acids occurs in the original sequence. - Takes a string of amino acids and a substring, which should be counted. - Returns the count of searched amino acids. - Amino acids in the string should be indicated as one-letter symbols. - - """ - aa_seq = list(seq) - aa_seq_upper = [] - for sequences in aa_seq: - up = sequences.upper() - aa_seq_upper.append(up) - amino_acids = aa_seq_upper[:-1] - substring = aa_seq_upper[-1] - results = [] - for sequences in amino_acids: - aa_count = sequences.count(substring) - results.append(aa_count) - return results -#aa_count('ARNDCQEeEZGHILKMFPSTWY','NDCQZGHILKMFPS','HI') - -def aa_tools(*args): - seq = args[:-1] - operation = args[-1] - non_aa_chars = set('BJOUXbjoux') - contains_non_aa = False - for sequence in seq: - contains_non_aa = False - for aa in sequence: - if aa in non_aa_chars: - contains_non_aa = True - break - if contains_non_aa: - break - if contains_non_aa: - return None - - if operation == "protein_mass": - protein_mass_result = protein_mass(*seq) - return protein_mass_result - - if operation == "aa_profile": - aa_profile_result = aa_profile(*seq) - return aa_profile_result - - if operation == "aa_substring": - aa_substring_result = aa_substring(*seq) - return aa_substring_result - - if operation == "aa_count": - aa_count_result = aa_count(*seq) - return aa_count_result -aa_tools(*args)) \ No newline at end of file +Add your code here \ No newline at end of file From 
c70c0b5492624a9e43984d066c5f4e123f885680 Mon Sep 17 00:00:00 2001 From: icalledmyselfmoon <88886578+icalledmyselfmoon@users.noreply.github.com> Date: Fri, 29 Sep 2023 19:15:13 +0300 Subject: [PATCH 07/31] Add function protein_mass to AAmigo.py --- HW4_Uzun/AAmigo.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/HW4_Uzun/AAmigo.py b/HW4_Uzun/AAmigo.py index 4eaac70..1224c2d 100644 --- a/HW4_Uzun/AAmigo.py +++ b/HW4_Uzun/AAmigo.py @@ -1 +1,17 @@ -Add your code here \ No newline at end of file +def protein_mass(seq: str): + """ + + Calculate the mass (Da) of a protein based on its amino acids sequence. + Takes a string of amino acids, returns the molecular weight in Da. + Amino acids in the string should be indicated as one-letter symbols. + + """ + aa_seq = list(seq) + mass_dictionary = dict({'A': 89, 'R': 174, 'N': 132, 'D': 133, 'C': 121, 'Q': 146, 'E': 147, 'Z': 147, + 'G': 75, 'H': 155, 'I': 131, 'L': 131, 'K': 146, 'M': 149, 'F': 165, 'P': 115, 'S': 105, + 'T': 119, 'W': 204, 'Y': 181, 'V': 117}) + mass = 0 + for aa in aa_seq: + mass += mass_dictionary[aa] + + return mass From b9dce5b9636a8eb18bad989a90e7fb62254d5e2c Mon Sep 17 00:00:00 2001 From: Maria Uzun Date: Fri, 29 Sep 2023 19:15:22 +0300 Subject: [PATCH 08/31] Add aa_substring function --- HW4_Uzun/AAmigo.py | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/HW4_Uzun/AAmigo.py b/HW4_Uzun/AAmigo.py index 4eaac70..e8ea396 100644 --- a/HW4_Uzun/AAmigo.py +++ b/HW4_Uzun/AAmigo.py @@ -1 +1,21 @@ -Add your code here \ No newline at end of file +def aa_substring(seq: str): + """ + + Searches for a substring of amino acids in the entire amino acid sequence. + Takes a string of amino acids and a substring, which should be found. + Returns the position where the searched one was found for the first time. + Amino acids in the string should be indicated as one-letter symbols. 
+ + """ + aa_seq = list(seq) + aa_seq_upper = [] + for sequences in aa_seq: + upper_case = sequences.upper() + aa_seq_upper.append(upper_case) + amino_acids = aa_seq_upper[:-1] + substring = aa_seq_upper[-1] + results = [] + for sequences in amino_acids: + subst = sequences.find(substring) + results.append(subst) + return results From 010a4561e6b000d54818fdf80a30c64e7f291ea3 Mon Sep 17 00:00:00 2001 From: Maria Uzun Date: Fri, 29 Sep 2023 19:20:35 +0300 Subject: [PATCH 09/31] Add aa_count function --- HW4_Uzun/AAmigo.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/HW4_Uzun/AAmigo.py b/HW4_Uzun/AAmigo.py index e8ea396..39822f3 100644 --- a/HW4_Uzun/AAmigo.py +++ b/HW4_Uzun/AAmigo.py @@ -19,3 +19,26 @@ def aa_substring(seq: str): subst = sequences.find(substring) results.append(subst) return results + + +def aa_count(seq: str): + """ + + Finds how many times a particular sequence(s) occurs in the original one. + Takes a string of amino acids and a substring, which should be counted. + Returns the count of searched amino acids. + Amino acids in the string should be indicated as one-letter symbols. 
+ + """ + aa_seq = list(seq) + aa_seq_upper = [] + for sequences in aa_seq: + upper_case = sequences.upper() + aa_seq_upper.append(upper_case) + amino_acids = aa_seq_upper[:-1] + substring = aa_seq_upper[-1] + results = [] + for sequences in amino_acids: + amino_acid_count = sequences.count(substring) + results.append(amino_acid_count) + return results \ No newline at end of file From 92d597c0e70eee6c35bd578abb288afd5237896d Mon Sep 17 00:00:00 2001 From: Maria Babaeva Date: Fri, 29 Sep 2023 19:23:09 +0300 Subject: [PATCH 10/31] Add functions protein_mass and aa_profile --- HW4_Uzun/AAmigo.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/HW4_Uzun/AAmigo.py b/HW4_Uzun/AAmigo.py index 1224c2d..c904ef4 100644 --- a/HW4_Uzun/AAmigo.py +++ b/HW4_Uzun/AAmigo.py @@ -15,3 +15,26 @@ def protein_mass(seq: str): mass += mass_dictionary[aa] return mass + +def aa_profile(seq: str): + """ + + Displays the proportion of hydrophobic, polar, negatively and positively charged amino acids in the protein. + Takes a string of amino acids, returns a dictionary. + Amino acids in the string should be indicated as one-letter symbols. 
+ + """ + aa_seq = list(seq) + aa_biochemistry = dict( + {'hydrophobic': ['G', 'A', 'V', 'L', 'I', 'P', 'F', 'M', 'W'], 'polar': ['S', 'T', 'C', 'N', 'Q', 'Y'], + '- charged': ['E', 'D'], '+ charged': ['K', 'H', 'R']}) + profile = dict({'hydrophobic': 0, 'polar': 0, '- charged': 0, '+ charged': 0}) + + for aa in aa_seq: + for group_name, group_list in aa_biochemistry.items(): + if aa in group_list: + profile[group_name] += 1 + + for group, count in profile.items(): + profile[group] = round((count/len(seq)), 2) + return profile From d6e423e0005bc3d5811bae9d19e1177554074d78 Mon Sep 17 00:00:00 2001 From: Maria Uzun Date: Fri, 29 Sep 2023 19:26:45 +0300 Subject: [PATCH 11/31] Add main function aa_tools --- HW4_Uzun/AAmigo.py | 59 +++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 58 insertions(+), 1 deletion(-) diff --git a/HW4_Uzun/AAmigo.py b/HW4_Uzun/AAmigo.py index 39822f3..c5f2464 100644 --- a/HW4_Uzun/AAmigo.py +++ b/HW4_Uzun/AAmigo.py @@ -41,4 +41,61 @@ def aa_count(seq: str): for sequences in amino_acids: amino_acid_count = sequences.count(substring) results.append(amino_acid_count) - return results \ No newline at end of file + return results + + +def aa_tools(*args): + """ + + Main function for amino acid sequences processing. + Parameters: *args (str) - amino acid sequences and operation. + Returns: List of results or None if non-amino acid chars found. 
+ + """ + seq = args[:-1] + operation = args[-1] + non_aa_chars = set('BJOUXbjoux') + contains_non_aa = False + + for sequence in seq: + contains_non_aa = False + for amino_acid in sequence: + if amino_acid in non_aa_chars: + contains_non_aa = True + break + if contains_non_aa: + break + if contains_non_aa: + return None + + results = [] + + for sequence in seq: + if operation == "protein_mass": + result = protein_mass(sequence) + results.append(result) + + elif operation == "aa_profile": + result = aa_profile(sequence) + results.append(result) + + if operation == "aa_substring": + result = aa_substring(seq) + return result + + if operation == "aa_count": + result = aa_count(seq) + return result + + if operation == "protein_length": + result = protein_length(sequence) + results.append(result) + + if operation == "essential_amino_acids": + result = essential_amino_acids(sequence) + results.append(result) + + return results + + +aa_tools() \ No newline at end of file From e7413e745699c2e3c9b970c22278ac09d8704554 Mon Sep 17 00:00:00 2001 From: Maria Uzun <44806106+uzunmasha@users.noreply.github.com> Date: Fri, 29 Sep 2023 19:40:11 +0300 Subject: [PATCH 12/31] Update AAmigo.py --- HW4_Uzun/AAmigo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/HW4_Uzun/AAmigo.py b/HW4_Uzun/AAmigo.py index 682bc95..84c36d7 100644 --- a/HW4_Uzun/AAmigo.py +++ b/HW4_Uzun/AAmigo.py @@ -102,7 +102,7 @@ def protein_length(*seqs: str): return lengths - def essential_amino_acids(*seqs: str): +def essential_amino_acids(*seqs: str): """ Calculate the number of essential amino acids based on its amino acids sequence. 
From 96117dbf8523ae7ee2dedc8ef5a539fafea1edd5 Mon Sep 17 00:00:00 2001 From: Maria Uzun <44806106+uzunmasha@users.noreply.github.com> Date: Fri, 29 Sep 2023 19:53:29 +0300 Subject: [PATCH 13/31] Update README.md --- README.md | 137 ++++++++++++++++++++++++++++-------------------------- 1 file changed, 72 insertions(+), 65 deletions(-) diff --git a/README.md b/README.md index f918170..6927363 100644 --- a/README.md +++ b/README.md @@ -1,65 +1,72 @@ -# HW 4. Functions 2 -> *This is the repo for the fourth homework of the BI Python 2023 course* - -### Homework description - -На прошлой неделе вы делали утилиту для работы с последовательностями нуклеиновых кислот (с весьма строгим ТЗ). Пришло время для чего-то более самостоятельного. - -#### Основное задание - - -Напишите утилиту для работы с последовательностями белков. Там должно быть минимум 5 различных операций, должна быть какая-то точка входа через которую пользователь будет всё это дело использовать. На этом, по сути, всё. Всё целиком зависит от вашей фантазии и креативности. Можете опираться на ДЗ №2 и №3. - -Самая главная часть задания - это файл `README.md`. Сделайте краткое введение, напишите описание тула, приведите документацию по использованию со списком аргументов. Добавьте примеры использования. Возможно, вы захотите сделать секцию Troubleshooting. ***Почему это нужно?*** В этот раз проверяющий не будет знать того, как должен работать ваш тул. Это ваш авторский код. Даже самая прекрасная функциональность, не будучи отраженной в README, скорее всего останется незамеченной. README - это ваш способ познакомить пользователя с тулом, показать всё лучше и обосновать, почему именно ваша команда должна получить наивысший балл. - -Есть люди которые, любят писать документации, а есть те - кто не любит. Найдите в вашей команде того, кто любит. И в будущем в своих рабочих проектах всегда держите рядом такого человек (или будьте им). 
- -Примеры некоторых README, которыми можно вдохновляться: - -- [MetaFX](https://github.com/ctlab/metafx), тул Артёма Иванова. Там еще и [wiki](https://github.com/ctlab/metafx/wiki) крутое. -- [samovar](https://github.com/nvaulin/samovar) -- [MetaGEM](https://github.com/franciscozorrilla/metaGEM) -- [Pharokka](https://github.com/gbouras13/pharokka) - -Типовые секции, на которые стоит обратить внимание: Title, Overview, Usage, Options, Examples, Troubleshooting, Contacts. - -**Tехническое требование к заданию.** - -Это задание будет выполняться в командах по 3 человека. Каждый из членов команды должен внести ***как минимум*** 2 функции. Каждое внесение функции должно сопровождаться коммитом с осмысленным описанием коммита. Ниже приведена последовательность действий для успешного выполнения задания (аналогично ДЗ №2): - -1. Посмотрите состав своей команды здесь ([**ССЫЛКА**](https://docs.google.com/spreadsheets/d/1KMBBBu8LqauRpDJb0v1ldPwpvzNn8-KakcHexAcqLsE/edit?usp=sharing)). -2. Тимлид делает форк данного репозитория. **В форке создает ветку `HW4_`, в ветке создает папку `HW4_`, в этой папке вы всё делаете.** -3. Члены команды могут либо делать свои форки, либо работать в репозитории тимлида в качестве колабораторов ("contributors"). В любом случае делаете клоны => пишите код локально => пушите. -4. В конце тимлид делайет pull-request из `HW4_` своего репозитория в `main` этого. - - -А также: -- Сопроводите программу лучшим `README.md` файлом в вашей жизни (на английском языке). -- В этом ДЗ проблемы с качеством кода (нейминги, пустые строки, анноатции типов, док.стринги, пробелы) могут привести к снижению балла. Воспользуйтесь линтерами чтобы себя обезопасить. IDE по типу PyCharm или VSCode имеют фунцонал по авто-исправлению многих проблем такого рода. 
- -Автотестов на GitHub в этом ДЗ нет, но вы можете прогнать линтеры на качество кода локально (как в ДЗ №3, подробнее читайте [тут](https://plausible-cannon-091.notion.site/Code-auto-checks-02b2ea69c1d545fca07b50ce5933ed5f?pvs=4)). - -- Программа должна сохранять регистр символов. -- Программа должна работать только с последовательностями белков. -- Запрещается использование сторонних модулей. - - -### Форма сдачи - -Прикрепите ссылку на pull-request тимлида в Google Class (можете сделать от лица каждого члена команды, но это не обязательно). - - -### Pазбалловка - -- За каждую из 5 операций - максимум **1.5 балла** -- За README - максимум **2.5 балла** -- Если вы не внесли как минимум 2 функции от себя, вы получаете 0 баллов (на баллы остальных членов команды это не влияет). -- За фото созвона в README можно получить 0.2 доп. балла (но не более 10 баллов суммарно) - - - -### **Предполагаемый учебный результат** - -Это задание позволит вам проявить креативность и учиться быть не только кодером, но и автором. Также это задание поможет окончательно закрепить материал по функциям который мы прошли. - -Удачи! ✨✨ +# AAmigo +This readme describes the user-friendly program AAmigo for performing various operations with amino acid sequences. + +AAmigo can perform different operations: +* Calculate the mass of a protein. +* Calculate the ratio of amino acids with different polarities in a protein +* Find for a particular amino acid(s) in the entire sequence +* Calculate amino acid's occurrence in a sequence +* Кристинина +* Кристинина + +## Usage +1. Clone this repo using SSH or HTTPS: +```bash +git clone git@github.com:uzunmasha/HW4_Functions2.git +``` +**or** +```bash +git clone https://github.com/uzunmasha/HW4_Functions2.git +``` +2. Launch the program with the required function (listed below) +3. Enjoy your results + +## List of functions: +For all functions, amino acids in the string should be indicated as one-letter symbols. Letters can be uppercase or lowercase. 
+ +### aa_substring +This function searches for the presence of particular amino acid(s) in the entire amino acid sequence. As input, it takes a string of amino acids and a substring that needs to be found. All sequences and subsequence should be comma-separated. Any number of amino acid sequences is possible. The searched substring should be one and it should be pointed last. As an output, the function returns the position in the original sequence where the searched element was found for the first time. +Usage example: +```python +aa_tools('RNwDeACEQEZ', 'E','aa_substring') #4 +aa_tools('RNwDeACEQEZ', 'DFKAaaE','A','aa_substring') #[5, 3] +``` +### aa_count +This function finds how many times a particular amino acid or sequence of several amino acids occurs in the original sequence. As input, it takes a string of amino acids and a substring that needs to be counted. All sequences and subsequence should be comma-separated. Any number of amino acid sequences is possible. The searched substring should be one and it should be pointed last. As an output, the function returns the count of searched amino acid(s). +Usage example: +```python +aa_tools('GHcLfKF','f','aa_count') #2 +aa_tools('HILAKMaF', 'GDaKFAAE','A','aa_count') #[2, 3] +``` + +### +Части Маши и Кристины + +**Пример использования** + +```python +protein_mass('MARY') #593 (в Дальтонах) +aa_profile('EEKFG') #{'hydrophobic': 0.4, 'polar': 0.0, '- charged': 0.4, '+ charged': 0.2} +``` + +## Troubleshooting +* In function `'aa_substring'` the position counting starts at 0, so don't be confused if the second element in the sequence has the output [1]. +* In functions `'aa_substring'` and `'aa_count'` [-1] means that there is no such element in the sequence. +* In functions `'aa_substring'` and `'aa_count'` the error message "name '..' is not defined" means that the given argument is not quoted in the input string. 
+ +## Developers and contacts +* Maria Uzun - contributed to `'aa_substring'`, `'aa_count'` and `aa_tools` functions. +* Maria Babaeva - contributed to `'protein_mass'` and `'aa_profile'` functions. +* Kristina Zhur - contributed to ... functions +* Julia the Cat - team's emotional support + +All team members contributed to README file according to the functions they developed. + +*Team's photo* + +In case of non-working code: + +* Please blame the one who has the paws +* Report any problems directly to the GitHub issue tracker +or +* Send your feedback to uzunmasha@gmail.com From cce5ee84acf9b1946c443c7dbd75da972035f3b6 Mon Sep 17 00:00:00 2001 From: Maria Uzun <44806106+uzunmasha@users.noreply.github.com> Date: Sat, 30 Sep 2023 01:37:09 +0300 Subject: [PATCH 14/31] Update README.md --- README.md | 42 +++++++++++++++++++++++------------------- 1 file changed, 23 insertions(+), 19 deletions(-) diff --git a/README.md b/README.md index 6927363..68df7ee 100644 --- a/README.md +++ b/README.md @@ -6,8 +6,8 @@ AAmigo can perform different operations: * Calculate the ratio of amino acids with different polarities in a protein * Find for a particular amino acid(s) in the entire sequence * Calculate amino acid's occurrence in a sequence -* Кристинина -* Кристинина +* Calculate amino acid sequence length +* Finds essential amino acids in a sequence ## Usage 1. Clone this repo using SSH or HTTPS: @@ -18,11 +18,20 @@ git clone git@github.com:uzunmasha/HW4_Functions2.git ```bash git clone https://github.com/uzunmasha/HW4_Functions2.git ``` -2. Launch the program with the required function (listed below) -3. Enjoy your results +2. Launch the program with the required function (listed below) in a code interpreter like Jupyter Notebook. +3. Enjoy your results! ## List of functions: -For all functions, amino acids in the string should be indicated as one-letter symbols. Letters can be uppercase or lowercase. 
+For all functions, amino acids in the sequences should be indicated as one-letter symbols. Letters can be uppercase or lowercase. + +### protein_mass +**Пример использования** + +```python +protein_mass('MARY') #593 (в Дальтонах) +aa_profile('EEKFG') #{'hydrophobic': 0.4, 'polar': 0.0, '- charged': 0.4, '+ charged': 0.2} +``` +### aa_profile ### aa_substring This function searches for the presence of particular amino acid(s) in the entire amino acid sequence. As input, it takes a string of amino acids and a substring that needs to be found. All sequences and subsequence should be comma-separated. Any number of amino acid sequences is possible. The searched substring should be one and it should be pointed last. As an output, the function returns the position in the original sequence where the searched element was found for the first time. @@ -38,16 +47,8 @@ Usage example: aa_tools('GHcLfKF','f','aa_count') #2 aa_tools('HILAKMaF', 'GDaKFAAE','A','aa_count') #[2, 3] ``` - -### -Части Маши и Кристины - -**Пример использования** - -```python -protein_mass('MARY') #593 (в Дальтонах) -aa_profile('EEKFG') #{'hydrophobic': 0.4, 'polar': 0.0, '- charged': 0.4, '+ charged': 0.2} -``` +### protein_length +### essential_amino_acids ## Troubleshooting * In function `'aa_substring'` the position counting starts at 0, so don't be confused if the second element in the sequence has the output [1]. @@ -55,18 +56,21 @@ aa_profile('EEKFG') #{'hydrophobic': 0.4, 'polar': 0.0, '- charged': 0.4, '+ cha * In functions `'aa_substring'` and `'aa_count'` the error message "name '..' is not defined" means that the given argument is not quoted in the input string. ## Developers and contacts -* Maria Uzun - contributed to `'aa_substring'`, `'aa_count'` and `aa_tools` functions. +* Maria Uzun - contributed to `'aa_substring'`, `'aa_count'`, and `'aa_tools'` functions. * Maria Babaeva - contributed to `'protein_mass'` and `'aa_profile'` functions. -* Kristina Zhur - contributed to ... 
functions -* Julia the Cat - team's emotional support +* Kristina Zhur - contributed to `'protein_length'` and `'essential_amino_acids'` functions. +* Julia the Cat - team's emotional support. All team members contributed to README file according to the functions they developed. -*Team's photo* +![photo_2023-09-26_18-33-49_3](https://github.com/uzunmasha/HW4_Functions2/assets/44806106/63fdea24-5c0a-4650-8bed-181871aa540f) + In case of non-working code: * Please blame the one who has the paws * Report any problems directly to the GitHub issue tracker + or + * Send your feedback to uzunmasha@gmail.com From d7f1ef1eb3bec26bb9840ab5aa9c705a46282411 Mon Sep 17 00:00:00 2001 From: zhurkr <145059379+zhurkr@users.noreply.github.com> Date: Sat, 30 Sep 2023 09:31:07 +0300 Subject: [PATCH 15/31] Update README.md Add a description of protein_length and essential_amino_acids functions. Add Bibliography --- README.md | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 68df7ee..bd6ea27 100644 --- a/README.md +++ b/README.md @@ -6,8 +6,8 @@ AAmigo can perform different operations: * Calculate the ratio of amino acids with different polarities in a protein * Find for a particular amino acid(s) in the entire sequence * Calculate amino acid's occurrence in a sequence -* Calculate amino acid sequence length -* Finds essential amino acids in a sequence +* Calculate amino acid sequence(s) length +* Finds essential amino acids in a sequence(s) ## Usage 1. Clone this repo using SSH or HTTPS: @@ -48,13 +48,27 @@ aa_tools('GHcLfKF','f','aa_count') #2 aa_tools('HILAKMaF', 'GDaKFAAE','A','aa_count') #[2, 3] ``` ### protein_length +This function can analyze an aminoacid sequence and gives a length of it (number of amino acids). Any number of amino acid sequences is possible. All sequences should be comma-separated. As input, it takes a string or strings of amino acids, as an output, the function returns the length of each protein. 
+Usage example: +```python +aa_tools("KKNNfF", "KKFFRRVV", "KK", 'protein_length') #[6, 8, 2] +``` ### essential_amino_acids +This function can analyze an amino acid sequence and gives a list of essential amino acids that are present in the sequence. +Any number of amino acid sequences is possible. All sequences should be comma-separated. As input, it takes a string or strings of amino acids, as an output, the function returns essential amino acids for each sequence. +Usage example: +```python +aa_tools("KKNNfF", "KKFFRRVV", "KK", 'essential_amino_acids') #[['K', 'K', 'f', 'F'], ['K', 'K', 'F', 'F', 'V', 'V'], ['K', 'K']] +``` ## Troubleshooting * In function `'aa_substring'` the position counting starts at 0, so don't be confused if the second element in the sequence has the output [1]. * In functions `'aa_substring'` and `'aa_count'` [-1] means that there is no such element in the sequence. * In functions `'aa_substring'` and `'aa_count'` the error message "name '..' is not defined" means that the given argument is not quoted in the input string. +## Bibliography +[1] Wu G. Amino acids: metabolism, functions, and nutrition. Amino Acids. 2009 May;37(1):1-17. doi: 10.1007/s00726-009-0269-0. + ## Developers and contacts * Maria Uzun - contributed to `'aa_substring'`, `'aa_count'`, and `'aa_tools'` functions. * Maria Babaeva - contributed to `'protein_mass'` and `'aa_profile'` functions. 
From 4aca2c8e7592d70a39db4f18a632bbba9aa3ca37 Mon Sep 17 00:00:00 2001 From: Maria Uzun <44806106+uzunmasha@users.noreply.github.com> Date: Sat, 30 Sep 2023 17:46:32 +0300 Subject: [PATCH 16/31] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index bd6ea27..d924672 100644 --- a/README.md +++ b/README.md @@ -63,7 +63,7 @@ aa_tools("KKNNfF", "KKFFRRVV", "KK", 'essential_amino_acids') #[['K', 'K', 'f', ## Troubleshooting * In function `'aa_substring'` the position counting starts at 0, so don't be confused if the second element in the sequence has the output [1]. -* In functions `'aa_substring'` and `'aa_count'` [-1] means that there is no such element in the sequence. +* In functions `'aa_substring'` and `'aa_count'` output [-1] means that there is no such element in the sequence. * In functions `'aa_substring'` and `'aa_count'` the error message "name '..' is not defined" means that the given argument is not quoted in the input string. ## Bibliography From 59c1260555b27508ebc19a7f0aca8a34233edf94 Mon Sep 17 00:00:00 2001 From: Maria Uzun <44806106+uzunmasha@users.noreply.github.com> Date: Sat, 30 Sep 2023 18:12:32 +0300 Subject: [PATCH 17/31] Add new function names --- HW4_Uzun/AAmigo.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/HW4_Uzun/AAmigo.py b/HW4_Uzun/AAmigo.py index 84c36d7..4d8ade3 100644 --- a/HW4_Uzun/AAmigo.py +++ b/HW4_Uzun/AAmigo.py @@ -41,7 +41,7 @@ def aa_profile(seq: str): return profile -def aa_substring(seq: str): +def amino_acid_substring(seq: str): """ Searches for a substring of amino acids in the entire amino acid sequence. @@ -64,7 +64,7 @@ def aa_substring(seq: str): return results -def aa_count(seq: str): +def amino_acid_count(seq: str): """ Finds how many times a particular sequence(s) occurs in the original one. 
@@ -158,11 +158,11 @@ def aa_tools(*args): result = aa_profile(sequence) results.append(result) - if operation == "aa_substring": + if operation == "amino_acid_substring": result = aa_substring(seq) return result - if operation == "aa_count": + if operation == "amino_acid_count": result = aa_count(seq) return result From a241a9a64fb3b9a186ed65f42f44f10965843486 Mon Sep 17 00:00:00 2001 From: Maria Uzun <44806106+uzunmasha@users.noreply.github.com> Date: Sat, 30 Sep 2023 18:18:53 +0300 Subject: [PATCH 18/31] Update README.md --- README.md | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index d924672..4b9e2a3 100644 --- a/README.md +++ b/README.md @@ -33,19 +33,19 @@ aa_profile('EEKFG') #{'hydrophobic': 0.4, 'polar': 0.0, '- charged': 0.4, '+ cha ``` ### aa_profile -### aa_substring +### amino_acid_substring This function searches for the presence of particular amino acid(s) in the entire amino acid sequence. As input, it takes a string of amino acids and a substring that needs to be found. All sequences and subsequence should be comma-separated. Any number of amino acid sequences is possible. The searched substring should be one and it should be pointed last. As an output, the function returns the position in the original sequence where the searched element was found for the first time. Usage example: ```python -aa_tools('RNwDeACEQEZ', 'E','aa_substring') #4 -aa_tools('RNwDeACEQEZ', 'DFKAaaE','A','aa_substring') #[5, 3] +aa_tools('RNwDeACEQEZ', 'E','amino_acid_substring') #4 +aa_tools('RNwDeACEQEZ', 'DFKAaaE','A','amino_acid_substring') #[5, 3] ``` -### aa_count +### amino_acid_count This function finds how many times a particular amino acid or sequence of several amino acids occurs in the original sequence. As input, it takes a string of amino acids and a substring that needs to be counted. All sequences and subsequence should be comma-separated. Any number of amino acid sequences is possible. 
The searched substring should be one and it should be pointed last. As an output, the function returns the count of searched amino acid(s). Usage example: ```python -aa_tools('GHcLfKF','f','aa_count') #2 -aa_tools('HILAKMaF', 'GDaKFAAE','A','aa_count') #[2, 3] +aa_tools('GHcLfKF','f','amino_acid_count') #2 +aa_tools('HILAKMaF', 'GDaKFAAE','A','amino_acid_count') #[2, 3] ``` ### protein_length This function can analyze an aminoacid sequence and gives a length of it (number of amino acids). Any number of amino acid sequences is possible. All sequences should be comma-separated. As input, it takes a string or strings of amino acids, as an output, the function returns the length of each protein. @@ -62,15 +62,15 @@ aa_tools("KKNNfF", "KKFFRRVV", "KK", 'essential_amino_acids') #[['K', 'K', 'f', ``` ## Troubleshooting -* In function `'aa_substring'` the position counting starts at 0, so don't be confused if the second element in the sequence has the output [1]. -* In functions `'aa_substring'` and `'aa_count'` output [-1] means that there is no such element in the sequence. -* In functions `'aa_substring'` and `'aa_count'` the error message "name '..' is not defined" means that the given argument is not quoted in the input string. +* In function `'amino_acid_substring'` the position counting starts at 0, so don't be confused if the second element in the sequence has the output [1]. +* In functions `'amino_acid_substring'` and `'amino_acid_count'` output [-1] means that there is no such element in the sequence. +* In functions `'amino_acid_substring'` and `'amino_acid_count'` the error message "name '..' is not defined" means that the given argument is not quoted in the input string. ## Bibliography [1] Wu G. Amino acids: metabolism, functions, and nutrition. Amino Acids. 2009 May;37(1):1-17. doi: 10.1007/s00726-009-0269-0. ## Developers and contacts -* Maria Uzun - contributed to `'aa_substring'`, `'aa_count'`, and `'aa_tools'` functions. 
+* Maria Uzun - contributed to `'amino_acid_substring'`, `'amino_acid_count'`, and `'aa_tools'` functions. * Maria Babaeva - contributed to `'protein_mass'` and `'aa_profile'` functions. * Kristina Zhur - contributed to `'protein_length'` and `'essential_amino_acids'` functions. * Julia the Cat - team's emotional support. From de074382d4ef7327d347610a03ffb00ee36bf30d Mon Sep 17 00:00:00 2001 From: zhurkr <145059379+zhurkr@users.noreply.github.com> Date: Sat, 30 Sep 2023 21:35:58 +0300 Subject: [PATCH 19/31] Update README.md Replace double quotes with single quotes --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 4b9e2a3..7bb341b 100644 --- a/README.md +++ b/README.md @@ -51,14 +51,14 @@ aa_tools('HILAKMaF', 'GDaKFAAE','A','amino_acid_count') #[2, 3] This function can analyze an aminoacid sequence and gives a length of it (number of amino acids). Any number of amino acid sequences is possible. All sequences should be comma-separated. As input, it takes a string or strings of amino acids, as an output, the function returns the length of each protein. Usage example: ```python -aa_tools("KKNNfF", "KKFFRRVV", "KK", 'protein_length') #[6, 8, 2] +aa_tools('KKNNfF', 'KKFFRRVV', 'KK', 'protein_length') #[6, 8, 2] ``` ### essential_amino_acids This function can analyze an amino acid sequence and gives a list of essential amino acids that are present in the sequence. Any number of amino acid sequences is possible. All sequences should be comma-separated. As input, it takes a string or strings of amino acids, as an output, the function returns essential amino acids for each sequence. 
Usage example: ```python -aa_tools("KKNNfF", "KKFFRRVV", "KK", 'essential_amino_acids') #[['K', 'K', 'f', 'F'], ['K', 'K', 'F', 'F', 'V', 'V'], ['K', 'K']] +aa_tools('KKNNfF', 'KKFFRRVV', 'KK', 'essential_amino_acids') #[['K', 'K', 'f', 'F'], ['K', 'K', 'F', 'F', 'V', 'V'], ['K', 'K']] ``` ## Troubleshooting From cd874f3a5eb8f13c1d196d212d366ef084206c59 Mon Sep 17 00:00:00 2001 From: zhurkr <145059379+zhurkr@users.noreply.github.com> Date: Sat, 30 Sep 2023 21:49:27 +0300 Subject: [PATCH 20/31] Update README.md Correct essential_amino_acids function description --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 7bb341b..a0f400b 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ AAmigo can perform different operations: * Find for a particular amino acid(s) in the entire sequence * Calculate amino acid's occurrence in a sequence * Calculate amino acid sequence(s) length -* Finds essential amino acids in a sequence(s) +* Finds essential amino acids (in humans) in a sequence(s) ## Usage 1. Clone this repo using SSH or HTTPS: @@ -54,7 +54,7 @@ Usage example: aa_tools('KKNNfF', 'KKFFRRVV', 'KK', 'protein_length') #[6, 8, 2] ``` ### essential_amino_acids -This function can analyze an amino acid sequence and gives a list of essential amino acids that are present in the sequence. +This function can analyze an amino acid sequence and gives a list of essential amino acids (in humans) that are present in the sequence. Any number of amino acid sequences is possible. All sequences should be comma-separated. As input, it takes a string or strings of amino acids, as an output, the function returns essential amino acids for each sequence. 
Usage example: ```python From cbf64dbe67503f43f38665413a0fd7ea446f5a29 Mon Sep 17 00:00:00 2001 From: zhurkr <145059379+zhurkr@users.noreply.github.com> Date: Sat, 30 Sep 2023 21:56:33 +0300 Subject: [PATCH 21/31] Update AAmigo.py Change function input arguments to general form --- HW4_Uzun/AAmigo.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/HW4_Uzun/AAmigo.py b/HW4_Uzun/AAmigo.py index 4d8ade3..a7c0444 100644 --- a/HW4_Uzun/AAmigo.py +++ b/HW4_Uzun/AAmigo.py @@ -86,7 +86,7 @@ def amino_acid_count(seq: str): results.append(amino_acid_count) return results -def protein_length(*seqs: str): +def protein_length(*seq: str): """ Calculate the length (number of amino acids) of a protein. @@ -96,13 +96,13 @@ def protein_length(*seqs: str): """ lengths = [] - for seq in seqs: - lengths.append(len(seq)) + for sequences in seq: + lengths.append(len(sequences)) return lengths -def essential_amino_acids(*seqs: str): +def essential_amino_acids(*seq: str): """ Calculate the number of essential amino acids based on its amino acids sequence. 
@@ -113,9 +113,9 @@ def essential_amino_acids(*seqs: str): eaa_dictionary = ['H', 'I', 'K', 'L', 'M', 'F', 'T', 'W', 'V', 'h', 'i', 'k', 'l', 'm', 'f', 't', 'w', 'v'] eaa_list = [] - for seq in seqs: + for sequences in seq: eaa_seq = [] - for amino_acid in seq: + for amino_acid in sequences: if amino_acid in eaa_dictionary: eaa_seq.append(amino_acid) eaa_list.append(eaa_seq) From 965be18568913cfea75457b314c44305982e8513 Mon Sep 17 00:00:00 2001 From: icalledmyselfmoon <88886578+icalledmyselfmoon@users.noreply.github.com> Date: Sun, 1 Oct 2023 07:33:58 +0300 Subject: [PATCH 22/31] Update functions to allow lower case in string as an input --- HW4_Uzun/AAmigo.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/HW4_Uzun/AAmigo.py b/HW4_Uzun/AAmigo.py index a7c0444..c4b361e 100644 --- a/HW4_Uzun/AAmigo.py +++ b/HW4_Uzun/AAmigo.py @@ -6,7 +6,7 @@ def protein_mass(seq: str): Amino acids in the string should be indicated as one-letter symbols. """ - aa_seq = list(seq) + aa_seq = list(seq.upper()) mass_dictionary = dict({'A': 89, 'R': 174, 'N': 132, 'D': 133, 'C': 121, 'Q': 146, 'E': 147, 'Z': 147, 'G': 75, 'H': 155, 'I': 131, 'L': 131, 'K': 146, 'M': 149, 'F': 165, 'P': 115, 'S': 105, 'T': 119, 'W': 204, 'Y': 181, 'V': 117}) @@ -17,7 +17,7 @@ def protein_mass(seq: str): return mass -def aa_profile(seq: str): +def amino_acid_profile(seq: str): """ Displays the proportion of hydrophobic, polar, negatively and positively charged amino acids in the protein. @@ -25,7 +25,7 @@ def aa_profile(seq: str): Amino acids in the string should be indicated as one-letter symbols. 
""" - aa_seq = list(seq) + aa_seq = list(seq.upper()) aa_biochemistry = dict( {'hydrophobic': ['G', 'A', 'V', 'L', 'I', 'P', 'F', 'M', 'W'], 'polar': ['S', 'T', 'C', 'N', 'Q', 'Y'], '- charged': ['E', 'D'], '+ charged': ['K', 'H', 'R']}) From ca590c160b831ba83aa02e22cc37f4561c2d392c Mon Sep 17 00:00:00 2001 From: Maria Uzun <44806106+uzunmasha@users.noreply.github.com> Date: Sun, 1 Oct 2023 10:54:56 +0300 Subject: [PATCH 23/31] Correct function names --- HW4_Uzun/AAmigo.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/HW4_Uzun/AAmigo.py b/HW4_Uzun/AAmigo.py index c4b361e..10c68b0 100644 --- a/HW4_Uzun/AAmigo.py +++ b/HW4_Uzun/AAmigo.py @@ -154,16 +154,16 @@ def aa_tools(*args): result = protein_mass(sequence) results.append(result) - elif operation == "aa_profile": - result = aa_profile(sequence) + elif operation == "amino_acid_profile": + result = amino_acid_profile(sequence) results.append(result) if operation == "amino_acid_substring": - result = aa_substring(seq) + result = amino_acid_substring(seq) return result if operation == "amino_acid_count": - result = aa_count(seq) + result = amino_acid_count(seq) return result if operation == "protein_length": From 1dc14027747dbcc088647c168369f4226a14bf1e Mon Sep 17 00:00:00 2001 From: Maria Uzun <44806106+uzunmasha@users.noreply.github.com> Date: Sun, 1 Oct 2023 11:53:14 +0300 Subject: [PATCH 24/31] Update README.md --- README.md | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index a0f400b..116cdc9 100644 --- a/README.md +++ b/README.md @@ -25,14 +25,17 @@ git clone https://github.com/uzunmasha/HW4_Functions2.git For all functions, amino acids in the sequences should be indicated as one-letter symbols. Letters can be uppercase or lowercase. ### protein_mass -**Пример использования** - +This function calculates the mass (Da) of a protein based on its amino acid sequence. 
As input, it takes a string of amino acids and returns the molecular weight in Da. +Usage example: +```python +protein_mass('MARY') #593 (in Da) +``` +### amino_acid_profile +This function displays the proportion of hydrophobic, polar, negatively, and positively charged amino acids in the protein. It takes a string of amino acids, and returns a dictionary with the result. +Usage example: ```python -protein_mass('MARY') #593 (в Дальтонах) aa_profile('EEKFG') #{'hydrophobic': 0.4, 'polar': 0.0, '- charged': 0.4, '+ charged': 0.2} ``` -### aa_profile - ### amino_acid_substring This function searches for the presence of particular amino acid(s) in the entire amino acid sequence. As input, it takes a string of amino acids and a substring that needs to be found. All sequences and subsequence should be comma-separated. Any number of amino acid sequences is possible. The searched substring should be one and it should be pointed last. As an output, the function returns the position in the original sequence where the searched element was found for the first time. Usage example: @@ -75,7 +78,6 @@ aa_tools('KKNNfF', 'KKFFRRVV', 'KK', 'essential_amino_acids') #[['K', 'K', 'f', * Kristina Zhur - contributed to `'protein_length'` and `'essential_amino_acids'` functions. * Julia the Cat - team's emotional support. -All team members contributed to README file according to the functions they developed. 
![photo_2023-09-26_18-33-49_3](https://github.com/uzunmasha/HW4_Functions2/assets/44806106/63fdea24-5c0a-4650-8bed-181871aa540f) From 01e0ab5a95f1e83f3614498ec3bb1dfc1dd430b9 Mon Sep 17 00:00:00 2001 From: Maria Uzun <44806106+uzunmasha@users.noreply.github.com> Date: Sun, 1 Oct 2023 11:53:29 +0300 Subject: [PATCH 25/31] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 116cdc9..83e68f5 100644 --- a/README.md +++ b/README.md @@ -74,7 +74,7 @@ aa_tools('KKNNfF', 'KKFFRRVV', 'KK', 'essential_amino_acids') #[['K', 'K', 'f', ## Developers and contacts * Maria Uzun - contributed to `'amino_acid_substring'`, `'amino_acid_count'`, and `'aa_tools'` functions. -* Maria Babaeva - contributed to `'protein_mass'` and `'aa_profile'` functions. +* Maria Babaeva - contributed to `'protein_mass'` and `'amino_acid_profile'` functions. * Kristina Zhur - contributed to `'protein_length'` and `'essential_amino_acids'` functions. * Julia the Cat - team's emotional support. From c456658a9f2942508d16730da7a212c24466d080 Mon Sep 17 00:00:00 2001 From: Maria Uzun Date: Sun, 1 Oct 2023 11:58:46 +0300 Subject: [PATCH 26/31] Add README to working directory --- HW4_Uzun/README.md | 92 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 92 insertions(+) create mode 100644 HW4_Uzun/README.md diff --git a/HW4_Uzun/README.md b/HW4_Uzun/README.md new file mode 100644 index 0000000..83e68f5 --- /dev/null +++ b/HW4_Uzun/README.md @@ -0,0 +1,92 @@ +# AAmigo +This readme describes the user-friendly program AAmigo for performing various operations with amino acid sequences. + +AAmigo can perform different operations: +* Calculate the mass of a protein. 
+* Calculate the ratio of amino acids with different polarities in a protein +* Search for particular amino acid(s) in the entire sequence +* Calculate amino acid's occurrence in a sequence +* Calculate amino acid sequence(s) length +* Find essential amino acids (in humans) in sequence(s) + +## Usage +1. Clone this repo using SSH or HTTPS: +```bash +git clone git@github.com:uzunmasha/HW4_Functions2.git +``` +**or** +```bash +git clone https://github.com/uzunmasha/HW4_Functions2.git +``` +2. Launch the program with the required function (listed below) in a code interpreter like Jupyter Notebook. +3. Enjoy your results! + +## List of functions: +For all functions, amino acids in the sequences should be indicated as one-letter symbols. Letters can be uppercase or lowercase. + +### protein_mass +This function calculates the mass (Da) of a protein based on its amino acid sequence. As input, it takes a string of amino acids and returns the molecular weight in Da. +Usage example: +```python +protein_mass('MARY') #593 (in Da) +``` +### amino_acid_profile +This function displays the proportion of hydrophobic, polar, negatively, and positively charged amino acids in the protein. It takes a string of amino acids, and returns a dictionary with the result. +Usage example: +```python +aa_profile('EEKFG') #{'hydrophobic': 0.4, 'polar': 0.0, '- charged': 0.4, '+ charged': 0.2} +``` +### amino_acid_substring +This function searches for the presence of particular amino acid(s) in the entire amino acid sequence. As input, it takes a string of amino acids and a substring that needs to be found. All sequences and subsequence should be comma-separated. Any number of amino acid sequences is possible. The searched substring should be one and it should be pointed last. As an output, the function returns the position in the original sequence where the searched element was found for the first time. 
+Usage example: +```python +aa_tools('RNwDeACEQEZ', 'E','amino_acid_substring') #4 +aa_tools('RNwDeACEQEZ', 'DFKAaaE','A','amino_acid_substring') #[5, 3] +``` +### amino_acid_count +This function finds how many times a particular amino acid or sequence of several amino acids occurs in the original sequence. As input, it takes a string of amino acids and a substring that needs to be counted. All sequences and subsequence should be comma-separated. Any number of amino acid sequences is possible. Only one substring may be searched for, and it must be given last. As an output, the function returns the count of searched amino acid(s). +Usage example: +```python +aa_tools('GHcLfKF','f','amino_acid_count') #2 +aa_tools('HILAKMaF', 'GDaKFAAE','A','amino_acid_count') #[2, 3] +``` +### protein_length +This function analyzes an amino acid sequence and returns its length (number of amino acids). Any number of amino acid sequences is possible. All sequences should be comma-separated. As input, it takes a string or strings of amino acids; as output, the function returns the length of each protein. +Usage example: +```python +aa_tools('KKNNfF', 'KKFFRRVV', 'KK', 'protein_length') #[6, 8, 2] +``` +### essential_amino_acids +This function can analyze an amino acid sequence and gives a list of essential amino acids (in humans) that are present in the sequence. +Any number of amino acid sequences is possible. All sequences should be comma-separated. As input, it takes a string or strings of amino acids, as an output, the function returns essential amino acids for each sequence. +Usage example: +```python +aa_tools('KKNNfF', 'KKFFRRVV', 'KK', 'essential_amino_acids') #[['K', 'K', 'f', 'F'], ['K', 'K', 'F', 'F', 'V', 'V'], ['K', 'K']] +``` + +## Troubleshooting +* In function `'amino_acid_substring'` the position counting starts at 0, so don't be confused if the second element in the sequence has the output [1]. 
+* In function `'amino_acid_substring'` output [-1] means that there is no such element in the sequence; `'amino_acid_count'` returns [0] in that case. +* In functions `'amino_acid_substring'` and `'amino_acid_count'` the error message "name '..' is not defined" means that the given argument is not quoted in the input string. + +## Bibliography +[1] Wu G. Amino acids: metabolism, functions, and nutrition. Amino Acids. 2009 May;37(1):1-17. doi: 10.1007/s00726-009-0269-0. + +## Developers and contacts +* Maria Uzun - contributed to `'amino_acid_substring'`, `'amino_acid_count'`, and `'aa_tools'` functions. +* Maria Babaeva - contributed to `'protein_mass'` and `'amino_acid_profile'` functions. +* Kristina Zhur - contributed to `'protein_length'` and `'essential_amino_acids'` functions. +* Julia the Cat - team's emotional support. + + +![photo_2023-09-26_18-33-49_3](https://github.com/uzunmasha/HW4_Functions2/assets/44806106/63fdea24-5c0a-4650-8bed-181871aa540f) + + +In case of non-working code: + +* Please blame the one who has the paws +* Report any problems directly to the GitHub issue tracker + +or + +* Send your feedback to uzunmasha@gmail.com From 382e4197c7bbb8ead608c76ac83deee2550316ec Mon Sep 17 00:00:00 2001 From: Maria Uzun Date: Sun, 1 Oct 2023 12:07:06 +0300 Subject: [PATCH 27/31] Relocate README file --- README.md | 92 ------------------------------------------------------- 1 file changed, 92 deletions(-) delete mode 100644 README.md diff --git a/README.md b/README.md deleted file mode 100644 index 83e68f5..0000000 --- a/README.md +++ /dev/null @@ -1,92 +0,0 @@ -# AAmigo -This readme describes the user-friendly program AAmigo for performing various operations with amino acid sequences. - -AAmigo can perform different operations: -* Calculate the mass of a protein. 
-* Calculate the ratio of amino acids with different polarities in a protein -* Find for a particular amino acid(s) in the entire sequence -* Calculate amino acid's occurrence in a sequence -* Calculate amino acid sequence(s) length -* Finds essential amino acids (in humans) in a sequence(s) - -## Usage -1. Clone this repo using SSH or HTTPS: -```bash -git clone git@github.com:uzunmasha/HW4_Functions2.git -``` -**or** -```bash -git clone https://github.com/uzunmasha/HW4_Functions2.git -``` -2. Launch the program with the required function (listed below) in a code interpreter like Jupyter Notebook. -3. Enjoy your results! - -## List of functions: -For all functions, amino acids in the sequences should be indicated as one-letter symbols. Letters can be uppercase or lowercase. - -### protein_mass -This function calculates the mass (Da) of a protein based on its amino acid sequence. As input, it takes a string of amino acids and returns the molecular weight in Da. -Usage example: -```python -protein_mass('MARY') #593 (in Da) -``` -### amino_acid_profile -This function displays the proportion of hydrophobic, polar, negatively, and positively charged amino acids in the protein. It takes a string of amino acids, and returns a dictionary with the result. -Usage example: -```python -aa_profile('EEKFG') #{'hydrophobic': 0.4, 'polar': 0.0, '- charged': 0.4, '+ charged': 0.2} -``` -### amino_acid_substring -This function searches for the presence of particular amino acid(s) in the entire amino acid sequence. As input, it takes a string of amino acids and a substring that needs to be found. All sequences and subsequence should be comma-separated. Any number of amino acid sequences is possible. The searched substring should be one and it should be pointed last. As an output, the function returns the position in the original sequence where the searched element was found for the first time. 
-Usage example: -```python -aa_tools('RNwDeACEQEZ', 'E','amino_acid_substring') #4 -aa_tools('RNwDeACEQEZ', 'DFKAaaE','A','amino_acid_substring') #[5, 3] -``` -### amino_acid_count -This function finds how many times a particular amino acid or sequence of several amino acids occurs in the original sequence. As input, it takes a string of amino acids and a substring that needs to be counted. All sequences and subsequence should be comma-separated. Any number of amino acid sequences is possible. The searched substring should be one and it should be pointed last. As an output, the function returns the count of searched amino acid(s). -Usage example: -```python -aa_tools('GHcLfKF','f','amino_acid_count') #2 -aa_tools('HILAKMaF', 'GDaKFAAE','A','amino_acid_count') #[2, 3] -``` -### protein_length -This function can analyze an aminoacid sequence and gives a length of it (number of amino acids). Any number of amino acid sequences is possible. All sequences should be comma-separated. As input, it takes a string or strings of amino acids, as an output, the function returns the length of each protein. -Usage example: -```python -aa_tools('KKNNfF', 'KKFFRRVV', 'KK', 'protein_length') #[6, 8, 2] -``` -### essential_amino_acids -This function can analyze an amino acid sequence and gives a list of essential amino acids (in humans) that are present in the sequence. -Any number of amino acid sequences is possible. All sequences should be comma-separated. As input, it takes a string or strings of amino acids, as an output, the function returns essential amino acids for each sequence. -Usage example: -```python -aa_tools('KKNNfF', 'KKFFRRVV', 'KK', 'essential_amino_acids') #[['K', 'K', 'f', 'F'], ['K', 'K', 'F', 'F', 'V', 'V'], ['K', 'K']] -``` - -## Troubleshooting -* In function `'amino_acid_substring'` the position counting starts at 0, so don't be confused if the second element in the sequence has the output [1]. 
-* In functions `'amino_acid_substring'` and `'amino_acid_count'` output [-1] means that there is no such element in the sequence. -* In functions `'amino_acid_substring'` and `'amino_acid_count'` the error message "name '..' is not defined" means that the given argument is not quoted in the input string. - -## Bibliography -[1] Wu G. Amino acids: metabolism, functions, and nutrition. Amino Acids. 2009 May;37(1):1-17. doi: 10.1007/s00726-009-0269-0. - -## Developers and contacts -* Maria Uzun - contributed to `'amino_acid_substring'`, `'amino_acid_count'`, and `'aa_tools'` functions. -* Maria Babaeva - contributed to `'protein_mass'` and `'amino_acid_profile'` functions. -* Kristina Zhur - contributed to `'protein_length'` and `'essential_amino_acids'` functions. -* Julia the Cat - team's emotional support. - - -![photo_2023-09-26_18-33-49_3](https://github.com/uzunmasha/HW4_Functions2/assets/44806106/63fdea24-5c0a-4650-8bed-181871aa540f) - - -In case of non-working code: - -* Please blame the one who has the paws -* Report any problems directly to the GitHub issue tracker - -or - -* Send your feedback to uzunmasha@gmail.com From 34311350fa6eafd40659bd50d4868ee9caedfde2 Mon Sep 17 00:00:00 2001 From: Maria Uzun <44806106+uzunmasha@users.noreply.github.com> Date: Sun, 1 Oct 2023 12:11:54 +0300 Subject: [PATCH 28/31] Update README.md --- HW4_Uzun/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/HW4_Uzun/README.md b/HW4_Uzun/README.md index 83e68f5..f574dac 100644 --- a/HW4_Uzun/README.md +++ b/HW4_Uzun/README.md @@ -28,13 +28,13 @@ For all functions, amino acids in the sequences should be indicated as one-lette This function calculates the mass (Da) of a protein based on its amino acid sequence. As input, it takes a string of amino acids and returns the molecular weight in Da. 
Usage example: ```python -protein_mass('MARY') #593 (in Da) +aa_tools('MARY', 'amino_acid_substring') #593 (in Da) ``` ### amino_acid_profile This function displays the proportion of hydrophobic, polar, negatively, and positively charged amino acids in the protein. It takes a string of amino acids, and returns a dictionary with the result. Usage example: ```python -aa_profile('EEKFG') #{'hydrophobic': 0.4, 'polar': 0.0, '- charged': 0.4, '+ charged': 0.2} +aa_tools('EEKFG', 'amino_acid_profile') #{'hydrophobic': 0.4, 'polar': 0.0, '- charged': 0.4, '+ charged': 0.2} ``` ### amino_acid_substring This function searches for the presence of particular amino acid(s) in the entire amino acid sequence. As input, it takes a string of amino acids and a substring that needs to be found. All sequences and subsequence should be comma-separated. Any number of amino acid sequences is possible. The searched substring should be one and it should be pointed last. As an output, the function returns the position in the original sequence where the searched element was found for the first time. From 08bc52d7fa64eacd15f0718bed814cd4351e1bf2 Mon Sep 17 00:00:00 2001 From: Maria Uzun <44806106+uzunmasha@users.noreply.github.com> Date: Sun, 1 Oct 2023 12:19:01 +0300 Subject: [PATCH 29/31] Update README.md --- HW4_Uzun/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/HW4_Uzun/README.md b/HW4_Uzun/README.md index f574dac..9f00ab7 100644 --- a/HW4_Uzun/README.md +++ b/HW4_Uzun/README.md @@ -28,7 +28,7 @@ For all functions, amino acids in the sequences should be indicated as one-lette This function calculates the mass (Da) of a protein based on its amino acid sequence. As input, it takes a string of amino acids and returns the molecular weight in Da. 
Usage example: ```python -aa_tools('MARY', 'amino_acid_substring') #593 (in Da) +aa_tools('MARY', 'protein_mass') #593 (in Da) ``` ### amino_acid_profile This function displays the proportion of hydrophobic, polar, negatively, and positively charged amino acids in the protein. It takes a string of amino acids, and returns a dictionary with the result. From 4d8774b3fcaf688482dbd0cf757385465cc9f2ff Mon Sep 17 00:00:00 2001 From: Maria Uzun <44806106+uzunmasha@users.noreply.github.com> Date: Sun, 1 Oct 2023 12:23:26 +0300 Subject: [PATCH 30/31] Correct code quality --- HW4_Uzun/AAmigo.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/HW4_Uzun/AAmigo.py b/HW4_Uzun/AAmigo.py index 10c68b0..01ffaf6 100644 --- a/HW4_Uzun/AAmigo.py +++ b/HW4_Uzun/AAmigo.py @@ -11,8 +11,8 @@ def protein_mass(seq: str): 'G': 75, 'H': 155, 'I': 131, 'L': 131, 'K': 146, 'M': 149, 'F': 165, 'P': 115, 'S': 105, 'T': 119, 'W': 204, 'Y': 181, 'V': 117}) mass = 0 - for aa in aa_seq: - mass += mass_dictionary[aa] + for amino_acid in aa_seq: + mass += mass_dictionary[amino_acid] return mass @@ -31,9 +31,9 @@ def amino_acid_profile(seq: str): '- charged': ['E', 'D'], '+ charged': ['K', 'H', 'R']}) profile = dict({'hydrophobic': 0, 'polar': 0, '- charged': 0, '+ charged': 0}) - for aa in aa_seq: + for amino_acid in aa_seq: for group_name, group_list in aa_biochemistry.items(): - if aa in group_list: + if amino_acid in group_list: profile[group_name] += 1 for group, count in profile.items(): @@ -82,10 +82,11 @@ def amino_acid_count(seq: str): substring = aa_seq_upper[-1] results = [] for sequences in amino_acids: - amino_acid_count = sequences.count(substring) - results.append(amino_acid_count) + aa_count = sequences.count(substring) + results.append(aa_count) return results + def protein_length(*seq: str): """ @@ -178,4 +179,3 @@ def aa_tools(*args): aa_tools() - From 0947cb9e55e08aa9a56d5329143bed888c2cc862 Mon Sep 17 00:00:00 2001 From: Maria Uzun 
<44806106+uzunmasha@users.noreply.github.com> Date: Sun, 1 Oct 2023 12:32:39 +0300 Subject: [PATCH 31/31] Correct incompatible types issue --- HW4_Uzun/AAmigo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/HW4_Uzun/AAmigo.py b/HW4_Uzun/AAmigo.py index 01ffaf6..96e2722 100644 --- a/HW4_Uzun/AAmigo.py +++ b/HW4_Uzun/AAmigo.py @@ -29,7 +29,7 @@ def amino_acid_profile(seq: str): aa_biochemistry = dict( {'hydrophobic': ['G', 'A', 'V', 'L', 'I', 'P', 'F', 'M', 'W'], 'polar': ['S', 'T', 'C', 'N', 'Q', 'Y'], '- charged': ['E', 'D'], '+ charged': ['K', 'H', 'R']}) - profile = dict({'hydrophobic': 0, 'polar': 0, '- charged': 0, '+ charged': 0}) + profile = dict({'hydrophobic': 0.0, 'polar': 0.0, '- charged': 0.0, '+ charged': 0.0}) for amino_acid in aa_seq: for group_name, group_list in aa_biochemistry.items():