From e46f5a55cc325325b36cdc350f7d00da62e09428 Mon Sep 17 00:00:00 2001 From: Maria Firulyova Date: Thu, 11 Oct 2018 02:20:12 +0300 Subject: [PATCH 01/16] Script for bed parsing has added --- Parsing.py | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) create mode 100644 Parsing.py diff --git a/Parsing.py b/Parsing.py new file mode 100644 index 0000000..e3c7aad --- /dev/null +++ b/Parsing.py @@ -0,0 +1,22 @@ +def parsing(bed_file): + def int_error(our_int: str): + if our_int.isdigit() is False: + raise ValueError("{0} is not a integer".format(our_int)) + def str_error(our_str: str): + if isinstance(our_str, str) is False: + raise ValueError("{0} is not a integer".format(our_str)) + def checking(our_line: list): + str_error(our_line[0]) + int_error(our_line[1]) + int_error(our_line[2]) + try: + str_error(our_line[3]) + except IndexError: + pass + result = list(tuple()) + with open(bed_file, 'r') as file: + for line in file: + if 'description' not in line: + checking(line.split()[0:-1]) + result.append(line.split()[0:-1]) + return result \ No newline at end of file From c601f9356e4e80d8cbf9e957bb36410b494b6e12 Mon Sep 17 00:00:00 2001 From: Maria Firulyova Date: Thu, 11 Oct 2018 11:23:15 +0300 Subject: [PATCH 02/16] Parsing script has changed in accordance with code review --- Parse.py | 24 ++++++++++++++++++++++++ Parsing.py | 22 ---------------------- 2 files changed, 24 insertions(+), 22 deletions(-) create mode 100644 Parse.py delete mode 100644 Parsing.py diff --git a/Parse.py b/Parse.py new file mode 100644 index 0000000..fd40499 --- /dev/null +++ b/Parse.py @@ -0,0 +1,24 @@ +def parse(bed_file): + def int_error(our_int: str): + if our_int.isdigit() is False: + raise ValueError("{0} is not an int".format(our_int)) + def str_error(our_str: str): + if isinstance(our_str, str) is False: + raise ValueError("{0} is not a str".format(our_str)) + def check(our_line: list): + str_error(our_line[0]) + int_error(our_line[1]) + int_error(our_line[2]) + if len(our_line) == 4: + str_error(our_line[3]) + return our_line + def convert(our_line: list): + our_line[1] = int(our_line[1]) + our_line[2] = int(our_line[2]) + return our_line + result = list(tuple()) + with open(bed_file, 'r') as file: + for line in file: + if 'description' not in line: + result.append(convert(check(line.split()[0:-1]))) + return result \ No newline at end of file diff --git a/Parsing.py b/Parsing.py deleted file mode 100644 index e3c7aad..0000000 --- a/Parsing.py +++ /dev/null @@ -1,22 +0,0 @@ -def parsing(bed_file): - def int_error(our_int: str): - if our_int.isdigit() is False: - raise ValueError("{0} is not a integer".format(our_int)) - def str_error(our_str: str): - if isinstance(our_str, str) is False: - raise ValueError("{0} is not a integer".format(our_str)) - def checking(our_line: list): - str_error(our_line[0]) - int_error(our_line[1]) - int_error(our_line[2]) - try: - str_error(our_line[3]) - except IndexError: - pass - result = list(tuple()) - with open(bed_file, 'r') as file: - for line in file: - if 'description' not in line: - checking(line.split()[0:-1]) - result.append(line.split()[0:-1]) - return result \ No newline at end of file From 6241be1004164ed645aca1d8259553528dd6784f Mon Sep 17 00:00:00 2001 From: Maria Firulyova Date: Thu, 11 Oct 2018 11:27:43 +0300 Subject: [PATCH 03/16] Parsing script has changed in accordance with code review --- Parse.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/Parse.py b/Parse.py index fd40499..f1a230d 100644 --- a/Parse.py +++ b/Parse.py @@ -2,9 +2,11 @@ def parse(bed_file): def int_error(our_int: str): if our_int.isdigit() is False: raise ValueError("{0} is not an int".format(our_int)) + def str_error(our_str: str): if isinstance(our_str, str) is False: raise ValueError("{0} is not a str".format(our_str)) + def check(our_line: list): str_error(our_line[0]) int_error(our_line[1]) @@ -12,13 +14,15 @@ def check(our_line: list): if len(our_line) == 4: str_error(our_line[3]) return our_line + def convert(our_line: list): our_line[1] = int(our_line[1]) our_line[2] = int(our_line[2]) return our_line + result = list(tuple()) with open(bed_file, 'r') as file: for line in file: if 'description' not in line: result.append(convert(check(line.split()[0:-1]))) - return result \ No newline at end of file + return result From 19716a751a7d3805cd52f2e90f20ce2f2ca0a32a Mon Sep 17 00:00:00 2001 From: Maria Firulyova Date: Fri, 12 Oct 2018 00:36:04 +0300 Subject: [PATCH 04/16] Remove some function due to its --- Parse.py | 25 ++++++------------------- 1 file changed, 6 insertions(+), 19 deletions(-) diff --git a/Parse.py b/Parse.py index f1a230d..de17871 100644 --- a/Parse.py +++ b/Parse.py @@ -1,28 +1,15 @@ def parse(bed_file): - def int_error(our_int: str): - if our_int.isdigit() is False: - raise ValueError("{0} is not an int".format(our_int)) - - def str_error(our_str: str): - if isinstance(our_str, str) is False: - raise ValueError("{0} is not a str".format(our_str)) def check(our_line: list): - str_error(our_line[0]) - int_error(our_line[1]) - int_error(our_line[2]) - if len(our_line) == 4: - str_error(our_line[3]) - return our_line - - def convert(our_line: list): - our_line[1] = int(our_line[1]) - our_line[2] = int(our_line[2]) + try: + our_line[1] = int(our_line[1]) + our_line[2] = int(our_line[2]) + except ValueError: + raise ValueError("{0} is not an int".format(our_line)) return our_line result = list(tuple()) with open(bed_file, 'r') as file: for line in file: - if 'description' not in line: - result.append(convert(check(line.split()[0:-1]))) + result.append(check(line.split()[0:-1])) return result From 0d3d6567166e094c0dd0bad1f037263c34cb3567 Mon Sep 17 00:00:00 2001 From: Maria Firulyova Date: Fri, 12 Oct 2018 00:39:28 +0300 Subject: [PATCH 05/16] Remove some function due to its redundancy --- Parse.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Parse.py b/Parse.py index de17871..6e72cd5 100644 --- a/Parse.py +++ b/Parse.py @@ -1,6 +1,6 @@ def parse(bed_file): - def check(our_line: list): + def check_int(our_line: list) -> list: try: our_line[1] = int(our_line[1]) our_line[2] = int(our_line[2]) @@ -11,5 +11,5 @@ def check(our_line: list): result = list(tuple()) with open(bed_file, 'r') as file: for line in file: - result.append(check(line.split()[0:-1])) + result.append(check_int(line.split()[0:-1])) return result From 0b03d271aa7b19fd4f7c01af35b52c049fb07497 Mon Sep 17 00:00:00 2001 From: Maria Firulyova Date: Fri, 12 Oct 2018 00:42:07 +0300 Subject: [PATCH 06/16] Remove some function due to its redundancy --- Parse.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Parse.py b/Parse.py index 6e72cd5..0ce69d3 100644 --- a/Parse.py +++ b/Parse.py @@ -8,7 +8,7 @@ def check_int(our_line: list) -> list: raise ValueError("{0} is not an int".format(our_line)) return our_line - result = list(tuple()) + result = list() with open(bed_file, 'r') as file: for line in file: result.append(check_int(line.split()[0:-1])) From 467cd1bf11b61b0c1d706cc9f34bb546ffe44fa1 Mon Sep 17 00:00:00 2001 From: Maria Firulyova Date: Sat, 20 Oct 2018 10:39:20 +0300 Subject: [PATCH 07/16] Changes has added --- Parse.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Parse.py b/Parse.py index 0ce69d3..c4b03a9 100644 --- a/Parse.py +++ b/Parse.py @@ -1,6 +1,6 @@ def parse(bed_file): - def check_int(our_line: list) -> list: + def check_int(our_line): try: our_line[1] = int(our_line[1]) our_line[2] = int(our_line[2]) @@ -11,5 +11,5 @@ def check_int(our_line: list) -> list: result = list() with open(bed_file, 'r') as file: for line in file: - result.append(check_int(line.split()[0:-1])) + result.append(check_int(line.split())) return result From 338d32560667448d0a6b9f31a7da90fe36ea053a Mon Sep 17 00:00:00 2001 From: Maria Firulyova Date: Tue, 6 Nov 2018 16:00:03 +0300 Subject: [PATCH 08/16] Sort.py added --- Sort.py | 44 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 Sort.py diff --git a/Sort.py b/Sort.py new file mode 100644 index 0000000..6083d15 --- /dev/null +++ b/Sort.py @@ -0,0 +1,44 @@ +ort(bed_file): + def check(our_line): + try: + our_line[1] = int(our_line[1]) + our_line[2] = int(our_line[2]) + except ValueError: + raise ValueError("{0} is not an int".format(our_line)) + return our_line + + def divide(sorted_file, chr_names): + list_chr_names = list(chr_names) + result = [] + for i in list_chr_names: + name = i + i = list() + for j in range(len(sorted_file)): + if sorted_file[j][0] == name: + i.append(sorted_file[j]) + result.append(i) + return result + + def sort_by_chr(our_list): + our_list.sort(key=lambda x: x[0]) + return our_list + + def sort_by_start(sorted_file): + sorted_file.sort(key=lambda x: x[1]) + return sorted_file + + result = list() + chr_names = set() + with open(bed_file, 'r') as file: + for line in file: + result.append(check(line.split())) + chr_names.add(line.split()[0]) + result = divide(result, chr_names) + sorted_by_start = [] + concatenated_list = [] + for i in result: + sorted_by_start.append(sort_by_start(i)) + for i in sorted_by_start: + concatenated_list += i + chr_and_pos_sorted = sort_by_chr(concatenated_list) + return chr_and_pos_sorted From c4ed48d5f41d77138b2cd7c759c802094d9b1860 Mon Sep 17 00:00:00 2001 From: Maria Firulyova Date: Tue, 6 Nov 2018 16:08:33 +0300 Subject: [PATCH 09/16] Sort.py added --- Sort.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/Sort.py b/Sort.py index 6083d15..28a0c44 100644 --- a/Sort.py +++ b/Sort.py @@ -1,4 +1,5 @@ -ort(bed_file): +def bed_sort(bed_file): + def check(our_line): try: our_line[1] = int(our_line[1]) @@ -6,7 +7,7 @@ def check(our_line): except ValueError: raise ValueError("{0} is not an int".format(our_line)) return our_line - + def divide(sorted_file, chr_names): list_chr_names = list(chr_names) result = [] @@ -18,15 +19,15 @@ def divide(sorted_file, chr_names): i.append(sorted_file[j]) result.append(i) return result - + def sort_by_chr(our_list): our_list.sort(key=lambda x: x[0]) return our_list - + def sort_by_start(sorted_file): sorted_file.sort(key=lambda x: x[1]) return sorted_file - + result = list() chr_names = set() with open(bed_file, 'r') as file: From 3ae0b07b6ba112a44569839570ea0bcf47a83e45 Mon Sep 17 00:00:00 2001 From: Maria Firulyova Date: Tue, 6 Nov 2018 16:14:32 +0300 Subject: [PATCH 10/16] Function name was changed --- Parse.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Parse.py b/Parse.py index c4b03a9..b23663e 100644 --- a/Parse.py +++ b/Parse.py @@ -1,6 +1,6 @@ def parse(bed_file): - def check_int(our_line): + def check(our_line): try: our_line[1] = int(our_line[1]) our_line[2] = int(our_line[2]) From 1a95ebd508abbd36d11da64e1c74b588cc27e166 Mon Sep 17 00:00:00 2001 From: Maria Firulyova Date: Tue, 6 Nov 2018 16:17:37 +0300 Subject: [PATCH 11/16] Function name was changed --- Parse.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Parse.py b/Parse.py index b23663e..4b9df2e 100644 --- a/Parse.py +++ b/Parse.py @@ -11,5 +11,5 @@ def check(our_line): result = list() with open(bed_file, 'r') as file: for line in file: - result.append(check_int(line.split())) + result.append(check(line.split())) return result From 772eeffac69bf61a5fb95f6fe4baca45e75c4f7e Mon Sep 17 00:00:00 2001 From: Maria Firulyova Date: Tue, 6 Nov 2018 23:57:26 +0300 Subject: [PATCH 12/16] Input for sort function was corrected --- Sort.py | 34 ++++++++++++---------------------- 1 file changed, 12 insertions(+), 22 deletions(-) diff --git a/Sort.py b/Sort.py index 28a0c44..fd96cee 100644 --- a/Sort.py +++ b/Sort.py @@ -1,24 +1,17 @@ -def bed_sort(bed_file): +def sort(bed_entries: list): - def check(our_line): - try: - our_line[1] = int(our_line[1]) - our_line[2] = int(our_line[2]) - except ValueError: - raise ValueError("{0} is not an int".format(our_line)) - return our_line - def divide(sorted_file, chr_names): + def divide(parse_result, chr_names): list_chr_names = list(chr_names) - result = [] + divided_by_chr = [] for i in list_chr_names: name = i i = list() - for j in range(len(sorted_file)): - if sorted_file[j][0] == name: - i.append(sorted_file[j]) - result.append(i) - return result + for j in range(len(parse_result)): + if parse_result[j][0] == name: + i.append(parse_result[j]) + divided_by_chr.append(i) + return divided_by_chr def sort_by_chr(our_list): our_list.sort(key=lambda x: x[0]) @@ -28,13 +21,10 @@ def sort_by_start(sorted_file): sorted_file.sort(key=lambda x: x[1]) return sorted_file - result = list() chr_names = set() - with open(bed_file, 'r') as file: - for line in file: - result.append(check(line.split())) - chr_names.add(line.split()[0]) - result = divide(result, chr_names) + for i in bed_entries: + chr_names.add(i[0]) + result = divide(bed_entries, chr_names) sorted_by_start = [] concatenated_list = [] for i in result: @@ -42,4 +32,4 @@ def sort_by_start(sorted_file): for i in sorted_by_start: concatenated_list += i chr_and_pos_sorted = sort_by_chr(concatenated_list) - return chr_and_pos_sorted + return chr_and_pos_sorted \ No newline at end of file From 2e4c108732abe2fd4c5b429a0248d9f294c0a543 Mon Sep 17 00:00:00 2001 From: Maria Firulyova Date: Wed, 7 Nov 2018 00:00:42 +0300 Subject: [PATCH 13/16] Input for sort function was corrected --- Sort.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/Sort.py b/Sort.py index fd96cee..2e5eaf2 100644 --- a/Sort.py +++ b/Sort.py @@ -1,6 +1,4 @@ def sort(bed_entries: list): - - def divide(parse_result, chr_names): list_chr_names = list(chr_names) divided_by_chr = [] @@ -32,4 +30,4 @@ def sort_by_start(sorted_file): for i in sorted_by_start: concatenated_list += i chr_and_pos_sorted = sort_by_chr(concatenated_list) - return chr_and_pos_sorted \ No newline at end of file + return chr_and_pos_sorted From 1566145d36c4fe42f28531a8e0cfb58868801cb0 Mon Sep 17 00:00:00 2001 From: Maria Firulyova Date: Sun, 18 Nov 2018 00:31:51 +0300 Subject: [PATCH 14/16] Use only one lambda function for sorting --- Sort.py | 33 +-------------------------------- 1 file changed, 1 insertion(+), 32 deletions(-) diff --git a/Sort.py b/Sort.py index 2e5eaf2..2e916ed 100644 --- a/Sort.py +++ b/Sort.py @@ -1,33 +1,2 @@ def sort(bed_entries: list): - def divide(parse_result, chr_names): - list_chr_names = list(chr_names) - divided_by_chr = [] - for i in list_chr_names: - name = i - i = list() - for j in range(len(parse_result)): - if parse_result[j][0] == name: - i.append(parse_result[j]) - divided_by_chr.append(i) - return divided_by_chr - - def sort_by_chr(our_list): - our_list.sort(key=lambda x: x[0]) - return our_list - - def sort_by_start(sorted_file): - sorted_file.sort(key=lambda x: x[1]) - return sorted_file - - chr_names = set() - for i in bed_entries: - chr_names.add(i[0]) - result = divide(bed_entries, chr_names) - sorted_by_start = [] - concatenated_list = [] - for i in result: - sorted_by_start.append(sort_by_start(i)) - for i in sorted_by_start: - concatenated_list += i - chr_and_pos_sorted = sort_by_chr(concatenated_list) - return chr_and_pos_sorted + return sorted(bed_entries, key=lambda x: (x[0], x[1])) From 74a74dd3fdf2e603a7b79f9e382f76ff21fde278 Mon Sep 17 00:00:00 2001 From: Maria Firulyova Date: Wed, 21 Nov 2018 15:03:56 +0300 Subject: [PATCH 15/16] Merge function added --- Merge.py | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) create mode 100644 Merge.py diff --git a/Merge.py b/Merge.py new file mode 100644 index 0000000..cc4e755 --- /dev/null +++ b/Merge.py @@ -0,0 +1,34 @@ +def divide(sort_result, list_chr_names): + divided_by_chr = [] + for i in list_chr_names: + name = i + i = list() + for j in range(len(sort_result)): + if sort_result[j][0] == name: + i.append(sort_result[j]) + divided_by_chr.append(i) + return divided_by_chr + + +def merge_by_chr(temp_tuple): + merged = [temp_tuple[0]] + for current in temp_tuple: + previous = merged[-1] + if current[1] <= previous[2]: + previous[2] = max(previous[2], current[2]) + else: + merged.append(current) + return merged + + +def merge(bed_entries: list): + result = list() + names = set() + for i in bed_entries: + names.add(i[0]) + names = sorted(list(names)) + divided_by_chr = divide(bed_entries, names) + for i in divided_by_chr: + result.append(merge_by_chr(i)) + return result + From ca212f7be0dac34fbaa9b66874e478e46bd9a9a9 Mon Sep 17 00:00:00 2001 From: Maria Firulyova Date: Wed, 21 Nov 2018 15:08:13 +0300 Subject: [PATCH 16/16] Delete blank lines from the end of the file (except 1) --- Merge.py | 1 - 1 file changed, 1 deletion(-) diff --git a/Merge.py b/Merge.py index cc4e755..148e41a 100644 --- a/Merge.py +++ b/Merge.py @@ -31,4 +31,3 @@ def merge(bed_entries: list): for i in divided_by_chr: result.append(merge_by_chr(i)) return result -