# ---------------------------------------------------------------------------
# Merge.py
# ---------------------------------------------------------------------------


def divide(sort_result, list_chr_names):
    """Group entries by their first field (the chromosome name).

    Args:
        sort_result: Sequence of entries; ``entry[0]`` is the chromosome name.
        list_chr_names: Chromosome names that define the order of the output.

    Returns:
        A list with one inner list per name in ``list_chr_names`` (same
        order). Each inner list holds the entries whose first field equals
        that name, in their original relative order. A name with no matching
        entries yields an empty list.
    """
    # Single pass over the entries instead of rescanning them once per name.
    groups = {}
    for entry in sort_result:
        groups.setdefault(entry[0], []).append(entry)
    # Copy each group so a repeated name never yields a shared list object.
    return [list(groups.get(name, ())) for name in list_chr_names]


def merge_by_chr(temp_tuple):
    """Merge overlapping or touching intervals of a single chromosome.

    Args:
        temp_tuple: Sequence of entries ``[name, start, end, ...]`` that all
            belong to one chromosome. The entries do not need to be sorted.

    Returns:
        A new list of merged entries ordered by start. Two entries are merged
        when the start of the later one is less than or equal to the end of
        the earlier one. A merged entry keeps the extra fields of the first
        entry that contributed to it. Entries are returned as fresh lists, so
        the input is never modified. Empty input yields an empty list.
    """
    if not temp_tuple:
        return []

    # The sweep below is only correct on start-ordered input. ``sorted`` is
    # stable, so input that is already ordered keeps its exact order.
    ordered = sorted(temp_tuple, key=lambda entry: entry[1])

    # Work on copies so the end coordinate can be extended without touching
    # the caller's entries.
    merged = [list(ordered[0])]
    for current in ordered[1:]:
        previous = merged[-1]
        if current[1] <= previous[2]:
            previous[2] = max(previous[2], current[2])
        else:
            merged.append(list(current))
    return merged


def merge(bed_entries: list):
    """Merge overlapping intervals separately for every chromosome.

    Args:
        bed_entries: List of entries ``[name, start, end, ...]``.

    Returns:
        A list with one inner list per distinct chromosome name, ordered by
        sorted name. Each inner list contains the merged entries of that
        chromosome as produced by ``merge_by_chr``.
    """
    names = sorted({entry[0] for entry in bed_entries})
    return [merge_by_chr(group) for group in divide(bed_entries, names)]


# ---------------------------------------------------------------------------
# Parse.py
# ---------------------------------------------------------------------------


def parse(bed_file):
    """Read a whitespace-separated interval file into a list of entries.

    Args:
        bed_file: Path of the file to read.

    Returns:
        A list with one entry per non-blank line. Each entry is the list of
        whitespace-separated fields of that line, with the second and third
        fields converted to ``int``.

    Raises:
        ValueError: If a line has fewer than three fields, or if its second
            or third field cannot be converted to ``int``.
    """

    def check(our_line):
        # Report short lines explicitly instead of failing with an opaque
        # IndexError from the indexing below.
        if len(our_line) < 3:
            raise ValueError(
                "{0} has fewer than 3 fields".format(our_line))
        try:
            our_line[1] = int(our_line[1])
            our_line[2] = int(our_line[2])
        except ValueError as err:
            raise ValueError("{0} is not an int".format(our_line)) from err
        return our_line

    result = []
    with open(bed_file, 'r') as file:
        for line in file:
            fields = line.split()
            # Blank lines (e.g. a trailing newline at end of file) carry no
            # entry and are skipped.
            if not fields:
                continue
            result.append(check(fields))
    return result


# ---------------------------------------------------------------------------
# Sort.py
# ---------------------------------------------------------------------------


def sort(bed_entries: list):
    """Return a new list of entries ordered by (first field, second field).

    Args:
        bed_entries: List of entries ``[name, start, ...]``.

    Returns:
        A new sorted list; the input list is left unchanged.
    """
    return sorted(bed_entries, key=lambda x: (x[0], x[1]))