def parsing_bed(bed_file):
    """Parse a BED file into a list of tuples.

    Each data line is split on whitespace. The second and third fields
    (start and end) are converted to ``int`` and at most the first five
    fields of the line are kept, so tuples have between three and five
    elements.

    Blank lines, comment lines (first token starts with ``#``) and
    ``browser`` / ``track`` header lines are skipped.

    Args:
        bed_file: Path of the BED file to read.

    Returns:
        A list with one tuple per data line, in file order.

    Raises:
        IndexError: A data line has fewer than three fields.
        ValueError: The start or end field is not a valid integer.
    """
    header_keywords = ("browser", "track")

    parsed_data = []
    with open(bed_file) as input_handle:
        for line in input_handle:
            fields = line.split()

            # A blank line carries no record; skip it instead of failing
            # the length check below.
            if not fields:
                continue

            # Only the FIRST token decides whether a line is an annotation.
            # Checking every token would drop data lines that merely have
            # "track"/"browser"/"#" in a later column, and would miss
            # comments written without a space after the '#'.
            first = fields[0]
            if first.startswith("#") or first in header_keywords:
                continue

            if len(fields) < 3:
                raise IndexError("Program expects each line to "
                                 "have chromosome name, starting "
                                 "and ending position. Check line: {} "
                                 .format(' '.join(fields)))

            try:
                start, end = int(fields[1]), int(fields[2])
            except ValueError:
                raise ValueError("Program expects starting and "
                                 "ending positions to be valid "
                                 "numbers (integer). Check line: {} "
                                 .format(' '.join(fields)))

            # Keep chromosome, start, end and up to two optional columns.
            parsed_data.append((first, start, end) + tuple(fields[3:5]))
    return parsed_data