From 3dd461e252211a3aa336361651dbc079d36fb6c2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Mokrej=C5=A1?= Date: Thu, 25 Jan 2018 19:58:54 +0100 Subject: [PATCH 1/2] Support multiple input SAM/BAM files Iterate over all input files while increasing internal counters and after that print out the statistics. --- src/bin/sga-astat.py | 70 ++++++++++++++++++++++---------------------- 1 file changed, 35 insertions(+), 35 deletions(-) diff --git a/src/bin/sga-astat.py b/src/bin/sga-astat.py index 819b6a2c..a0ef71f3 100755 --- a/src/bin/sga-astat.py +++ b/src/bin/sga-astat.py @@ -30,8 +30,8 @@ def computeAStat(arrivalRate, len, n): arrivalRate = 0 def usage(): - print 'usage: sga-astat.py in.bam' - print 'Compute Myers\' a-statistic for a set of contigs using the read alignments in in.bam' + print 'usage: sga-astat.py [OPTIONS] *.bam' + print 'Compute Myers\' a-statistic for a set of contigs using the read alignments from at least one SAM/BAM file' print 'Options:' print ' -m=INT only compute a-stat for contigs at least INT bases in length' print ' -b=INT use the longest INT contigs to perform the initial estimate' @@ -62,53 +62,53 @@ def usage(): usage() sys.exit(1) -if len(args) == 0: - print 'Error: a BAM file must be provided\n' +if not args: + print 'Error: at least one SAM/BAM file must be provided\n' usage() sys.exit(2) -bamFilename = args[0] - -sys.stderr.write('Reading alignments from ' + bamFilename + '\n') contigData = list() - -# Read the contig names and lengths from the bam -bamFile = pysam.Samfile(bamFilename, "rb") - -for (name, length) in zip(bamFile.references, bamFile.lengths): - t = name, length, 0 - contigData.append(ContigData(name, length)) - #print 'Name: ' + name - #print 'Length: ' + str(len) - -# Read the alignments and populate the counts totalReads = 0 sumReadLength = 0 - last_ref_idx = -1 last_pos = -1 -for alignment in bamFile: +for bamFilename in args: + sys.stderr.write('Reading alignments from ' + bamFilename + '\n') + + # Read the contig names and lengths from the bam + bamFile = pysam.Samfile(bamFilename, "rb") + + for (name, length) in zip(bamFile.references, bamFile.lengths): + t = name, length, 0 + contigData.append(ContigData(name, length)) + #print 'Name: ' + name + #print 'Length: ' + str(len) + + # Read the alignments and populate the counts + for alignment in bamFile: + + if alignment.is_unmapped: + continue - if alignment.is_unmapped: - continue + ref_idx = alignment.rname + ref_name = bamFile.getrname(ref_idx) - ref_idx = alignment.rname - ref_name = bamFile.getrname(ref_idx) + pos = alignment.pos - pos = alignment.pos + if ref_idx == last_ref_idx and pos == last_pos and not bKeepDuplicates: + continue - if ref_idx == last_ref_idx and pos == last_pos and not bKeepDuplicates: - continue + # Update the count + cd = contigData[ref_idx] + cd.n += 1 + totalReads += 1 + sumReadLength += alignment.rlen + assert(cd.name == ref_name) + last_ref_idx = ref_idx + last_pos = pos - # Update the count - cd = contigData[ref_idx] - cd.n += 1 - totalReads += 1 - sumReadLength += alignment.rlen - assert(cd.name == ref_name) - last_ref_idx = ref_idx - last_pos = pos + bamFile.close() avgReadLen = sumReadLength / totalReads contigData.sort(key=lambda c : c.len, reverse=True) From e6565dc3d3ca747a2f6e659f62d20235efb111a4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Mokrej=C5=A1?= Date: Thu, 25 Jan 2018 20:39:34 +0100 Subject: [PATCH 2/2] Fix typo in INVERSESUFFIXARRAH_H (issue #109) --- src/SuffixTools/InverseSuffixArray.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/SuffixTools/InverseSuffixArray.h b/src/SuffixTools/InverseSuffixArray.h index 0d8c4ab2..dd3cc5e8 100644 --- a/src/SuffixTools/InverseSuffixArray.h +++ b/src/SuffixTools/InverseSuffixArray.h @@ -8,7 +8,7 @@ // Implemented as a Vector of Vectors // #ifndef INVERSESUFFIXARRAY_H -#define INVERSESUFFIXARRAH_H +#define INVERSESUFFIXARRAY_H #include "STCommon.h" #include "SuffixArray.h"