Skip to content

Commit b6cc640

Browse files
committed
test: add tests for empty SEQ handling, bump to v1.2.0
- Add 3 tests for empty SEQ records in base-depth pileup code path - Bump version to 1.2.0 - Add changelog entry for PR #92 - Add README note about empty SEQ handling
1 parent 90e64d5 commit b6cc640

File tree

5 files changed

+237
-2
lines changed

5 files changed

+237
-2
lines changed

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
11
# CHANGELOG
22

3+
## 1.2.0
4+
5+
- Fix: Handle BAM records with empty SEQ fields (`*` in SAM format) in `base-depth`. Previously this would cause an out-of-bounds error. Now these reads still count toward depth, and their bases are counted as `N`. ([#92](https://github.com/sstadick/perbase/pull/92) by @ghuls)
6+
37
## 1.1.0
48

59
- Fix/Feat: For both MapQualBaseQualN and BaseQualMapQualN only resolve to N when the bases are ambiguous, otherwise return the consensus base.

Cargo.lock

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "perbase"
3-
version = "1.1.0"
3+
version = "1.2.0"
44
authors = ["Seth Stadick <sstadick@gmail.com>"]
55
edition = "2024"
66
license = "MIT"

README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,8 @@ You can also download a binary from the [releases](https://github.com/sstadick/p
3434

3535
The `base-depth` tool walks over every position in the BAM/CRAM file and calculates the depth, as well as the number of each nucleotide at the given position. Additionally, it counts the numbers of Ins/Dels at each position.
3636

37+
**Note on empty SEQ records:** BAM records with empty SEQ fields (represented as `*` in SAM format) are handled gracefully. These reads still count toward depth, but since the actual nucleotide cannot be determined, they are counted as `N`.
38+
3739
The output columns are as follows:
3840

3941
| Column | Description |

src/lib/position/pileup_position.rs

Lines changed: 229 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -519,6 +519,235 @@ mod tests {
519519
}
520520
}
521521

522+
/// Test that reads with empty SEQ (represented as `*` in SAM format) are handled correctly
523+
/// in the pileup code path used by base-depth. Empty SEQ reads should still count toward
524+
/// depth, but their bases should be counted as N. This tests the fix for issue #91.
525+
#[test]
526+
fn test_empty_seq_pileup() {
527+
use rust_htslib::bam::{IndexedReader, Writer, index};
528+
use tempfile::tempdir;
529+
530+
let tempdir = tempdir().unwrap();
531+
let bam_path = tempdir.path().join("empty_seq.bam");
532+
533+
// Create header
534+
let mut header = bam::header::Header::new();
535+
let mut chr1 = bam::header::HeaderRecord::new(b"SQ");
536+
chr1.push_tag(b"SN", &"chr1".to_owned());
537+
chr1.push_tag(b"LN", &"100".to_owned());
538+
header.push_record(&chr1);
539+
let view = bam::HeaderView::from_header(&header);
540+
541+
// Create records including one with empty SEQ
542+
// Normal read with 'A' bases at positions 1-25
543+
let normal_record = Record::from_sam(
544+
&view,
545+
b"NORMAL\t0\tchr1\t1\t40\t25M\t*\t0\t0\tAAAAAAAAAAAAAAAAAAAAAAAAA\t#########################",
546+
)
547+
.unwrap();
548+
549+
// Read with empty SEQ at positions 10-35 - should count toward depth and N count
550+
let empty_seq_record = Record::from_sam(
551+
&view,
552+
b"EMPTY_SEQ\t0\tchr1\t10\t40\t25M\t*\t0\t0\t*\t*",
553+
)
554+
.unwrap();
555+
556+
// Another normal read with 'G' bases at positions 15-40
557+
let normal_record2 = Record::from_sam(
558+
&view,
559+
b"NORMAL2\t0\tchr1\t15\t40\t25M\t*\t0\t0\tGGGGGGGGGGGGGGGGGGGGGGGGG\t#########################",
560+
)
561+
.unwrap();
562+
563+
// Write BAM file
564+
let mut writer = Writer::from_path(&bam_path, &header, bam::Format::Bam).unwrap();
565+
writer.write(&normal_record).unwrap();
566+
writer.write(&empty_seq_record).unwrap();
567+
writer.write(&normal_record2).unwrap();
568+
drop(writer);
569+
570+
// Index the BAM file
571+
index::build(&bam_path, None, index::Type::Bai, 1).unwrap();
572+
573+
// Read back and create pileup
574+
let mut reader = IndexedReader::from_path(&bam_path).unwrap();
575+
let header_view = reader.header().clone();
576+
577+
let read_filter = DefaultReadFilter::new(0, 0, 0);
578+
579+
// Test at position 14 (0-based) = position 15 (1-based)
580+
// At this position: NORMAL (A), EMPTY_SEQ (*), NORMAL2 (G) = depth 3
581+
reader.fetch(("chr1", 14, 15)).unwrap();
582+
let pileup_iter = reader.pileup();
583+
584+
for pileup_result in pileup_iter {
585+
let pileup = pileup_result.unwrap();
586+
if pileup.pos() == 14 {
587+
let position = PileupPosition::from_pileup(pileup, &header_view, &read_filter, None);
588+
589+
// Depth should include all 3 reads
590+
assert_eq!(position.depth, 3, "Depth should be 3 (NORMAL + EMPTY_SEQ + NORMAL2)");
591+
592+
// 'A' count from NORMAL read
593+
assert_eq!(position.a, 1, "Should have 1 A from NORMAL read");
594+
595+
// 'G' count from NORMAL2 read
596+
assert_eq!(position.g, 1, "Should have 1 G from NORMAL2 read");
597+
598+
// 'N' count from EMPTY_SEQ read (empty seq should be counted as N)
599+
assert_eq!(position.n, 1, "Empty SEQ read should be counted as N");
600+
601+
break;
602+
}
603+
}
604+
}
605+
606+
/// Test that reads with empty SEQ work correctly with base quality filtering.
607+
/// When a base quality filter is set and SEQ is empty, the read should be counted as N.
608+
#[test]
609+
fn test_empty_seq_with_base_qual_filter() {
610+
use rust_htslib::bam::{IndexedReader, Writer, index};
611+
use tempfile::tempdir;
612+
613+
let tempdir = tempdir().unwrap();
614+
let bam_path = tempdir.path().join("empty_seq_bq.bam");
615+
616+
// Create header
617+
let mut header = bam::header::Header::new();
618+
let mut chr1 = bam::header::HeaderRecord::new(b"SQ");
619+
chr1.push_tag(b"SN", &"chr1".to_owned());
620+
chr1.push_tag(b"LN", &"100".to_owned());
621+
header.push_record(&chr1);
622+
let view = bam::HeaderView::from_header(&header);
623+
624+
// Normal read with high quality
625+
let normal_record = Record::from_sam(
626+
&view,
627+
b"NORMAL\t0\tchr1\t1\t40\t25M\t*\t0\t0\tAAAAAAAAAAAAAAAAAAAAAAAAA\tIIIIIIIIIIIIIIIIIIIIIIIII",
628+
)
629+
.unwrap();
630+
631+
// Read with empty SEQ
632+
let empty_seq_record = Record::from_sam(
633+
&view,
634+
b"EMPTY_SEQ\t0\tchr1\t1\t40\t25M\t*\t0\t0\t*\t*",
635+
)
636+
.unwrap();
637+
638+
// Write BAM file
639+
let mut writer = Writer::from_path(&bam_path, &header, bam::Format::Bam).unwrap();
640+
writer.write(&normal_record).unwrap();
641+
writer.write(&empty_seq_record).unwrap();
642+
drop(writer);
643+
644+
// Index the BAM file
645+
index::build(&bam_path, None, index::Type::Bai, 1).unwrap();
646+
647+
// Read back and create pileup with base quality filter
648+
let mut reader = IndexedReader::from_path(&bam_path).unwrap();
649+
let header_view = reader.header().clone();
650+
651+
let read_filter = DefaultReadFilter::new(0, 0, 0);
652+
653+
reader.fetch(("chr1", 0, 1)).unwrap();
654+
let pileup_iter = reader.pileup();
655+
656+
for pileup_result in pileup_iter {
657+
let pileup = pileup_result.unwrap();
658+
if pileup.pos() == 0 {
659+
// With base quality filter of 20
660+
let position =
661+
PileupPosition::from_pileup(pileup, &header_view, &read_filter, Some(20));
662+
663+
// Depth should include both reads
664+
assert_eq!(position.depth, 2, "Depth should be 2");
665+
666+
// 'A' from NORMAL (quality 'I' = 40, passes filter)
667+
assert_eq!(position.a, 1, "Should have 1 A from high-quality NORMAL read");
668+
669+
// Empty SEQ read should be counted as N (regardless of base qual filter)
670+
assert_eq!(position.n, 1, "Empty SEQ read should be counted as N");
671+
672+
break;
673+
}
674+
}
675+
}
676+
677+
/// Test that reads with empty SEQ work correctly in mate-aware pileup mode.
678+
#[test]
679+
fn test_empty_seq_mate_aware() {
680+
use rust_htslib::bam::{IndexedReader, Writer, index};
681+
use tempfile::tempdir;
682+
683+
let tempdir = tempdir().unwrap();
684+
let bam_path = tempdir.path().join("empty_seq_mate.bam");
685+
686+
// Create header
687+
let mut header = bam::header::Header::new();
688+
let mut chr1 = bam::header::HeaderRecord::new(b"SQ");
689+
chr1.push_tag(b"SN", &"chr1".to_owned());
690+
chr1.push_tag(b"LN", &"100".to_owned());
691+
header.push_record(&chr1);
692+
let view = bam::HeaderView::from_header(&header);
693+
694+
// Normal read
695+
let normal_record = Record::from_sam(
696+
&view,
697+
b"NORMAL\t0\tchr1\t1\t40\t25M\t*\t0\t0\tAAAAAAAAAAAAAAAAAAAAAAAAA\t#########################",
698+
)
699+
.unwrap();
700+
701+
// Read with empty SEQ
702+
let empty_seq_record = Record::from_sam(
703+
&view,
704+
b"EMPTY_SEQ\t0\tchr1\t1\t40\t25M\t*\t0\t0\t*\t*",
705+
)
706+
.unwrap();
707+
708+
// Write BAM file
709+
let mut writer = Writer::from_path(&bam_path, &header, bam::Format::Bam).unwrap();
710+
writer.write(&normal_record).unwrap();
711+
writer.write(&empty_seq_record).unwrap();
712+
drop(writer);
713+
714+
// Index the BAM file
715+
index::build(&bam_path, None, index::Type::Bai, 1).unwrap();
716+
717+
// Read back and create pileup in mate-aware mode
718+
let mut reader = IndexedReader::from_path(&bam_path).unwrap();
719+
let header_view = reader.header().clone();
720+
721+
let read_filter = DefaultReadFilter::new(0, 0, 0);
722+
723+
reader.fetch(("chr1", 0, 1)).unwrap();
724+
let pileup_iter = reader.pileup();
725+
726+
for pileup_result in pileup_iter {
727+
let pileup = pileup_result.unwrap();
728+
if pileup.pos() == 0 {
729+
let position = PileupPosition::from_pileup_mate_aware(
730+
pileup,
731+
&header_view,
732+
&read_filter,
733+
None,
734+
MateResolutionStrategy::Original,
735+
);
736+
737+
// Depth should include both reads
738+
assert_eq!(position.depth, 2, "Depth should be 2");
739+
740+
// 'A' from NORMAL read
741+
assert_eq!(position.a, 1, "Should have 1 A from NORMAL read");
742+
743+
// Empty SEQ read should be counted as N
744+
assert_eq!(position.n, 1, "Empty SEQ read should be counted as N");
745+
746+
break;
747+
}
748+
}
749+
}
750+
522751
/// Test N strategy
523752
#[test]
524753
fn test_n_strategy() {

0 commit comments

Comments
 (0)