@@ -519,6 +519,235 @@ mod tests {
519519 }
520520 }
521521
522+ /// Test that reads with empty SEQ (represented as `*` in SAM format) are handled correctly
523+ /// in the pileup code path used by base-depth. Empty SEQ reads should still count toward
524+ /// depth, but their bases should be counted as N. This tests the fix for issue #91.
525+ #[ test]
526+ fn test_empty_seq_pileup ( ) {
527+ use rust_htslib:: bam:: { IndexedReader , Writer , index} ;
528+ use tempfile:: tempdir;
529+
530+ let tempdir = tempdir ( ) . unwrap ( ) ;
531+ let bam_path = tempdir. path ( ) . join ( "empty_seq.bam" ) ;
532+
533+ // Create header
534+ let mut header = bam:: header:: Header :: new ( ) ;
535+ let mut chr1 = bam:: header:: HeaderRecord :: new ( b"SQ" ) ;
536+ chr1. push_tag ( b"SN" , & "chr1" . to_owned ( ) ) ;
537+ chr1. push_tag ( b"LN" , & "100" . to_owned ( ) ) ;
538+ header. push_record ( & chr1) ;
539+ let view = bam:: HeaderView :: from_header ( & header) ;
540+
541+ // Create records including one with empty SEQ
542+ // Normal read with 'A' bases at positions 1-25
543+ let normal_record = Record :: from_sam (
544+ & view,
545+ b"NORMAL\t 0\t chr1\t 1\t 40\t 25M\t *\t 0\t 0\t AAAAAAAAAAAAAAAAAAAAAAAAA\t #########################" ,
546+ )
547+ . unwrap ( ) ;
548+
549+ // Read with empty SEQ at positions 10-35 - should count toward depth and N count
550+ let empty_seq_record = Record :: from_sam (
551+ & view,
552+ b"EMPTY_SEQ\t 0\t chr1\t 10\t 40\t 25M\t *\t 0\t 0\t *\t *" ,
553+ )
554+ . unwrap ( ) ;
555+
556+ // Another normal read with 'G' bases at positions 15-40
557+ let normal_record2 = Record :: from_sam (
558+ & view,
559+ b"NORMAL2\t 0\t chr1\t 15\t 40\t 25M\t *\t 0\t 0\t GGGGGGGGGGGGGGGGGGGGGGGGG\t #########################" ,
560+ )
561+ . unwrap ( ) ;
562+
563+ // Write BAM file
564+ let mut writer = Writer :: from_path ( & bam_path, & header, bam:: Format :: Bam ) . unwrap ( ) ;
565+ writer. write ( & normal_record) . unwrap ( ) ;
566+ writer. write ( & empty_seq_record) . unwrap ( ) ;
567+ writer. write ( & normal_record2) . unwrap ( ) ;
568+ drop ( writer) ;
569+
570+ // Index the BAM file
571+ index:: build ( & bam_path, None , index:: Type :: Bai , 1 ) . unwrap ( ) ;
572+
573+ // Read back and create pileup
574+ let mut reader = IndexedReader :: from_path ( & bam_path) . unwrap ( ) ;
575+ let header_view = reader. header ( ) . clone ( ) ;
576+
577+ let read_filter = DefaultReadFilter :: new ( 0 , 0 , 0 ) ;
578+
579+ // Test at position 14 (0-based) = position 15 (1-based)
580+ // At this position: NORMAL (A), EMPTY_SEQ (*), NORMAL2 (G) = depth 3
581+ reader. fetch ( ( "chr1" , 14 , 15 ) ) . unwrap ( ) ;
582+ let pileup_iter = reader. pileup ( ) ;
583+
584+ for pileup_result in pileup_iter {
585+ let pileup = pileup_result. unwrap ( ) ;
586+ if pileup. pos ( ) == 14 {
587+ let position = PileupPosition :: from_pileup ( pileup, & header_view, & read_filter, None ) ;
588+
589+ // Depth should include all 3 reads
590+ assert_eq ! ( position. depth, 3 , "Depth should be 3 (NORMAL + EMPTY_SEQ + NORMAL2)" ) ;
591+
592+ // 'A' count from NORMAL read
593+ assert_eq ! ( position. a, 1 , "Should have 1 A from NORMAL read" ) ;
594+
595+ // 'G' count from NORMAL2 read
596+ assert_eq ! ( position. g, 1 , "Should have 1 G from NORMAL2 read" ) ;
597+
598+ // 'N' count from EMPTY_SEQ read (empty seq should be counted as N)
599+ assert_eq ! ( position. n, 1 , "Empty SEQ read should be counted as N" ) ;
600+
601+ break ;
602+ }
603+ }
604+ }
605+
606+ /// Test that reads with empty SEQ work correctly with base quality filtering.
607+ /// When a base quality filter is set and SEQ is empty, the read should be counted as N.
608+ #[ test]
609+ fn test_empty_seq_with_base_qual_filter ( ) {
610+ use rust_htslib:: bam:: { IndexedReader , Writer , index} ;
611+ use tempfile:: tempdir;
612+
613+ let tempdir = tempdir ( ) . unwrap ( ) ;
614+ let bam_path = tempdir. path ( ) . join ( "empty_seq_bq.bam" ) ;
615+
616+ // Create header
617+ let mut header = bam:: header:: Header :: new ( ) ;
618+ let mut chr1 = bam:: header:: HeaderRecord :: new ( b"SQ" ) ;
619+ chr1. push_tag ( b"SN" , & "chr1" . to_owned ( ) ) ;
620+ chr1. push_tag ( b"LN" , & "100" . to_owned ( ) ) ;
621+ header. push_record ( & chr1) ;
622+ let view = bam:: HeaderView :: from_header ( & header) ;
623+
624+ // Normal read with high quality
625+ let normal_record = Record :: from_sam (
626+ & view,
627+ b"NORMAL\t 0\t chr1\t 1\t 40\t 25M\t *\t 0\t 0\t AAAAAAAAAAAAAAAAAAAAAAAAA\t IIIIIIIIIIIIIIIIIIIIIIIII" ,
628+ )
629+ . unwrap ( ) ;
630+
631+ // Read with empty SEQ
632+ let empty_seq_record = Record :: from_sam (
633+ & view,
634+ b"EMPTY_SEQ\t 0\t chr1\t 1\t 40\t 25M\t *\t 0\t 0\t *\t *" ,
635+ )
636+ . unwrap ( ) ;
637+
638+ // Write BAM file
639+ let mut writer = Writer :: from_path ( & bam_path, & header, bam:: Format :: Bam ) . unwrap ( ) ;
640+ writer. write ( & normal_record) . unwrap ( ) ;
641+ writer. write ( & empty_seq_record) . unwrap ( ) ;
642+ drop ( writer) ;
643+
644+ // Index the BAM file
645+ index:: build ( & bam_path, None , index:: Type :: Bai , 1 ) . unwrap ( ) ;
646+
647+ // Read back and create pileup with base quality filter
648+ let mut reader = IndexedReader :: from_path ( & bam_path) . unwrap ( ) ;
649+ let header_view = reader. header ( ) . clone ( ) ;
650+
651+ let read_filter = DefaultReadFilter :: new ( 0 , 0 , 0 ) ;
652+
653+ reader. fetch ( ( "chr1" , 0 , 1 ) ) . unwrap ( ) ;
654+ let pileup_iter = reader. pileup ( ) ;
655+
656+ for pileup_result in pileup_iter {
657+ let pileup = pileup_result. unwrap ( ) ;
658+ if pileup. pos ( ) == 0 {
659+ // With base quality filter of 20
660+ let position =
661+ PileupPosition :: from_pileup ( pileup, & header_view, & read_filter, Some ( 20 ) ) ;
662+
663+ // Depth should include both reads
664+ assert_eq ! ( position. depth, 2 , "Depth should be 2" ) ;
665+
666+ // 'A' from NORMAL (quality 'I' = 40, passes filter)
667+ assert_eq ! ( position. a, 1 , "Should have 1 A from high-quality NORMAL read" ) ;
668+
669+ // Empty SEQ read should be counted as N (regardless of base qual filter)
670+ assert_eq ! ( position. n, 1 , "Empty SEQ read should be counted as N" ) ;
671+
672+ break ;
673+ }
674+ }
675+ }
676+
677+ /// Test that reads with empty SEQ work correctly in mate-aware pileup mode.
678+ #[ test]
679+ fn test_empty_seq_mate_aware ( ) {
680+ use rust_htslib:: bam:: { IndexedReader , Writer , index} ;
681+ use tempfile:: tempdir;
682+
683+ let tempdir = tempdir ( ) . unwrap ( ) ;
684+ let bam_path = tempdir. path ( ) . join ( "empty_seq_mate.bam" ) ;
685+
686+ // Create header
687+ let mut header = bam:: header:: Header :: new ( ) ;
688+ let mut chr1 = bam:: header:: HeaderRecord :: new ( b"SQ" ) ;
689+ chr1. push_tag ( b"SN" , & "chr1" . to_owned ( ) ) ;
690+ chr1. push_tag ( b"LN" , & "100" . to_owned ( ) ) ;
691+ header. push_record ( & chr1) ;
692+ let view = bam:: HeaderView :: from_header ( & header) ;
693+
694+ // Normal read
695+ let normal_record = Record :: from_sam (
696+ & view,
697+ b"NORMAL\t 0\t chr1\t 1\t 40\t 25M\t *\t 0\t 0\t AAAAAAAAAAAAAAAAAAAAAAAAA\t #########################" ,
698+ )
699+ . unwrap ( ) ;
700+
701+ // Read with empty SEQ
702+ let empty_seq_record = Record :: from_sam (
703+ & view,
704+ b"EMPTY_SEQ\t 0\t chr1\t 1\t 40\t 25M\t *\t 0\t 0\t *\t *" ,
705+ )
706+ . unwrap ( ) ;
707+
708+ // Write BAM file
709+ let mut writer = Writer :: from_path ( & bam_path, & header, bam:: Format :: Bam ) . unwrap ( ) ;
710+ writer. write ( & normal_record) . unwrap ( ) ;
711+ writer. write ( & empty_seq_record) . unwrap ( ) ;
712+ drop ( writer) ;
713+
714+ // Index the BAM file
715+ index:: build ( & bam_path, None , index:: Type :: Bai , 1 ) . unwrap ( ) ;
716+
717+ // Read back and create pileup in mate-aware mode
718+ let mut reader = IndexedReader :: from_path ( & bam_path) . unwrap ( ) ;
719+ let header_view = reader. header ( ) . clone ( ) ;
720+
721+ let read_filter = DefaultReadFilter :: new ( 0 , 0 , 0 ) ;
722+
723+ reader. fetch ( ( "chr1" , 0 , 1 ) ) . unwrap ( ) ;
724+ let pileup_iter = reader. pileup ( ) ;
725+
726+ for pileup_result in pileup_iter {
727+ let pileup = pileup_result. unwrap ( ) ;
728+ if pileup. pos ( ) == 0 {
729+ let position = PileupPosition :: from_pileup_mate_aware (
730+ pileup,
731+ & header_view,
732+ & read_filter,
733+ None ,
734+ MateResolutionStrategy :: Original ,
735+ ) ;
736+
737+ // Depth should include both reads
738+ assert_eq ! ( position. depth, 2 , "Depth should be 2" ) ;
739+
740+ // 'A' from NORMAL read
741+ assert_eq ! ( position. a, 1 , "Should have 1 A from NORMAL read" ) ;
742+
743+ // Empty SEQ read should be counted as N
744+ assert_eq ! ( position. n, 1 , "Empty SEQ read should be counted as N" ) ;
745+
746+ break ;
747+ }
748+ }
749+ }
750+
522751 /// Test N strategy
523752 #[ test]
524753 fn test_n_strategy ( ) {
0 commit comments