diff --git a/.github/workflows/Barcode.Custom.10x.yml b/.github/workflows/Barcode.Custom.10x.yml
new file mode 100644
index 00000000..92c1f606
--- /dev/null
+++ b/.github/workflows/Barcode.Custom.10x.yml
@@ -0,0 +1,82 @@
+name: Barcode Custom SC 10x
+
+on:
+  workflow_dispatch:
+  schedule:
+  - cron: '0 4 * * 1'
+
+env:
+  LAUNCHER: ${{github.workspace}}/tests/github/run_barcode_test.py
+  CFG_DIR: /abga/work/andreyp/ci_isoquant/data/barcodes
+  BIN_PATH: /abga/work/andreyp/ci_isoquant/bin/
+  OUTPUT_BASE: /abga/work/andreyp/ci_isoquant/output/${{github.ref_name}}/barcodes/
+
+concurrency:
+  group: ${{github.workflow}}
+  cancel-in-progress: false
+
+jobs:
+  check-changes:
+    runs-on:
+      labels: [isoquant]
+    name: 'Check for recent changes'
+    outputs:
+      has_changes: ${{steps.check.outputs.has_changes}}
+    steps:
+      - name: 'Checkout'
+        uses: actions/checkout@v3
+        with:
+          fetch-depth: 0
+
+      - name: 'Check for commits in last 7 days'
+        id: check
+        run: |
+          # Always run on manual trigger
+          if [ "${{github.event_name}}" = "workflow_dispatch" ]; then
+            echo "has_changes=true" >> $GITHUB_OUTPUT
+            exit 0
+          fi
+          # Check for commits in last 7 days
+          COMMITS=$(git log --oneline --since="7 days ago" | wc -l)
+          if [ "$COMMITS" -gt 0 ]; then
+            echo "Found $COMMITS commits in last 7 days"
+            echo "has_changes=true" >> $GITHUB_OUTPUT
+          else
+            echo "No commits in last 7 days, skipping"
+            echo "has_changes=false" >> $GITHUB_OUTPUT
+          fi
+
+  launch-runner:
+    needs: check-changes
+    if: needs.check-changes.outputs.has_changes == 'true'
+    runs-on:
+      labels: [isoquant]
+    name: 'Running custom_sc 10x barcode detection QC'
+
+    steps:
+      - name: 'Cleanup'
+        run: >
+          set -e &&
+          shopt -s dotglob &&
+          rm -rf *
+
+      - name: 'Checkout'
+        uses: actions/checkout@v3
+        with:
+          fetch-depth: 1
+
+      - name: 'Custom SC 10x small (ArrayKmerIndexer)'
+        shell: bash
+        env:
+          STEP_NAME: Mouse.10x.custom_sc.small
+        run: |
+          export PATH=$PATH:${{env.BIN_PATH}}
+          python3 ${{env.LAUNCHER}} ${{env.CFG_DIR}}/${{env.STEP_NAME}}.cfg -o ${{env.OUTPUT_BASE}}
+
+      - name: 'Custom SC 10x large (Array2BitKmerIndexer)'
+        shell: bash
+        env:
+          STEP_NAME: Mouse.10x.custom_sc.large
+        run: |
+          export PATH=$PATH:${{env.BIN_PATH}}
+          python3 ${{env.LAUNCHER}} ${{env.CFG_DIR}}/${{env.STEP_NAME}}.cfg -o ${{env.OUTPUT_BASE}}
diff --git a/.github/workflows/Barcode.Custom.Curio.yml b/.github/workflows/Barcode.Custom.Curio.yml
new file mode 100644
index 00000000..3c2b0c69
--- /dev/null
+++ b/.github/workflows/Barcode.Custom.Curio.yml
@@ -0,0 +1,82 @@
+name: Barcode Custom SC Curio
+
+on:
+  workflow_dispatch:
+  schedule:
+  - cron: '0 3 * * 3'
+
+env:
+  LAUNCHER: ${{github.workspace}}/tests/github/run_barcode_test.py
+  CFG_DIR: /abga/work/andreyp/ci_isoquant/data/barcodes
+  BIN_PATH: /abga/work/andreyp/ci_isoquant/bin/
+  OUTPUT_BASE: /abga/work/andreyp/ci_isoquant/output/${{github.ref_name}}/barcodes/
+
+concurrency:
+  group: ${{github.workflow}}
+  cancel-in-progress: false
+
+jobs:
+  check-changes:
+    runs-on:
+      labels: [isoquant]
+    name: 'Check for recent changes'
+    outputs:
+      has_changes: ${{steps.check.outputs.has_changes}}
+    steps:
+      - name: 'Checkout'
+        uses: actions/checkout@v3
+        with:
+          fetch-depth: 0
+
+      - name: 'Check for commits in last 7 days'
+        id: check
+        run: |
+          # Always run on manual trigger
+          if [ "${{github.event_name}}" = "workflow_dispatch" ]; then
+            echo "has_changes=true" >> $GITHUB_OUTPUT
+            exit 0
+          fi
+          # Check for commits in last 7 days
+          COMMITS=$(git log --oneline --since="7 days ago" | wc -l)
+          if [ "$COMMITS" -gt 0 ]; then
+            echo "Found $COMMITS commits in last 7 days"
+            echo "has_changes=true" >> $GITHUB_OUTPUT
+          else
+            echo "No commits in last 7 days, skipping"
+            echo "has_changes=false" >> $GITHUB_OUTPUT
+          fi
+
+  launch-runner:
+    needs: check-changes
+    if: needs.check-changes.outputs.has_changes == 'true'
+    runs-on:
+      labels: [isoquant]
+    name: 'Running custom_sc Curio barcode detection QC'
+
+    steps:
+      - name: 'Cleanup'
+        run: >
+          set -e &&
+          shopt -s dotglob &&
+          rm -rf *
+
+      - name: 'Checkout'
+        uses: actions/checkout@v3
+        with:
+          fetch-depth: 1
+
+      - name: 'Custom SC Curio small (ArrayKmerIndexer)'
+        shell: bash
+        env:
+          STEP_NAME: Human.Curio.custom_sc.small
+        run: |
+          export PATH=$PATH:${{env.BIN_PATH}}
+          python3 ${{env.LAUNCHER}} ${{env.CFG_DIR}}/${{env.STEP_NAME}}.cfg -o ${{env.OUTPUT_BASE}}
+
+      - name: 'Custom SC Curio large (Array2BitKmerIndexer)'
+        shell: bash
+        env:
+          STEP_NAME: Human.Curio.custom_sc.large
+        run: |
+          export PATH=$PATH:${{env.BIN_PATH}}
+          python3 ${{env.LAUNCHER}} ${{env.CFG_DIR}}/${{env.STEP_NAME}}.cfg -o ${{env.OUTPUT_BASE}}
diff --git a/.github/workflows/Barcode.Custom.StereoSeq.yml b/.github/workflows/Barcode.Custom.StereoSeq.yml
new file mode 100644
index 00000000..161521d3
--- /dev/null
+++ b/.github/workflows/Barcode.Custom.StereoSeq.yml
@@ -0,0 +1,82 @@
+name: Barcode Custom SC StereoSeq
+
+on:
+  workflow_dispatch:
+  schedule:
+  - cron: '0 1 * * 5'
+
+env:
+  LAUNCHER: ${{github.workspace}}/tests/github/run_barcode_test.py
+  CFG_DIR: /abga/work/andreyp/ci_isoquant/data/barcodes
+  BIN_PATH: /abga/work/andreyp/ci_isoquant/bin/
+  OUTPUT_BASE: /abga/work/andreyp/ci_isoquant/output/${{github.ref_name}}/barcodes/
+
+concurrency:
+  group: ${{github.workflow}}
+  cancel-in-progress: false
+
+jobs:
+  check-changes:
+    runs-on:
+      labels: [isoquant]
+    name: 'Check for recent changes'
+    outputs:
+      has_changes: ${{steps.check.outputs.has_changes}}
+    steps:
+      - name: 'Checkout'
+        uses: actions/checkout@v3
+        with:
+          fetch-depth: 0
+
+      - name: 'Check for commits in last 7 days'
+        id: check
+        run: |
+          # Always run on manual trigger
+          if [ "${{github.event_name}}" = "workflow_dispatch" ]; then
+            echo "has_changes=true" >> $GITHUB_OUTPUT
+            exit 0
+          fi
+          # Check for commits in last 7 days
+          COMMITS=$(git log --oneline --since="7 days ago" | wc -l)
+          if [ "$COMMITS" -gt 0 ]; then
+            echo "Found $COMMITS commits in last 7 days"
+            echo "has_changes=true" >> $GITHUB_OUTPUT
+          else
+            echo "No commits in last 7 days, skipping"
+            echo "has_changes=false" >> $GITHUB_OUTPUT
+          fi
+
+  launch-runner:
+    needs: check-changes
+    if: needs.check-changes.outputs.has_changes == 'true'
+    runs-on:
+      labels: [isoquant]
+    name: 'Running custom_sc StereoSeq barcode detection QC'
+
+    steps:
+      - name: 'Cleanup'
+        run: >
+          set -e &&
+          shopt -s dotglob &&
+          rm -rf *
+
+      - name: 'Checkout'
+        uses: actions/checkout@v3
+        with:
+          fetch-depth: 1
+
+      - name: 'Custom SC StereoSeq small (KmerIndexer)'
+        shell: bash
+        env:
+          STEP_NAME: Mouse.StereoSeq.custom_sc.small
+        run: |
+          export PATH=$PATH:${{env.BIN_PATH}}
+          python3 ${{env.LAUNCHER}} ${{env.CFG_DIR}}/${{env.STEP_NAME}}.cfg -o ${{env.OUTPUT_BASE}}
+
+      - name: 'Custom SC StereoSeq large (Dict2BitKmerIndexer)'
+        shell: bash
+        env:
+          STEP_NAME: Mouse.StereoSeq.custom_sc.large
+        run: |
+          export PATH=$PATH:${{env.BIN_PATH}}
+          python3 ${{env.LAUNCHER}} ${{env.CFG_DIR}}/${{env.STEP_NAME}}.cfg -o ${{env.OUTPUT_BASE}}
diff --git a/.github/workflows/Barcode.Custom.VisiumHD.yml b/.github/workflows/Barcode.Custom.VisiumHD.yml
new file mode 100644
index 00000000..5e32ed2a
--- /dev/null
+++ b/.github/workflows/Barcode.Custom.VisiumHD.yml
@@ -0,0 +1,82 @@
+name: Barcode Custom SC Visium HD
+
+on:
+  workflow_dispatch:
+  schedule:
+  - cron: '0 2 * * 4'
+
+env:
+  LAUNCHER: ${{github.workspace}}/tests/github/run_barcode_test.py
+  CFG_DIR: /abga/work/andreyp/ci_isoquant/data/barcodes
+  BIN_PATH: /abga/work/andreyp/ci_isoquant/bin/
+  OUTPUT_BASE: /abga/work/andreyp/ci_isoquant/output/${{github.ref_name}}/barcodes/
+
+concurrency:
+  group: ${{github.workflow}}
+  cancel-in-progress: false
+
+jobs:
+  check-changes:
+    runs-on:
+      labels: [isoquant]
+    name: 'Check for recent changes'
+    outputs:
+      has_changes: ${{steps.check.outputs.has_changes}}
+    steps:
+      - name: 'Checkout'
+        uses: actions/checkout@v3
+        with:
+          fetch-depth: 0
+
+      - name: 'Check for commits in last 7 days'
+        id: check
+        run: |
+          # Always run on manual trigger
+          if [ "${{github.event_name}}" = "workflow_dispatch" ]; then
+            echo "has_changes=true" >> $GITHUB_OUTPUT
+            exit 0
+          fi
+          # Check for commits in last 7 days
+          COMMITS=$(git log --oneline --since="7 days ago" | wc -l)
+          if [ "$COMMITS" -gt 0 ]; then
+            echo "Found $COMMITS commits in last 7 days"
+            echo "has_changes=true" >> $GITHUB_OUTPUT
+          else
+            echo "No commits in last 7 days, skipping"
+            echo "has_changes=false" >> $GITHUB_OUTPUT
+          fi
+
+  launch-runner:
+    needs: check-changes
+    if: needs.check-changes.outputs.has_changes == 'true'
+    runs-on:
+      labels: [isoquant]
+    name: 'Running custom_sc Visium HD barcode detection QC'
+
+    steps:
+      - name: 'Cleanup'
+        run: >
+          set -e &&
+          shopt -s dotglob &&
+          rm -rf *
+
+      - name: 'Checkout'
+        uses: actions/checkout@v3
+        with:
+          fetch-depth: 1
+
+      - name: 'Custom SC Visium HD small (ArrayKmerIndexer)'
+        shell: bash
+        env:
+          STEP_NAME: Mouse.VisiumHD.custom_sc.small
+        run: |
+          export PATH=$PATH:${{env.BIN_PATH}}
+          python3 ${{env.LAUNCHER}} ${{env.CFG_DIR}}/${{env.STEP_NAME}}.cfg -o ${{env.OUTPUT_BASE}}
+
+      - name: 'Custom SC Visium HD large (Array2BitKmerIndexer)'
+        shell: bash
+        env:
+          STEP_NAME: Mouse.VisiumHD.custom_sc.large
+        run: |
+          export PATH=$PATH:${{env.BIN_PATH}}
+          python3 ${{env.LAUNCHER}} ${{env.CFG_DIR}}/${{env.STEP_NAME}}.cfg -o ${{env.OUTPUT_BASE}}
diff --git a/.github/workflows/SC.Mouse.10x.yml b/.github/workflows/SC.Mouse.10x.yml
new file mode 100644
index 00000000..d13de1dc
--- /dev/null
+++ b/.github/workflows/SC.Mouse.10x.yml
@@ -0,0 +1,74 @@
+name: SC Mouse 10x Allinfo
+
+on:
+  workflow_dispatch:
+  schedule:
+  - cron: '0 5 * * 6'
+
+env:
+  LAUNCHER: ${{github.workspace}}/tests/github/run_pipeline.py
+  CFG_DIR: /abga/work/andreyp/ci_isoquant/data
+  BIN_PATH: /abga/work/andreyp/ci_isoquant/bin/
+  OUTPUT_BASE: /abga/work/andreyp/ci_isoquant/output/${{github.ref_name}}/
+
+concurrency:
+  group: ${{github.workflow}}
+  cancel-in-progress: false
+
+jobs:
+  check-changes:
+    runs-on:
+      labels: [isoquant]
+    name: 'Check for recent changes'
+    outputs:
+      has_changes: ${{steps.check.outputs.has_changes}}
+    steps:
+      - name: 'Checkout'
+        uses: actions/checkout@v3
+        with:
+          fetch-depth: 0
+
+      - name: 'Check for commits in last 7 days'
+        id: check
+        run: |
+          # Always run on manual trigger
+          if [ "${{github.event_name}}" = "workflow_dispatch" ]; then
+            echo "has_changes=true" >> $GITHUB_OUTPUT
+            exit 0
+          fi
+          # Check for commits in last 7 days
+          COMMITS=$(git log --oneline --since="7 days ago" | wc -l)
+          if [ "$COMMITS" -gt 0 ]; then
+            echo "Found $COMMITS commits in last 7 days"
+            echo "has_changes=true" >> $GITHUB_OUTPUT
+          else
+            echo "No commits in last 7 days, skipping"
+            echo "has_changes=false" >> $GITHUB_OUTPUT
+          fi
+
+  launch-runner:
+    needs: check-changes
+    if: needs.check-changes.outputs.has_changes == 'true'
+    runs-on:
+      labels: [isoquant]
+    name: 'Running SC Mouse 10x pipeline with allinfo QC'
+
+    steps:
+      - name: 'Cleanup'
+        run: >
+          set -e &&
+          shopt -s dotglob &&
+          rm -rf *
+
+      - name: 'Checkout'
+        uses: actions/checkout@v3
+        with:
+          fetch-depth: 1
+
+      - name: 'SC Mouse 10x allinfo'
+        shell: bash
+        env:
+          STEP_NAME: SC.Mouse.10x.allinfo
+        run: |
+          export PATH=$PATH:${{env.BIN_PATH}}
+          python3 ${{env.LAUNCHER}} ${{env.CFG_DIR}}/${{env.STEP_NAME}}.cfg -o ${{env.OUTPUT_BASE}}
diff --git a/docs/barcode_calling.md b/docs/barcode_calling.md
new file mode 100644
index 00000000..e0d64884
--- /dev/null
+++ b/docs/barcode_calling.md
@@ -0,0 +1,227 @@
+# Barcode calling
+
+IsoQuant includes a built-in barcode calling module (`isoquant_detect_barcodes.py`) that extracts
+ barcodes and UMIs from raw long reads. This module supports multiple single-cell and
+spatial transcriptomics platforms and can also be used as a standalone tool.
+
+See [single-cell and spatial transcriptomics](single_cell.md) for supported platforms,
+pipeline integration, MDF format, and UMI deduplication.
+
+## How it works
+
+The barcode calling module processes each read by:
+
+1. Searching for known constant sequences (linkers, primers, TSO) on the read
+2. Locating the polyT tail to determine read orientation
+3. Extracting the barcode region based on its expected position relative to the anchoring constant sequences
+4. Matching the extracted barcode against a whitelist
+5. Extracting the UMI sequence from its expected position
+
+Each read is assigned:
+- A **barcode** (cell/spot identity) corrected against the whitelist, or `*` if no match is found
+- A **UMI** (unique molecular identifier), or `*` if not detected
+- A **strand** orientation (`+`, `-`, or `.` if unknown)
+- Platform-specific features (polyT position, linker positions, etc.)
+
+## Integration with IsoQuant
+
+When running IsoQuant with any single-cell or spatial mode, 
+barcode calling is performed automatically as the first pipeline step.
+The barcode detection output is a TSV file with one line per read, which is then used
+throughout the rest of the pipeline for grouping and UMI deduplication.
+
+If barcodes have already been called externally (e.g., by Cell Ranger),
+use `--barcoded_reads` to skip the barcode calling step.
+
+## Standalone usage
+
+`isoquant_detect_barcodes.py` can be run independently:
+
+```bash
+python isoquant_detect_barcodes.py \
+  --input reads.fastq.gz \
+  --barcodes barcode_whitelist.txt \
+  --mode tenX_v3 \
+  --output output_prefix \
+  --threads 16
+```
+
+### Command line options
+
+`--input` or `-i` (required)
+
+One or more input read files in FASTQ, FASTA or BAM format. Gzipped FASTQ/FASTA files are supported.
+
+`--output` or `-o` (required)
+
+Output prefix. The barcode calling results will be written to `<prefix>.barcoded_reads.tsv`.
+When multiple input files are provided, outputs are numbered: `<prefix>_0.barcoded_reads.tsv`, etc.
+
+`--barcodes` or `-b`
+
+One or more barcode whitelist files. The number of files depends on the mode:
+
+* 1 file: `tenX_v3`, `visium_5prime`, `stereoseq`, `stereoseq_nosplit`, `curio`
+* 2 files: `visium_hd`
+* Not needed for `custom_sc` (barcodes defined in MDF file)
+
+File should contain one barcode sequence per line. 
+More than 1 tab-separated column is allowed, but only the first will be used.
+Supports plain text and gzipped files.
+
+_Note: barcode calling is performed much better if the whitelist contains a small number of barcodes. 
+If you have a subset of barcodes from short-read data, provide them instead of the full whitelist._
+
+`--mode`
+
+Barcode calling mode. Available modes: `tenX_v3`, `curio`, `stereoseq`, `stereoseq_nosplit`,
+`visium_5prime`, `visium_hd`, `custom_sc`. 
+
+`--molecule`
+Path to a molecule description format (MDF) file for `custom_sc` mode.
+This file defines the structure of the sequencing molecule (barcodes, UMIs, linkers, polyT, cDNA)ю
+See [MDF format](single_cell.md#molecule-definition-file-mdf-format) for the format specification.
+
+`--threads` or `-t`
+
+Number of threads for parallel processing (default: 16).
+
+`--tmp_dir`
+
+Folder for temporary files during parallel processing.
+
+### Example commands
+
+10x Genomics:
+```bash
+python isoquant_detect_barcodes.py \
+  -i reads.fastq.gz \
+  -b 3M-february-2018.txt.gz \
+  --mode tenX_v3 \
+  -o results/my_sample \
+  -t 16
+```
+
+Stereo-seq:
+```bash
+python isoquant_detect_barcodes.py \
+  -i reads.fastq.gz \
+  -b stereo_barcodes.txt \
+  --mode stereoseq \
+  -o results/stereo_sample \
+  -t 16
+```
+
+Custom molecule:
+```bash
+python isoquant_detect_barcodes.py \
+  -i reads.fastq.gz \
+  --mode custom_sc \
+  --molecule my_platform.mdf \
+  -o results/custom_sample
+```
+
+## Output
+
+The main output is a TSV file (`*.barcoded_reads.tsv`) with a header line followed by one line per read.
+The columns depend on the platform mode.
+
+### Common columns (all modes)
+
+| Column | Description |
+|--------|-------------|
+| read_id | Read identifier |
+| barcode | Corrected barcode sequence, or `*` if not found |
+| UMI | UMI sequence, or `*` if not detected |
+| BC_score | Barcode alignment score (-1 if not matched) |
+| strand | Read orientation: `+`, `-`, or `.` |
+
+### Additional columns by mode
+
+**10x Genomics** (`tenX_v3`, `visium_5prime`, `visium_hd`):
+
+| Column | Description |
+|--------|-------------|
+| polyT | Position of polyT tail start (-1 if not found) |
+| R1 | Position of R1 primer end (-1 if not found) |
+
+**Curio** (`curio`):
+
+| Column | Description |
+|--------|-------------|
+| polyT | PolyT tail position |
+| primer | PCR primer position |
+| linker_start | Linker start position |
+| linker_end | Linker end position |
+
+**Stereo-seq** (`stereoseq_nosplit`, `stereoseq`):
+
+| Column | Description |
+|--------|-------------|
+| polyT | PolyT tail position |
+| primer | Primer position |
+| linker_start | Linker start position |
+| linker_end | Linker end position |
+| TSO | TSO position (-1 if not found) |
+
+**Custom** (`custom_sc`): columns depend on the elements defined in the MDF file.
+Each variable element produces a column with its detected sequence and score.
+
+### Statistics file
+
+A statistics file (`*.barcoded_reads.tsv.stats`) is generated alongside the TSV output,
+reporting the total number of reads processed, barcodes detected, and UMIs found.
+
+## Supported platforms
+
+### 10x Genomics (`tenX_v3`, `visium_5prime`)
+
+Molecule structure (3' to 5'):
+```
+[R1 primer] [Barcode (16bp)] [UMI (12bp)] [PolyT] [cDNA] [TSO]
+```
+
+Requires 1 barcode whitelist file (e.g., the 10x `3M-february-2018.txt.gz`).
+
+### Visium HD (`visium_hd`)
+
+Molecule structure (3' to 5'):
+```
+[R1 primer] [UMI (9bp)] [BC_part1 (15-16bp)] [BC_part2 (14-15bp)] VV [PolyT] [cDNA] [TSO]
+```
+
+Requires 2 barcode whitelist files (part 1 and part 2).
+The final barcode is the concatenation of both parts.
+
+### Curio Bioscience (`curio`)
+
+Molecule structure (3' to 5'):
+```
+[PCR primer] [Left BC (8bp)] [Linker] [Right BC (6bp)] [UMI (9bp)] [PolyT] [cDNA]
+```
+
+The linker sequence anchors the barcode detection.
+Accepts 1 combined whitelist file (14bp barcodes).
+
+### Stereo-seq (`stereoseq`, `stereoseq_nosplit`)
+
+Molecule structure (3' to 5'):
+```
+[Primer] [Barcode (25bp)] [Linker] [UMI (10bp)] [PolyT] [cDNA] [TSO]
+```
+
+- `stereoseq` mode: splits concatenated reads at TSO boundaries, producing a new FASTA with individual subreads
+- `stereoseq_nosplit` mode: processes reads without splitting
+
+### Custom platform (`custom_sc`)
+
+Uses a molecule definition file (MDF) to describe arbitrary molecule structures.
+See [MDF format](single_cell.md#molecule-definition-file-mdf-format).
+
+The universal extractor:
+1. Tries both forward and reverse complement orientations
+2. Detects polyT tail to determine strand
+3. Finds constant elements (primers, linkers) on the read
+4. Extracts variable elements (barcodes, UMIs) from their expected positions
+5. Corrects barcodes against whitelists when provided (`VAR_FILE` or `VAR_LIST`)
+6. Handles linked elements: concatenated (`|`) parts are joined then corrected; duplicated (`/`) parts use majority vote
diff --git a/docs/cmd.md b/docs/cmd.md
index a9bb7aef..462d530e 100644
--- a/docs/cmd.md
+++ b/docs/cmd.md
@@ -96,7 +96,7 @@ Use this option at your own risk.
 Input file names are used as labels if not set.
 
 `--read_group`
- Sets one or more ways to group feature counts (e.g. by cell type, file, or barcode).
+ Sets one or more ways to group feature counts (e.g. by cell type, file name, BAM tag or barcode).
  Multiple grouping strategies can be combined (space-separated).
  Available grouping options:
 
@@ -106,8 +106,8 @@ If multiple BAM/FASTQ files are provided and `--read_group` option is not set, I
 by default.
 
  * `tag:TAG` - groups reads by BAM file read tag, where `TAG` is the tag name
-(e.g. `tag:CB` uses `CB` tag values as groups, commonly used for cell barcodes in single-cell data).
-
+(e.g. `tag:RG` uses `RG` tag values as groups, commonly used for read group information).
+q
  * `read_id:DELIM` - groups reads by read name suffix, where `DELIM` is the
 symbol/string by which the read id will be split
 (e.g. if `DELIM` is `_`, for read `m54158_180727_042959_59310706_ccs_NEU` the group will be `NEU`).
@@ -117,49 +117,17 @@ where `FILE` is the file path, `READ_COL` is column with read ids (default: 0),
 `GROUP_COL(S)` is column(s) with group ids (default: 1; use comma-separated columns for multi-column grouping, e.g., `1,2,3`),
 `DELIM` is separator symbol (default: tab). File can be gzipped.
 
- * `barcode_spot` or `barcode_spot:FILE` - maps barcodes to spots/cell types.
-Uses barcode-to-spot mapping from `--barcode2spot` file by default, or from explicit `FILE` if specified.
+ * `barcode_spot` - group by barcode properties, such as spots/cell types.
+Set barcode-to-spot mapping via `--barcode2spot` option.
 Useful for grouping single-cell/spatial data by cell type or spatial region instead of individual barcodes.
 
-**Example**: `--read_group tag:CB file_name barcode_spot` creates multi-level grouping by cell barcode tag, file name, and cell type.
-
+* `barcode` - groups reads by cell barcode.
+When the number of barcodes is large, grouping counts by individual barcodes may take a long time.
+It is recommended to use `--barcode2spot` file instead.
 
-### Single-cell and spatial transcriptomics options
+**Example**: `--read_group tag:RG file_name barcode_spot` creates multi-level grouping by read group tag, 
+original file name, and barcode property (e.g. cell type).
 
-**NB! This functionality is not officially released yet, use it at your own risk.**
-
-`--mode` or `-m`
-IsoQuant mode for processing single-cell or spatial transcriptomics data. Available modes:
-
-* `bulk` - standard bulk RNA-seq mode (default)
-* `tenX_v3` - 10x Genomics single-cell 3' gene expression
-* `curio` - Curio Bioscience single-cell
-* `visium_hd` - 10x Genomics Visium HD spatial transcriptomics
-* `visium_5prime` - 10x Genomics Visium 5' spatial transcriptomics
-* `stereoseq` - Stereo-seq spatial transcriptomics (BGI)
-* `stereoseq_nosplit` - Stereo-seq without barcode splitting
-
-Single-cell and spatial modes enable automatic barcode calling and UMI-based deduplication.
-
-`--barcode_whitelist`
-Path to file(s) with barcode whitelist for barcode calling.
-Required for single-cell/spatial modes unless `--barcoded_reads` is provided.
-File should contain one barcode per line.
-
-`--barcoded_reads`
-Path to TSV file(s) with barcoded reads.
-Format: `read_id<TAB>barcode<TAB>umi` (one read per line).
-If not provided, barcodes will be called automatically from raw reads using `--barcode_whitelist`.
-
-`--barcode_column`
-Column index for barcodes in the `--barcoded_reads` file (default: 1).
-Read ID column is 0, barcode column is 1, UMI column is 2.
-
-`--barcode2spot`
-Path to TSV file(s) mapping barcodes to cell types or spatial spots.
-Format: `barcode<TAB>cell_type` (one barcode per line).
-Used with `--read_group barcode_spot` to group counts by cell type/spot instead of individual barcodes.
-Useful for reducing output dimensions from thousands of barcodes to tens of cell types.
 
 ### Output options
 
@@ -200,6 +168,62 @@ Useful for reducing output dimensions from thousands of barcodes to tens of cell
     Align reads to the reference without running IsoQuant itself.
 
 
+## Single-cell and spatial transcriptomics options
+
+**NB! This feature is experimental and is not part of the official IsoQuant release.**
+
+See [single-cell and spatial transcriptomics](single_cell.md) for a detailed guide on
+supported platforms, examples, molecule description format, and UMI deduplication.
+
+`--mode` or `-m`
+IsoQuant mode for processing single-cell or spatial transcriptomics data. Available modes:
+
+* `bulk` - standard bulk RNA-seq mode (default)
+* `tenX_v3` - 10x Genomics single-cell 3' gene expression
+* `visium_5prime` - 10x Genomics Visium 5' spatial transcriptomics
+* `visium_hd` - 10x Genomics Visium HD spatial transcriptomics
+* `curio` - Curio Bioscience spatial data
+* `stereoseq` - Stereo-seq spatial data
+* `stereoseq_nosplit` - Stereo-seq without read splitting
+* `custom_sc` - custom single-cell/spatial mode using a [molecule definition file](single_cell.md#molecule-definition-file-mdf-format)
+
+Single-cell and spatial modes enable automatic barcode calling and UMI-based deduplication.
+
+`--barcode_whitelist`
+Path to file(s) with barcode whitelist(s) for barcode calling.
+Required for single-cell/spatial modes unless `--barcoded_reads` is provided.
+
+File should contain one barcode sequence per line. 
+More than 1 tab-separated column is allowed, but only the first will be used.
+Supports plain text and gzipped files.
+
+_Note: barcode calling is performed much better if the whitelist contains a small number of barcodes. 
+If you have a subset of barcodes from short-read data, provide them instead of the full whitelist._
+
+`--barcoded_reads`
+Path to TSV file(s) with pre-called barcoded reads.
+Format: `read_id<TAB>barcode<TAB>umi` (one read per line).
+If provided, IsoQuant skips barcode calling and uses these assignments directly.
+More than 3 columns are allowed, but only the first 3 will be used.
+
+Note! IsoQuant does not read barcodes or UMIs from BAM file tags.
+
+`--barcode2spot`
+Path to TSV file mapping barcodes to cell types, spatial spots, or other barcode properties.
+By default, barcode is in the first column, cell type in the second.
+However, you can specify one or more columns via colon symbol (similar to `--read_group`): 
+`file.tsv:barcode_column:spot_column(s)` (e.g., `cell_types.tsv:0:1,2,3` for multiple barcode properties).
+
+When `--barcode2spot` is set, `--read_group barcode_spot` will be set automatically
+to group counts by cell type, spatial regions, or other provided properties.
+
+`--molecule`
+Path to a molecule description file (MDF) for `custom_sc` mode.
+Defines molecule structure for universal barcode extraction from any platform.
+See [MDF format](single_cell.md#molecule-definition-file-mdf-format) for details.
+
+
+
 ## Algorithm parameters
 
 ### Quantification
diff --git a/docs/single_cell.md b/docs/single_cell.md
new file mode 100644
index 00000000..1d88402d
--- /dev/null
+++ b/docs/single_cell.md
@@ -0,0 +1,258 @@
+# Single-cell and spatial transcriptomics
+
+**NB! This feature is experimental and is not part of the official IsoQuant release.**
+
+IsoQuant supports single-cell and spatial transcriptomics data from multiple platforms.
+When a single-cell or spatial mode is selected, IsoQuant automatically performs
+barcode calling and UMI-based PCR deduplication as part of the pipeline.
+
+## Overview
+
+The single-cell/spatial pipeline extends the standard bulk pipeline with these additional steps:
+
+1. **Barcode calling** -- extract cell/spot barcodes and UMIs from raw reads
+2. **Standard IsoQuant processing** -- alignment, read-to-isoform assignment
+3. **UMI deduplication** -- remove PCR/RT duplicates within each barcode group
+4. **Grouped quantification** -- produce per-cell/per-spot count matrices
+
+Note: UMI deduplication relies on read-to-gene assignment. Reads that are not assigned to any gene are discarded.
+Hence, novel gene discovery will not be performed in single-cell/spatial mode. 
+We recommend using `bulk` mode for novel gene and transcript discovery.
+
+Barcode calling is handled by the built-in [barcode calling module](barcode_calling.md),
+which can also be used as a standalone tool.
+
+## Supported platforms
+
+| Mode | Platform | Barcode whitelist files | UMI length | Notes                                                                                |
+|------|----------|--------------------|------------|--------------------------------------------------------------------------------------|
+| `tenX_v3` | 10x Genomics 3' v3 | 1 | 12 | Single 16bp barcode                                                                  |
+| `visium_5prime` | 10x Genomics Visium 5' | 1 | 12 | Same detector as tenX_v3                                                             |
+| `visium_hd` | 10x Genomics Visium HD | 2 | 9 | Two barcodes (15/16bp + 14/15bp)                                                     |
+| `curio` | Curio Bioscience | 1  | 9 | Double barcode (8bp + 6bp) with linker                                               |
+| `stereoseq` | Stereo-seq | 1 | 10 | 25bp barcode, read splitting mode                                                    |
+| `stereoseq_nosplit` | Stereo-seq | 1 | 10 | 25bp barcode, no read splitting                                                      |
+| `custom_sc` | Any platform | 0 (uses MDF) | varies | User-defined molecule structure via [MDF file](#molecule-definition-file-mdf-format) |
+
+## Quick start examples
+
+10x Genomics single-cell:
+```bash
+isoquant.py --reference genome.fa --genedb genes.gtf --complete_genedb \
+  --fastq reads.fastq.gz --data_type nanopore \
+  --mode tenX_v3 --barcode_whitelist 3M-february-2018.txt.gz \
+  -o sc_output
+```
+
+Stereo-seq spatial:
+```bash
+isoquant.py --reference genome.fa --genedb genes.gtf --complete_genedb \
+  --fastq reads.fastq.gz --data_type nanopore \
+  --mode stereoseq --barcode_whitelist barcodes.txt \
+  -o stereo_output
+```
+
+Custom platform with molecule definition file:
+```bash
+isoquant.py --reference genome.fa --genedb genes.gtf --complete_genedb \
+  --fastq reads.fastq.gz --data_type nanopore \
+  --mode custom_sc --molecule molecule_definition.mdf \
+  -o custom_output
+```
+
+Pre-called barcodes (skip barcode calling):
+```bash
+isoquant.py --reference genome.fa --genedb genes.gtf --complete_genedb \
+  --bam aligned.bam --data_type nanopore \
+  --mode tenX_v3 --barcoded_reads barcodes.tsv \
+  -o sc_output
+```
+
+## Command line options
+
+`--mode` or `-m`
+
+IsoQuant processing mode. Available modes:
+
+* `bulk` -- standard bulk RNA-seq mode (default)
+* `tenX_v3` -- 10x Genomics single-cell 3' gene expression
+* `curio` -- Curio Bioscience single-cell
+* `visium_hd` -- 10x Genomics Visium HD spatial transcriptomics
+* `visium_5prime` -- 10x Genomics Visium 5' spatial transcriptomics
+* `stereoseq` -- Stereo-seq spatial transcriptomics (BGI), with read splitting
+* `stereoseq_nosplit` -- Stereo-seq without read splitting
+* `custom_sc` -- custom single-cell/spatial mode using a molecule definition file (MDF)
+
+All modes except `bulk` enable automatic barcode calling and UMI-based deduplication.
+
+
+`--barcode_whitelist`
+Path to file(s) with barcode whitelist(s) for barcode calling.
+Required for single-cell/spatial modes unless `--barcoded_reads` is provided.
+
+File should contain one barcode sequence per line. 
+More than 1 tab-separated column is allowed, but only the first will be used.
+Supports plain text and gzipped files.
+
+Note: barcode calling is performed much better if the whitelist contains a small number of barcodes. 
+If you have a subset of barcodes from short-read data, provide them instead of the full whitelist.
+
+The number of whitelist files depends on the mode:
+
+* 1 file: `tenX_v3`, `visium_5prime`, `stereoseq`, `stereoseq_nosplit`, `curio` (combined 14bp barcodes)
+* 2 files: `visium_hd` (part 1 and part 2 barcode lists)
+* Not needed: `custom_sc` (barcode lists are specified inside the MDF file)
+
+`--barcoded_reads`
+Path to TSV file(s) with pre-called barcoded reads.
+Format: `read_id<TAB>barcode<TAB>umi` (one read per line).
+If provided, IsoQuant skips barcode calling and uses these assignments directly.
+More than 3 columns are allowed, but only the first 3 will be used.
+
+Note! IsoQuant does not read barcodes or UMIs from BAM file tags.
+
+`--barcode2spot`
+Path to TSV file mapping barcodes to cell types, spatial spots, or other barcode properties.
+By default, barcode is in the first column, cell type in the second.
+However, you can specify one or more columns via colon symbol (similar to `--read_group`): 
+`file.tsv:barcode_column:spot_column(s)` (e.g., `cell_types.tsv:0:1,2,3` for multiple barcode properties).
+
+When `--barcode2spot` is set, `--read_group barcode_spot` will be set automatically
+to group counts by cell type, spatial regions, or other provided properties.
+
+`--molecule`
+Path to a molecule description format (MDF) file for `custom_sc` mode.
+This file defines the structure of the sequencing molecule (barcodes, UMIs, linkers, polyT, cDNA)
+and allows IsoQuant to process reads from any single-cell or spatial platform.
+See the [MDF format](#molecule-definition-file-mdf-format) section below for details.
+
+## Molecule description format (MDF)
+
+The MDF format allows users to describe the structure of their sequencing molecule
+so that IsoQuant can extract barcodes and UMIs from any platform.
+The molecule is described in the 3' to 5' direction (primer end first, cDNA last).
+
+An MDF file has two parts:
+
+1. **Header line**: colon-separated list of element names defining the order of elements on the molecule (3' to 5').
+2. **Element definitions**: one line per element, tab-separated: `name  type  value  [length]`.
+
+### Element types
+
+| Type | Description                                                                   | Value field              |
+|------|-------------------------------------------------------------------------------|--------------------------|
+| `CONST` | Constant/known sequence (primer, linker, TSO)                                 | Sequence                 |
+| `VAR_FILE` | Variable sequence matched against a whitelist file                            | Path to TSV file         |
+| `VAR_LIST` | Variable sequence matched against an inline list                              | Comma-separated sequences |
+| `VAR_ANY` | Variable fixed-length sequence extracted as-is                                | Length (integer)         |
+| `VAR_ANY_SEPARATOR` | Fixed-length variable separator sequence (not extracted)                      | Length (integer)        |
+| `VAR_ANY_NON_T_SEPARATOR` | Fixed-length variable separator sequence without T nucleoties (not extracted) | Length (integer)         |
+| `PolyT` | PolyT tail                                                                    | (none)                   |
+| `cDNA` | cDNA region                                                                   | (none)                   |
+
+At the moment, only a single `cDNA` and a single `PolyT` are supported. 
+
+Variable elements are expected to have a fixed length (`VAR_FILE` and `VAR_LIST`).
+Using variable-length barcodes may result in suboptimal performance.
+
+### Barcode and UMI identification
+
+Elements are identified as barcodes or UMIs by their name prefix (case-insensitive):
+- Names starting with `barcode` are treated as barcode elements (e.g., `Barcode`, `barcode1`)
+- Names starting with `umi` are treated as UMI elements (e.g., `UMI`, `umi1`)
+
+When multiple barcode (or UMI) elements are present, their sequences are concatenated
+in the order they appear.
+
+### Examples
+
+**10x 3' single-cell v3**:
+```
+R1:Barcode:UMI:PolyT:cDNA:TSO
+R1        CONST      CTACACGACGCTCTTCCGATCT
+Barcode   VAR_FILE   barcodes.tsv
+UMI       VAR_ANY    12
+TSO       CONST      CCCATGTACTCTGCGTTGATACCACTGCTT
+```
+
+**10x 3' single-cell v3** (inline barcodes):
+```
+R1:Barcode:UMI:PolyT:cDNA:TSO
+R1        CONST      CTACACGACGCTCTTCCGATCT
+Barcode   VAR_FILE   AAACCCGGGTTTAAAC,TTTGGGCCCAAATTTG,GGGGAAAACCCCTTTT
+UMI       VAR_ANY    12
+TSO       CONST      CCCATGTACTCTGCGTTGATACCACTGCTT
+```
+
+### Linked elements for multi-part barcodes
+
+When a barcode is split into multiple parts in the molecule, use linked element notation.
+All linked parts must be of the same variable type and reference the same whitelist.
+Parts are numbered consecutively starting from 1.
+
+**Concatenated** (`|`): parts of the barcode separated by other elements.
+Parts are extracted independently, then concatenated and corrected as one sequence against the full whitelist.
+Such structure appears in, for example, Curio Bioscience single-cell protocol:
+
+```
+PCR_PRIMER:Barcode|1:Linker:Barcode|2:UMI:PolyT:cDNA
+PCR_PRIMER  TACACGACGCTCTTCCGATCT
+Barcode|1   VAR_FILE   barcodes.tsv   8
+Linker      CONST      TCTTCAGCGTTCCCGAGA
+Barcode|2   VAR_FILE   barcodes.tsv   6
+UMI         VAR_ANY    9
+```
+
+**Duplicated** (`/`): multiple redundant copies of the same barcode in the molecule.
+Each copy is corrected independently against the whitelist;
+a majority vote determines the final barcode.
+For example:
+```
+PCR_PRIMER:Barcode/1:UMI:PolyT:cDNA:Barcode/2:TSO
+PCR_PRIMER  TACACGACGCTCTTCCGATCT
+Barcode/1   VAR_FILE   barcodes.tsv   16
+Barcode/2   VAR_FILE   barcodes.tsv   16
+UMI         VAR_ANY    12
+TSO       CONST      CCCATGTACTCTGCGTTGATACCACTGCTT
+```
+
+## UMI deduplication
+
+All single-cell and spatial modes perform UMI-based PCR deduplication after isoform assignment.
+Within each cell barcode and gene, reads with similar UMIs (similarity criteria depends on the UMI length)
+are collapsed into a single representative read.
+
+The representative read is selected based on:
+1. Unique isoform assignment over ambiguous
+2. More exons
+3. Longer transcript alignment
+
+## Output
+
+### Count matrices
+
+Single-cell/spatial modes produce grouped count matrices in addition to the standard IsoQuant output.
+Use `--counts_format` to control the output format:
+
+* `default` -- automatic selection: matrix format for small numbers of groups (<=100), MTX for larger datasets
+* `matrix` -- standard matrix format with genes/transcripts as rows and barcodes as columns
+* `mtx` -- Matrix Market (MTX) format compatible with Seurat and Scanpy
+* `none` -- no conversion (only internal linear format is produced)
+
+Grouped counts can also be converted after the run using `src/convert_grouped_counts.py`.
+
+### Grouping reads
+
+Use `--read_group` to control how reads are grouped for quantification.
+Multiple grouping strategies can be combined (space-separated), producing separate count tables for each.
+
+The most common use-cases for single-cell/spatial data are grouping by barcode property (cell type, spot):
+
+`--read_group barcode_spot` (requires `--barcode2spot`)
+
+or grouping by individual barcode (not recommended for datasets with many barcodes):
+
+`--read_group barcode file_name`
+
+
+See the [read grouping options](cmd.md#other-input-options) for the full list of grouping strategies.
diff --git a/isoquant.py b/isoquant.py
index 59f68cfa..60aebc22 100755
--- a/isoquant.py
+++ b/isoquant.py
@@ -46,7 +46,7 @@
 from src.input_data_storage import InputDataStorage, InputDataType
 from src.multimap_resolver import MultimapResolvingStrategy
 from src.stats import combine_counts
-from detect_barcodes import process_single_thread, process_in_parallel, get_umi_length
+from isoquant_detect_barcodes import process_single_thread, process_in_parallel, get_umi_length
 
 
 logger = logging.getLogger('IsoQuant')
@@ -167,17 +167,16 @@ def add_hidden_option(*args, **kwargs):  # show command only with --full-help
                                    help="IsoQuant modes: " + ", ".join(ISOQUANT_MODES) +
                                         "; default:%s" % IsoQuantMode.bulk.name, default=IsoQuantMode.bulk.name)
     add_additional_option_to_group(sc_args_group, '--barcode_whitelist', type=str, nargs='+',
-                                   help='file(s) with barcode whitelist for barcode calling')
+                                   help='file(s) with barcode whitelist(s) for barcode calling')
     add_additional_option_to_group(sc_args_group, "--barcoded_reads", type=str, nargs='+',
                                    help='TSV file(s) with barcoded reads; barcodes will be called automatically if not provided')
-    # TODO: add UMI column, support various formats
-    add_additional_option_to_group(sc_args_group, "--barcode_column", type=str,
-                                   help='column with barcodes in barcoded_reads file, default=1; read id column is 0',
-                                   default=1)
     add_additional_option_to_group(sc_args_group, "--barcode2spot", type=str,
                                    help='TSV file mapping barcode to cell type / spot id. '
                                         'Format: file.tsv or file.tsv:barcode_col:spot_cols '
                                         '(e.g., file.tsv:0:1,2,3 for multiple spot columns)')
+    add_additional_option_to_group(sc_args_group, "--molecule", type=str,
+                                   help='molecule definition file (MDF) for custom_sc mode; '
+                                        'defines molecule structure for universal barcode extraction')
 
     # ALGORITHM
     add_additional_option_to_group(algo_args_group, "--report_novel_unspliced", "-u", type=bool_str,
@@ -531,7 +530,11 @@ def check_input_params(args):
 
     args.umi_length = 0
     if args.mode.needs_barcode_calling():
-        if not args.barcode_whitelist and not args.barcoded_reads:
+        if args.mode == IsoQuantMode.custom_sc:
+            if not args.molecule and not args.barcoded_reads:
+                logger.critical("custom_sc mode requires --molecule or --barcoded_reads")
+                sys.exit(IsoQuantExitCode.BARCODE_WHITELIST_MISSING)
+        elif not args.barcode_whitelist and not args.barcoded_reads:
             logger.critical("You have chosen single-cell/spatial mode %s, please specify barcode whitelist or file with "
                             "barcoded reads" % args.mode.name)
             sys.exit(IsoQuantExitCode.BARCODE_WHITELIST_MISSING)
@@ -607,6 +610,10 @@ def check_input_files(args):
         else:
             check_file_exists(args.barcoded_reads, "Barcoded reads file")
 
+    # Check molecule definition file
+    if hasattr(args, 'molecule') and args.molecule:
+        check_file_exists(args.molecule, "Molecule definition file")
+
     # Check barcode whitelist files
     if hasattr(args, 'barcode_whitelist') and args.barcode_whitelist:
         for wl_file in args.barcode_whitelist:
@@ -958,7 +965,8 @@ def prepare_reference_genome(args):
 
 
 class BarcodeCallingArgs:
-    def __init__(self, input, barcode_whitelist, mode, output, out_fasta, tmp_dir, threads):
+    def __init__(self, input, barcode_whitelist, mode, output, out_fasta, tmp_dir, threads,
+                 molecule: str | None = None):
         self.input = input  # Can be a single file (str) or list of files
         self.barcodes = barcode_whitelist
         self.mode = mode
@@ -966,7 +974,7 @@ def __init__(self, input, barcode_whitelist, mode, output, out_fasta, tmp_dir, t
         self.out_fasta = out_fasta  # Can be a single filename (str), list of filenames, or None
         self.tmp_dir = tmp_dir
         self.threads = threads
-        self.min_score = None
+        self.molecule = molecule
 
 
 def call_barcodes(args):
@@ -999,7 +1007,8 @@ def call_barcodes(args):
 
             bc_threads = 1 if args.mode.enforces_single_thread() else args.threads
             bc_args = BarcodeCallingArgs(input_files, args.barcode_whitelist, args.mode,
-                                         output_barcodes_list, output_fasta_list, sample.aux_dir, bc_threads)
+                                         output_barcodes_list, output_fasta_list, sample.aux_dir, bc_threads,
+                                         molecule=getattr(args, 'molecule', None))
             # Launching barcode calling in a separate process has the following reason:
             # Read chunks are not cleared by the GC in the end of barcode calling, leaving the main
             # IsoQuant process to consume ~2,5 GB even when barcode calling is done.
diff --git a/detect_barcodes.py b/isoquant_detect_barcodes.py
similarity index 89%
rename from detect_barcodes.py
rename to isoquant_detect_barcodes.py
index de20df22..c56d0f5c 100755
--- a/detect_barcodes.py
+++ b/isoquant_detect_barcodes.py
@@ -16,14 +16,13 @@
 from traceback import print_exc
 import shutil
 from collections import defaultdict
-import numpy
 import pysam
 from Bio import SeqIO, Seq, SeqRecord
 import logging
 
 from src.modes import IsoQuantMode
 from src.error_codes import IsoQuantExitCode
-from src.barcode_calling.common import bit_to_str, reverese_complement
+from src.barcode_calling.common import reverese_complement, load_barcodes
 from src.barcode_calling import (
     TenXBarcodeDetector,
     CurioBarcodeDetector,
@@ -31,6 +30,8 @@
     SharedMemoryStereoSplittingBarcodeDetector,
     ReadStats,
     VisiumHDBarcodeDetector,
+    UniversalSingleMoleculeExtractor,
+    MoleculeStructure
 )
 
 logger = logging.getLogger('IsoQuant')
@@ -43,7 +44,8 @@
                          IsoQuantMode.stereoseq_nosplit: SharedMemoryStereoBarcodeDetector,
                          IsoQuantMode.stereoseq: SharedMemoryStereoSplittingBarcodeDetector,
                          IsoQuantMode.visium_5prime: TenXBarcodeDetector,
-                         IsoQuantMode.visium_hd: VisiumHDBarcodeDetector
+                         IsoQuantMode.visium_hd: VisiumHDBarcodeDetector,
+                         IsoQuantMode.custom_sc: UniversalSingleMoleculeExtractor
                          }
 
 BARCODE_FILES_REQUIRED = {IsoQuantMode.tenX_v3: [1],
@@ -51,7 +53,8 @@
                           IsoQuantMode.stereoseq_nosplit: [1],
                           IsoQuantMode.stereoseq: [1],
                           IsoQuantMode.visium_5prime: [1],
-                          IsoQuantMode.visium_hd: [2]
+                          IsoQuantMode.visium_hd: [2],
+                          IsoQuantMode.custom_sc: [0]
                           }
 
 
@@ -108,7 +111,7 @@ def __init__(self, output_file_name, barcode_detector, header=False, output_sequ
             self.output_sequences_file = open(self.output_sequences, "w")
             self.process_function = self._process_read_split
         if header:
-            self.output_file.write(barcode_detector.result_type().header() + "\n")
+            self.output_file.write(barcode_detector.header() + "\n")
         self.read_stat = ReadStats()
 
     def get_stats(self):
@@ -244,13 +247,12 @@ def bam_file_chunk_reader(handler):
     yield current_chunk
 
 
-def process_chunk(barcode_detector, read_chunk, output_file, num, out_fasta=None, min_score=None):
+def process_chunk(barcode_detector, read_chunk, output_file, num, out_fasta=None):
     output_file += "_" + str(num)
     if out_fasta:
         out_fasta += "_" + str(num)
     counter = 0
-    if min_score:
-        barcode_detector.min_score = min_score
+
     barcode_caller = BarcodeCaller(output_file, barcode_detector, output_sequences=out_fasta)
     counter += barcode_caller.process_chunk(read_chunk)
     read_chunk.clear()
@@ -260,13 +262,26 @@ def process_chunk(barcode_detector, read_chunk, output_file, num, out_fasta=None
     return output_file, out_fasta, counter
 
 
-def prepare_barcodes(args):
+def create_barcode_caller(args):
+    logger.info("Creating barcode detector for mode %s" % args.mode.name)
+
+    if args.mode == IsoQuantMode.custom_sc:
+        if not args.molecule:
+            logger.critical("Custom single-cell/spatial mode requires molecule description to be provided via --molecule")
+            sys.exit(IsoQuantExitCode.INCOMPATIBLE_OPTIONS)
+        if not os.path.isfile(args.molecule):
+            logger.critical("Molecule file %s does not exist" % args.molecule)
+            sys.exit(IsoQuantExitCode.INPUT_FILE_NOT_FOUND)
+
+        return BARCODE_CALLING_MODES[args.mode](MoleculeStructure(open(args.molecule)))
+
     logger.info("Using barcodes from %s" % ", ".join(args.barcodes))
     barcode_files = len(args.barcodes)
     if barcode_files not in BARCODE_FILES_REQUIRED[args.mode]:
         logger.critical("Barcode calling mode %s requires %s files, %d provided" %
                         (args.mode.name, " or ".join([str(x) for x in BARCODE_FILES_REQUIRED[args.mode]]), barcode_files))
         sys.exit(IsoQuantExitCode.BARCODE_FILE_COUNT_MISMATCH)
+
     barcodes = []
     for bc in args.barcodes:
         barcodes.append(load_barcodes(bc, needs_iterator=args.mode.needs_barcode_iterator()))
@@ -280,15 +295,14 @@ def prepare_barcodes(args):
             for i, bc in enumerate(barcodes):
                 logger.info("Loaded %d barcodes from %s" % (len(bc), args.barcodes[i]))
         barcodes = tuple(barcodes)
-    return barcodes
+
+    barcode_detector = BARCODE_CALLING_MODES[args.mode](barcodes)
+
+    return barcode_detector
 
 
 def process_single_thread(args):
-    logger.info("Preparing barcodes indices")
-    barcodes = prepare_barcodes(args)
-    barcode_detector = BARCODE_CALLING_MODES[args.mode](barcodes)
-    if args.min_score:
-        barcode_detector.min_score = args.min_score
+    barcode_detector = create_barcode_caller(args)
 
     # args.input, args.output_tsv are always lists
     # args.out_fasta is a list or None
@@ -306,7 +320,7 @@ def process_single_thread(args):
     logger.info("Finished barcode calling")
 
 
-def _process_single_file_in_parallel(input_file, output_tsv, out_fasta, args, barcodes):
+def _process_single_file_in_parallel(input_file, output_tsv, out_fasta, args, barcode_detector):
     """Process a single file in parallel (internal helper function)."""
     logger.info("Processing " + input_file)
     fname, outer_ext = os.path.splitext(os.path.basename(input_file))
@@ -334,15 +348,10 @@ def _process_single_file_in_parallel(input_file, output_tsv, out_fasta, args, ba
         tmp_dir = "barcode_calling_%x" % random.randint(0, 1 << 32)
     if args.tmp_dir:
         tmp_dir = os.path.join(args.tmp_dir, tmp_dir)
+    else:
+        tmp_dir = os.path.join(os.path.dirname(args.output), tmp_dir)
     os.makedirs(tmp_dir)
 
-    barcode_detector = BARCODE_CALLING_MODES[args.mode](barcodes)
-    logger.info("Barcode caller created")
-
-    min_score = None
-    if args.min_score:
-        min_score = args.min_score
-
     tmp_barcode_file = os.path.join(tmp_dir, "bc")
     tmp_fasta_file = os.path.join(tmp_dir, "subreads") if out_fasta else None
     chunk_counter = 0
@@ -363,8 +372,7 @@ def _process_single_file_in_parallel(input_file, output_tsv, out_fasta, args, ba
                                               chunk,
                                               tmp_barcode_file,
                                               chunk_counter,
-                                              tmp_fasta_file,
-                                              min_score))
+                                              tmp_fasta_file))
             chunk_counter += 1
             if chunk_counter >= args.threads:
                 break
@@ -391,15 +399,14 @@ def _process_single_file_in_parallel(input_file, output_tsv, out_fasta, args, ba
                                                           chunk,
                                                           tmp_barcode_file,
                                                           chunk_counter,
-                                                          tmp_fasta_file,
-                                                          min_score))
+                                                          tmp_fasta_file))
                         chunk_counter += 1
                     except StopIteration:
                         reads_left = False
 
     with open(output_tsv, "w") as final_output_tsv:
         final_output_fasta = open(out_fasta, "w") if out_fasta else None
-        header = BARCODE_CALLING_MODES[args.mode].result_type().header()
+        header = barcode_detector.header()
         final_output_tsv.write(header + "\n")
         stat_dict = defaultdict(int)
         for tmp_file, tmp_fasta in output_files:
@@ -427,46 +434,16 @@ def process_in_parallel(args):
     # args.input, args.output_tsv are always lists
     # args.out_fasta is a list or None
     out_fastas = args.out_fasta if args.out_fasta else [None] * len(args.input)
-
-    # Prepare barcodes once for all files
-    barcodes = prepare_barcodes(args)
+    barcode_detector = create_barcode_caller(args)
 
     for idx, (input_file, output_tsv, out_fasta) in enumerate(zip(args.input, args.output_tsv, out_fastas)):
         if len(args.input) > 1:
             logger.info("Processing file %d/%d: %s" % (idx + 1, len(args.input), input_file))
-        _process_single_file_in_parallel(input_file, output_tsv, out_fasta, args, barcodes)
+        _process_single_file_in_parallel(input_file, output_tsv, out_fasta, args, barcode_detector)
 
     logger.info("Finished barcode calling")
 
 
-def load_barcodes(inf, needs_iterator=False):
-    if inf.endswith("h5") or inf.endswith("hdf5"):
-        return load_h5_barcodes_bit(inf)
-
-    if inf.endswith("gz") or inf.endswith("gzip"):
-        handle = gzip.open(inf, "rt")
-    else:
-        handle = open(inf, "r")
-
-    barcode_iterator = iter(l.strip().split()[0] for l in handle)
-    if needs_iterator:
-        return barcode_iterator
-
-    return [b for b in barcode_iterator]
-
-
-def load_h5_barcodes_bit(h5_file_path, dataset_name='bpMatrix_1'):
-    raise NotImplementedError()
-    import h5py
-    barcode_list = []
-    with h5py.File(h5_file_path, 'r') as h5_file:
-        dataset = numpy.array(h5_file[dataset_name])
-        for row in dataset:
-            for col in row:
-                barcode_list.append(bit_to_str(int(col[0])))
-    return barcode_list
-
-
 def set_logger(logger_instance, args):
     logger_instance.setLevel(logging.INFO)
     if args.debug:
@@ -490,14 +467,13 @@ def add_hidden_option(*args, **kwargs):  # show command only with --full-help
     parser.add_argument("--output", "-o", type=str, help="output prefix name", required=True)
     parser.add_argument("--barcodes", "-b", nargs='+', type=str, help="barcode whitelist(s)", required=False)
     # parser.add_argument("--umi", "-u", type=str, help="potential UMIs, detected de novo if not set")
-    parser.add_argument("--mode", type=str, help="mode to be used", choices=[x.name for x in BARCODE_CALLING_MODES.keys()],
-                        default=IsoQuantMode.stereoseq.name)
+    parser.add_argument("--mode", type=str, help="mode to be used", choices=[x.name for x in BARCODE_CALLING_MODES.keys()])
+    parser.add_argument("--molecule", type=str, help="MDF files with molecule description (for custom_sc mode only)")
+
     parser.add_argument("--input", "-i", nargs='+', type=str, help="input reads in [gzipped] FASTA, FASTQ, BAM, SAM",
                         required=True)
     parser.add_argument("--threads", "-t", type=int, help="threads to use (16)", default=16)
     parser.add_argument("--tmp_dir", type=str, help="folder for temporary files")
-    parser.add_argument("--min_score", type=int, help="minimal barcode score "
-                                                      "(scoring system is +1, -1, -1, -1)")
     add_hidden_option('--debug', action='store_true', default=False, help='Debug log output.')
 
     args = parser.parse_args(sys_argv)
diff --git a/misc/assess_allinfo_quality.py b/misc/assess_allinfo_quality.py
new file mode 100644
index 00000000..f71f938c
--- /dev/null
+++ b/misc/assess_allinfo_quality.py
@@ -0,0 +1,191 @@
+#!/usr/bin/env python3
+
+############################################################################
+# Copyright (c) 2022-2026 University of Helsinki
+# Copyright (c) 2020-2022 Saint Petersburg State University
+# # All Rights Reserved
+# See file LICENSE for details.
+############################################################################
+
+# Assess quality of UMI-filtered allinfo output by independently computing
+# statistics from the allinfo file and cross-validating against stats.tsv.
+
+import argparse
+import gzip
+import logging
+import sys
+from collections import Counter
+from typing import Dict, Tuple
+
+logger = logging.getLogger('AllinfoQA')
+
+
+def setup_logger():
+    logger.setLevel(logging.INFO)
+    ch = logging.StreamHandler(sys.stdout)
+    ch.setLevel(logging.INFO)
+    formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
+    ch.setFormatter(formatter)
+    logger.addHandler(ch)
+
+
+def parse_args():
+    parser = argparse.ArgumentParser(
+        description="Assess quality of UMI-filtered allinfo output")
+    parser.add_argument("--allinfo", type=str, required=True,
+                        help="Path to allinfo file (plain or .gz)")
+    parser.add_argument("--stats", type=str, required=True,
+                        help="Path to stats.tsv file from UMI filtering")
+    parser.add_argument("--output", "-o", type=str, required=True,
+                        help="Output quality report file (TSV)")
+    return parser.parse_args()
+
+
+def load_tsv_config(filepath: str) -> Dict[str, str]:
+    config = {}
+    with open(filepath) as f:
+        for line in f:
+            if line.startswith("#"):
+                continue
+            tokens = line.strip().split('\t')
+            if len(tokens) >= 2:
+                config[tokens[0]] = tokens[1]
+    return config
+
+
+def compute_allinfo_stats(allinfo_path: str) -> Tuple[Dict[str, int], int]:
+    """Parse allinfo file and compute statistics.
+
+    Returns:
+        Tuple of (stats_dict, duplicate_triplet_count)
+    """
+    open_fn = gzip.open if allinfo_path.endswith('.gz') else open
+    mode = 'rt' if allinfo_path.endswith('.gz') else 'r'
+
+    total_reads = 0
+    spliced_reads = 0
+    gene_barcode_pairs: set = set()
+    triplet_counts: Counter = Counter()
+
+    with open_fn(allinfo_path, mode) as f:
+        for line in f:
+            line = line.strip()
+            if not line:
+                continue
+
+            fields = line.split('\t')
+            if len(fields) < 13:
+                logger.warning("Skipping malformed line with %d fields: %s",
+                               len(fields), line[:100])
+                continue
+
+            total_reads += 1
+
+            # Columns (0-indexed):
+            # 0: read_id, 1: gene_id, 2: cell_type, 3: barcode, 4: umi,
+            # 5: introns, 6: TSS, 7: polyA, 8: exons, 9: read_type,
+            # 10: num_introns, 11: transcript_id, 12: transcript_type
+            gene_id = fields[1]
+            barcode = fields[3]
+            umi = fields[4]
+            num_introns = int(fields[10])
+
+            if num_introns > 0:
+                spliced_reads += 1
+
+            gene_barcode_pairs.add((gene_id, barcode))
+            triplet_counts[(gene_id, barcode, umi)] += 1
+
+    duplicate_triplets = sum(1 for count in triplet_counts.values() if count > 1)
+
+    stats = {
+        "Total reads saved": total_reads,
+        "Spliced reads saved": spliced_reads,
+        "Unique gene-barcodes pairs": len(gene_barcode_pairs),
+    }
+    return stats, duplicate_triplets
+
+
+def cross_validate(computed_stats: Dict[str, int],
+                   stats_tsv: Dict[str, str]) -> bool:
+    """Cross-validate computed allinfo stats against stats.tsv values.
+
+    Returns True if all checks pass.
+    """
+    all_match = True
+
+    checks = [
+        ("Total reads saved", "Total reads saved"),
+        ("Spliced reads saved", "Spliced reads saved"),
+        ("Unique gene-barcodes pairs", "Unique gene-barcodes pairs"),
+    ]
+
+    for computed_key, stats_key in checks:
+        if stats_key not in stats_tsv:
+            logger.error("Key '%s' not found in stats.tsv", stats_key)
+            all_match = False
+            continue
+
+        computed_val = computed_stats[computed_key]
+        stats_val = int(stats_tsv[stats_key])
+
+        if computed_val != stats_val:
+            logger.error("Mismatch for '%s': allinfo=%d, stats.tsv=%d",
+                         computed_key, computed_val, stats_val)
+            all_match = False
+        else:
+            logger.info("OK '%s': %d", computed_key, computed_val)
+
+    return all_match
+
+
+def main():
+    args = parse_args()
+    setup_logger()
+
+    logger.info("Loading allinfo from %s", args.allinfo)
+    computed_stats, duplicate_triplets = compute_allinfo_stats(args.allinfo)
+    logger.info("Computed stats from allinfo: %s", computed_stats)
+
+    logger.info("Loading stats from %s", args.stats)
+    stats_tsv = load_tsv_config(args.stats)
+    logger.info("Stats.tsv contains %d entries", len(stats_tsv))
+
+    # Cross-validate
+    stats_match = cross_validate(computed_stats, stats_tsv)
+
+    if duplicate_triplets > 0:
+        logger.error("Found %d duplicate gene-barcode-UMI triplets in allinfo",
+                      duplicate_triplets)
+    else:
+        logger.info("No duplicate gene-barcode-UMI triplets found")
+
+    # Write quality report
+    exit_code = 0
+    with open(args.output, "w") as outf:
+        # Pass through all stats.tsv entries
+        for key, val in stats_tsv.items():
+            outf.write("%s\t%s\n" % (key, val))
+
+        # Add computed validation metrics
+        outf.write("duplicate_triplets\t%d\n" % duplicate_triplets)
+        allinfo_match_val = 1 if stats_match else 0
+        outf.write("allinfo_stats_match\t%d\n" % allinfo_match_val)
+
+    logger.info("Quality report written to %s", args.output)
+
+    if not stats_match:
+        logger.error("FAILED: allinfo stats do not match stats.tsv")
+        exit_code = 1
+    if duplicate_triplets > 0:
+        logger.error("FAILED: duplicate gene-barcode-UMI triplets detected")
+        exit_code = 1
+
+    if exit_code == 0:
+        logger.info("All checks passed")
+
+    return exit_code
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/misc/assess_barcode_quality.py b/misc/assess_barcode_quality.py
index 5db838a0..66a537f5 100644
--- a/misc/assess_barcode_quality.py
+++ b/misc/assess_barcode_quality.py
@@ -384,7 +384,7 @@ def parse_args():
                         choices=['tenX_v3', 'visium_hd', 'curio', 'stereo', 'stereo_split'],
                         help='Barcode calling mode')
     parser.add_argument('--input', '-i', required=True,
-                        help='Input barcode TSV file from detect_barcodes.py')
+                        help='Input barcode TSV file from isoquant_detect_barcodes.py')
     parser.add_argument('--output', '-o',
                         help='Output metrics TSV file (default: stdout)')
     parser.add_argument('--barcode_col', type=int, default=1,
diff --git a/misc/create_stereo_test_subsets.py b/misc/create_stereo_test_subsets.py
new file mode 100644
index 00000000..d72ef807
--- /dev/null
+++ b/misc/create_stereo_test_subsets.py
@@ -0,0 +1,118 @@
+#!/usr/bin/env python3
+"""
+Create stereo-seq barcode subsets and corresponding filtered read files.
+
+From the full 10M stereo barcode whitelist:
+1. Randomly select 1M barcodes -> stereo.1M.tsv
+2. Randomly select 80K barcodes (subset of the 1M) -> stereo.80K.tsv
+3. Filter reads to keep only those matching each subset
+
+Read ID format: >READ_<idx>_<transcript>_<BARCODE>_<UMI>_<extra>...
+Barcode is field 3 (0-indexed, underscore-delimited).
+"""
+
+import os
+import random
+import sys
+
+DATA_DIR = "/abga/work/andreyp/ci_isoquant/data/barcodes"
+
+FULL_BARCODE_FILE = os.path.join(DATA_DIR, "D04620C2.10M.tsv")
+FULL_READS_FILE = os.path.join(DATA_DIR, "Mouse.StereoSeq.D04620C2.10M.800K.fa")
+
+SUBSET_1M_FILE = os.path.join(DATA_DIR, "stereo.1M.tsv")
+SUBSET_80K_FILE = os.path.join(DATA_DIR, "stereo.80K.tsv")
+READS_1M_FILE = os.path.join(DATA_DIR, "Mouse.StereoSeq.custom_sc.1M.fa")
+READS_80K_FILE = os.path.join(DATA_DIR, "Mouse.StereoSeq.custom_sc.80K.fa")
+
+SUBSET_1M_SIZE = 1000000
+SUBSET_80K_SIZE = 80000
+
+
+def load_barcodes_with_lines(filepath: str) -> tuple[list[str], list[str]]:
+    """Load barcodes (first column) and full lines from TSV file."""
+    barcodes = []
+    lines = []
+    with open(filepath) as f:
+        for line in f:
+            stripped = line.strip()
+            if not stripped:
+                continue
+            bc = stripped.split()[0]
+            barcodes.append(bc)
+            lines.append(line)
+    return barcodes, lines
+
+
+def extract_barcode_from_read_id(read_id: str) -> str:
+    """Extract barcode from FASTA read ID.
+
+    Format: >READ_idx_transcript_BARCODE_UMI_...
+    """
+    parts = read_id.lstrip(">").split("_")
+    if len(parts) >= 4:
+        return parts[3]
+    return ""
+
+
+def filter_reads_fasta(input_fasta: str, output_fasta: str, barcode_set: set[str]) -> int:
+    """Filter FASTA file, keeping reads with barcodes in the given set."""
+    kept = 0
+    total = 0
+    with open(input_fasta) as inf, open(output_fasta, "w") as outf:
+        keep_current = False
+        for line in inf:
+            if line.startswith(">"):
+                total += 1
+                bc = extract_barcode_from_read_id(line.strip())
+                keep_current = bc in barcode_set
+                if keep_current:
+                    kept += 1
+            if keep_current:
+                outf.write(line)
+    return kept, total
+
+
+def main():
+    random.seed(42)
+
+    print("Loading full barcode set...")
+    barcodes, lines = load_barcodes_with_lines(FULL_BARCODE_FILE)
+    print(f"  Loaded {len(barcodes)} barcodes")
+
+    # Select 1M subset
+    print(f"Selecting {SUBSET_1M_SIZE} barcodes for 1M subset...")
+    indices_1m = sorted(random.sample(range(len(barcodes)), SUBSET_1M_SIZE))
+    barcodes_1m = set(barcodes[i] for i in indices_1m)
+
+    with open(SUBSET_1M_FILE, "w") as f:
+        for i in indices_1m:
+            f.write(lines[i])
+    print(f"  Written to {SUBSET_1M_FILE}")
+
+    # Select 80K subset from the 1M subset
+    print(f"Selecting {SUBSET_80K_SIZE} barcodes for 80K subset (from 1M)...")
+    indices_80k_in_1m = sorted(random.sample(range(len(indices_1m)), SUBSET_80K_SIZE))
+    barcodes_80k = set(barcodes[indices_1m[i]] for i in indices_80k_in_1m)
+
+    with open(SUBSET_80K_FILE, "w") as f:
+        for i in indices_80k_in_1m:
+            orig_idx = indices_1m[i]
+            f.write(lines[orig_idx])
+    print(f"  Written to {SUBSET_80K_FILE}")
+
+    # Filter reads for 1M subset
+    print("Filtering reads for 1M subset...")
+    kept_1m, total = filter_reads_fasta(FULL_READS_FILE, READS_1M_FILE, barcodes_1m)
+    print(f"  Kept {kept_1m}/{total} reads -> {READS_1M_FILE}")
+
+    # Filter reads for 80K subset
+    print("Filtering reads for 80K subset...")
+    kept_80k, total = filter_reads_fasta(FULL_READS_FILE, READS_80K_FILE, barcodes_80k)
+    print(f"  Kept {kept_80k}/{total} reads -> {READS_80K_FILE}")
+
+    print("Done.")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/misc/create_test_barcodes.py b/misc/create_test_barcodes.py
new file mode 100644
index 00000000..a27ef373
--- /dev/null
+++ b/misc/create_test_barcodes.py
@@ -0,0 +1,82 @@
+#!/usr/bin/env python3
+"""
+Generate inflated barcode whitelist files for testing large k-mer indexer paths.
+
+Appends random unique DNA sequences to existing barcode files to push
+the total count above 100K, triggering Array2BitKmerIndexer / Dict2BitKmerIndexer
+in the universal barcode calling algorithm.
+"""
+
+import os
+import random
+import sys
+
+NUCLEOTIDES = "ACGT"
+DATA_DIR = "/abga/work/andreyp/ci_isoquant/data/barcodes"
+
+# Configuration: (input_file, output_file, dummy_count, barcode_length)
+CONFIGS = [
+    ("10xMultiome_5K.tsv", "10x.large_barcodes.tsv", 120000, 16),
+    ("A0079_044_BeadBarcodes.tsv", "curio.large_barcodes.tsv", 50000, 14),
+    ("visium_v1_hd.slide1.barcodes1.tsv", "visium.large_barcodes1.tsv", 120000, 15),
+    ("visium_v1_hd.slide1.barcodes2.tsv", "visium.large_barcodes2.tsv", 120000, 14),
+]
+
+
+def load_existing_barcodes(filepath: str) -> set[str]:
+    """Load first column from TSV barcode file."""
+    barcodes = set()
+    with open(filepath) as f:
+        for line in f:
+            bc = line.strip().split()[0]
+            if bc:
+                barcodes.add(bc)
+    return barcodes
+
+
+def generate_random_barcode(length: int) -> str:
+    return "".join(random.choice(NUCLEOTIDES) for _ in range(length))
+
+
+def generate_unique_barcodes(count: int, length: int, existing: set[str]) -> list[str]:
+    """Generate random unique barcodes not in the existing set."""
+    new_barcodes = []
+    seen = set(existing)
+    while len(new_barcodes) < count:
+        bc = generate_random_barcode(length)
+        if bc not in seen:
+            seen.add(bc)
+            new_barcodes.append(bc)
+    return new_barcodes
+
+
+def main():
+    random.seed(42)  # Reproducible results
+
+    for input_name, output_name, dummy_count, bc_length in CONFIGS:
+        input_path = os.path.join(DATA_DIR, input_name)
+        output_path = os.path.join(DATA_DIR, output_name)
+
+        print(f"Processing {input_name} -> {output_name}")
+        existing = load_existing_barcodes(input_path)
+        print(f"  Loaded {len(existing)} existing barcodes")
+
+        dummy = generate_unique_barcodes(dummy_count, bc_length, existing)
+        print(f"  Generated {len(dummy)} dummy barcodes ({bc_length}bp)")
+
+        # Write output: copy original file + append dummy barcodes
+        with open(output_path, "w") as out:
+            with open(input_path) as inp:
+                for line in inp:
+                    out.write(line)
+            for bc in dummy:
+                out.write(bc + "\n")
+
+        total = len(existing) + len(dummy)
+        print(f"  Total: {total} barcodes -> {output_path}")
+
+    print("Done.")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/src/barcode_calling/__init__.py b/src/barcode_calling/__init__.py
index f04b78c6..ae7cca81 100644
--- a/src/barcode_calling/__init__.py
+++ b/src/barcode_calling/__init__.py
@@ -49,6 +49,8 @@
     SharedMemoryWrapper,
     TenXBarcodeDetector,
     VisiumHDBarcodeDetector,
+    UniversalSingleMoleculeExtractor,
+    MoleculeStructure
 )
 
 # Utilities
@@ -82,6 +84,9 @@
     # 10x detectors
     'TenXBarcodeDetector',
     'VisiumHDBarcodeDetector',
+    # universal calling
+    'UniversalSingleMoleculeExtractor',
+    'MoleculeStructure',
     # Utilities
     'str_to_2bit',
     'bit_to_str',
diff --git a/src/barcode_calling/callers/__init__.py b/src/barcode_calling/callers/__init__.py
index 4263cf5d..512f1045 100644
--- a/src/barcode_calling/callers/__init__.py
+++ b/src/barcode_calling/callers/__init__.py
@@ -7,12 +7,16 @@
 """
 Barcode detection classes for single-cell and spatial transcriptomics.
 
+Protocol:
+    BarcodeResult: Protocol interface for all detection results
+
 Classes:
     BarcodeDetectionResult: Base result class
-    CurioBarcodeDetectionResult: Result for Curio (double barcode) platform
-    StereoBarcodeDetectionResult: Result for Stereo-seq platform
+    LinkerBarcodeDetectionResult: Result for platforms with linker sequences
+    TSOBarcodeDetectionResult: Result for Stereo-seq with TSO detection
     TenXBarcodeDetectionResult: Result for 10x Genomics platform
     SplittingBarcodeDetectionResult: Result for read splitting modes
+    ExtractionResult: Dict-based result for universal molecule extraction
     ReadStats: Statistics tracker for barcode detection
 
 Detectors:
@@ -25,8 +29,11 @@
     SharedMemoryWrapper: Generic shared memory wrapper
     TenXBarcodeDetector: 10x Genomics v3 detector
     VisiumHDBarcodeDetector: Visium HD detector
+    UniversalSingleMoleculeExtractor: Universal barcode detector for custom molecules
 """
 
+from .protocol import BarcodeResult
+
 from .base import (
     BarcodeDetectionResult,
     LinkerBarcodeDetectionResult,
@@ -37,6 +44,11 @@
     increase_if_valid,
 )
 
+from .extraction_result import (
+    ExtractionResult,
+    DetectedElement,
+)
+
 from .curio import (
     CurioBarcodeDetector,
     CurioIlluminaDetector,
@@ -55,13 +67,21 @@
     VisiumHDBarcodeDetector,
 )
 
+from .molecule_structure import MoleculeStructure
+
+from .universal_extraction import UniversalSingleMoleculeExtractor
+
 __all__ = [
+    # Protocol
+    'BarcodeResult',
     # Result classes
     'BarcodeDetectionResult',
     'LinkerBarcodeDetectionResult',
     'TSOBarcodeDetectionResult',
     'TenXBarcodeDetectionResult',
     'SplittingBarcodeDetectionResult',
+    'ExtractionResult',
+    'DetectedElement',
     'ReadStats',
     'increase_if_valid',
     # Curio detectors
@@ -76,4 +96,7 @@
     # 10x detectors
     'TenXBarcodeDetector',
     'VisiumHDBarcodeDetector',
+    # Universal extraction
+    'MoleculeStructure',
+    'UniversalSingleMoleculeExtractor',
 ]
diff --git a/src/barcode_calling/callers/base.py b/src/barcode_calling/callers/base.py
index bc6a9258..d9d3714c 100644
--- a/src/barcode_calling/callers/base.py
+++ b/src/barcode_calling/callers/base.py
@@ -30,11 +30,15 @@ def increase_if_valid(val: Optional[int], delta: int) -> Optional[int]:
     return val
 
 
+
+
+
 class BarcodeDetectionResult:
     """
     Base class for barcode detection results.
 
     Stores detected barcode, UMI, and quality scores for a single read.
+    Implements the BarcodeResult protocol.
     """
 
     NOSEQ = "*"  # Sentinel for missing/undetected sequence
@@ -53,19 +57,51 @@ def __init__(self, read_id: str, barcode: str = NOSEQ, UMI: str = NOSEQ,
             strand: Detected strand ('+', '-', or '.')
         """
         self.read_id: str = read_id
-        self.barcode: str = barcode
-        if not self.barcode:
-            self.barcode = BarcodeDetectionResult.NOSEQ
-        self.UMI: str = UMI
-        if not self.UMI:
-            self.UMI = BarcodeDetectionResult.NOSEQ
+        self._barcode: str = barcode if barcode else BarcodeDetectionResult.NOSEQ
+        self._umi: str = UMI if UMI else BarcodeDetectionResult.NOSEQ
         self.BC_score: int = BC_score
         self.UMI_good: bool = UMI_good
         self.strand: str = strand
 
+    # Primary getters (preferred interface)
+    def get_barcode(self) -> str:
+        """Get the detected barcode sequence."""
+        return self._barcode
+
+    def get_umi(self) -> str:
+        """Get the detected UMI sequence."""
+        return self._umi
+
+    # Backward-compatible property accessors
+    @property
+    def barcode(self) -> str:
+        """Barcode sequence. Prefer get_barcode() for new code."""
+        return self._barcode
+
+    @barcode.setter
+    def barcode(self, value: str) -> None:
+        self._barcode = value if value else BarcodeDetectionResult.NOSEQ
+
+    @property
+    def UMI(self) -> str:
+        """UMI sequence. Prefer get_umi() for new code."""
+        return self._umi
+
+    @UMI.setter
+    def UMI(self, value: str) -> None:
+        self._umi = value if value else BarcodeDetectionResult.NOSEQ
+
     def is_valid(self) -> bool:
         """Check if a valid barcode was detected."""
-        return self.barcode != BarcodeDetectionResult.NOSEQ
+        return self._barcode != BarcodeDetectionResult.NOSEQ
+
+    def has_barcode(self) -> bool:
+        """Check if a barcode was detected (alias for is_valid())."""
+        return self._barcode != BarcodeDetectionResult.NOSEQ
+
+    def has_umi(self) -> bool:
+        """Check if a valid UMI was detected."""
+        return self._umi != BarcodeDetectionResult.NOSEQ
 
     def update_coordinates(self, delta: int) -> None:
         """
@@ -98,12 +134,9 @@ def get_additional_attributes(self) -> List[str]:
         Get list of detected additional features (primer, linker, etc.).
 
         Returns:
-            List of detected feature names
-
-        Raises:
-            NotImplementedError: Must be implemented by subclasses
+            List of detected feature names. Empty list for base class.
         """
-        raise NotImplementedError()
+        return []
 
     def set_strand(self, strand: str) -> None:
         """Set the detected strand."""
@@ -111,12 +144,12 @@ def set_strand(self, strand: str) -> None:
 
     def __str__(self) -> str:
         """Format result as TSV line."""
-        return "%s\t%s\t%s\t%d\t%s\t%s" % (self.read_id, self.barcode, self.UMI,
+        return "%s\t%s\t%s\t%d\t%s\t%s" % (self.read_id, self._barcode, self._umi,
                                            self.BC_score, self.UMI_good, self.strand)
 
     @staticmethod
     def header() -> str:
-        """Get TSV header for result output."""
+        """Static header for class-level access."""
         return "#read_id\tbarcode\tUMI\tBC_score\tvalid_UMI\tstrand"
 
 
@@ -153,9 +186,6 @@ def __init__(self, read_id: str, barcode: str = BarcodeDetectionResult.NOSEQ,
         self.linker_end: int = linker_end
         self.polyT: int = polyT
 
-    def is_valid(self) -> bool:
-        return self.barcode != BarcodeDetectionResult.NOSEQ
-
     def update_coordinates(self, delta: int) -> None:
         self.primer = increase_if_valid(self.primer, delta)
         self.linker_start = increase_if_valid(self.linker_start, delta)
@@ -181,15 +211,13 @@ def get_additional_attributes(self) -> List[str]:
             attr.append("Linker detected")
         return attr
 
-    def set_strand(self, strand: str) -> None:
-        self.strand = strand
-
     def __str__(self) -> str:
         return (BarcodeDetectionResult.__str__(self) +
                 "\t%d\t%d\t%d\t%d" % (self.polyT, self.primer, self.linker_start, self.linker_end))
 
     @staticmethod
     def header() -> str:
+        """Static header for class-level access."""
         return BarcodeDetectionResult.header() + "\tpolyT_start\tprimer_end\tlinker_start\tlinker_end"
 
 
@@ -227,6 +255,7 @@ def get_additional_attributes(self) -> List[str]:
 
     @staticmethod
     def header() -> str:
+        """Static header for class-level access."""
         return LinkerBarcodeDetectionResult.header() + "\tTSO5"
 
 
@@ -241,9 +270,6 @@ def __init__(self, read_id: str, barcode: str = BarcodeDetectionResult.NOSEQ,
         self.r1: int = r1
         self.polyT: int = polyT
 
-    def is_valid(self) -> bool:
-        return self.barcode != BarcodeDetectionResult.NOSEQ
-
     def update_coordinates(self, delta: int) -> None:
         self.r1 = increase_if_valid(self.r1, delta)
         self.polyT = increase_if_valid(self.polyT, delta)
@@ -263,23 +289,24 @@ def get_additional_attributes(self) -> List[str]:
             attr.append("R1 detected")
         return attr
 
-    def set_strand(self, strand: str) -> None:
-        self.strand = strand
-
     def __str__(self) -> str:
         return (BarcodeDetectionResult.__str__(self) +
                 "\t%d\t%d" % (self.polyT, self.r1))
 
     @staticmethod
     def header() -> str:
+        """Static header for class-level access."""
         return BarcodeDetectionResult.header() + "\tpolyT_start\tR1_end"
 
 
 class SplittingBarcodeDetectionResult:
     """Result container for read splitting modes (multiple barcodes per read)."""
 
+    NOSEQ = BarcodeDetectionResult.NOSEQ  # For consistency with protocol
+
     def __init__(self, read_id: str):
         self.read_id: str = read_id
+        self.strand: str = "."  # For protocol compatibility
         self.detected_patterns: List[TSOBarcodeDetectionResult] = []
 
     def append(self, barcode_detection_result: TSOBarcodeDetectionResult) -> None:
@@ -293,7 +320,7 @@ def filter(self) -> None:
             return
         barcoded_results = []
         for r in self.detected_patterns:
-            if r.barcode != BarcodeDetectionResult.NOSEQ:
+            if r.is_valid():
                 barcoded_results.append(r)
 
         if not barcoded_results:
@@ -301,8 +328,63 @@ def filter(self) -> None:
         else:
             self.detected_patterns = barcoded_results
 
+    def get_barcode(self) -> str:
+        """Get barcode from first valid pattern, or NOSEQ if none."""
+        for r in self.detected_patterns:
+            if r.is_valid():
+                return r.get_barcode()
+        return self.NOSEQ
+
+    def get_umi(self) -> str:
+        """Get UMI from first valid pattern, or NOSEQ if none."""
+        for r in self.detected_patterns:
+            if r.is_valid():
+                return r.get_umi()
+        return self.NOSEQ
+
+    def is_valid(self) -> bool:
+        """Check if any pattern has a valid barcode."""
+        return any(r.is_valid() for r in self.detected_patterns)
+
+    def has_barcode(self) -> bool:
+        """Check if any pattern has a valid barcode."""
+        return any(r.has_barcode() for r in self.detected_patterns)
+
+    def has_umi(self) -> bool:
+        """Check if any pattern has a valid UMI."""
+        return any(r.has_umi() for r in self.detected_patterns)
+
+    def set_strand(self, strand: str) -> None:
+        """Set strand for all patterns."""
+        self.strand = strand
+        for r in self.detected_patterns:
+            r.set_strand(strand)
+
+    def update_coordinates(self, delta: int) -> None:
+        """Update coordinates for all patterns."""
+        for r in self.detected_patterns:
+            r.update_coordinates(delta)
+
+    def more_informative_than(self, other: 'SplittingBarcodeDetectionResult') -> bool:
+        """Compare by number of valid patterns."""
+        self_valid = sum(1 for r in self.detected_patterns if r.is_valid())
+        other_valid = sum(1 for r in other.detected_patterns if r.is_valid())
+        return self_valid > other_valid
+
+    def get_additional_attributes(self) -> List[str]:
+        """Get combined attributes from all patterns."""
+        attrs = set()
+        for r in self.detected_patterns:
+            attrs.update(r.get_additional_attributes())
+        return list(attrs)
+
+    def __str__(self) -> str:
+        """Format all patterns as TSV lines."""
+        return "\n".join(str(r) for r in self.detected_patterns)
+
     @staticmethod
     def header() -> str:
+        """Get TSV header for result output."""
         return TSOBarcodeDetectionResult.header()
 
 
@@ -321,22 +403,22 @@ def __init__(self):
         self.umi_count: int = 0
         self.additional_attributes_counts: Dict[str, int] = defaultdict(int)
 
-    def add_read(self, barcode_detection_result: BarcodeDetectionResult) -> None:
+    def add_read(self, barcode_detection_result) -> None:
         """
         Add a read result to statistics.
 
         Args:
-            barcode_detection_result: Detection result to accumulate
+            barcode_detection_result: Detection result to accumulate (implements BarcodeResult protocol)
         """
         self.read_count += 1
         # Count detected features (primer, linker, etc.)
         for a in barcode_detection_result.get_additional_attributes():
             self.additional_attributes_counts[a] += 1
         # Count valid barcode
-        if barcode_detection_result.barcode != BarcodeDetectionResult.NOSEQ:
+        if barcode_detection_result.has_barcode():
             self.bc_count += 1
         # Count valid UMI
-        if barcode_detection_result.UMI_good:
+        if barcode_detection_result.has_umi():
             self.umi_count += 1
 
     def add_custom_stats(self, stat_name: str, val: int) -> None:
diff --git a/src/barcode_calling/callers/curio.py b/src/barcode_calling/callers/curio.py
index 24434359..df2aa3ca 100644
--- a/src/barcode_calling/callers/curio.py
+++ b/src/barcode_calling/callers/curio.py
@@ -13,10 +13,11 @@
 import logging
 from typing import List, Optional
 
+from ..indexers import Array2BitKmerIndexer
 from ..indexers import KmerIndexer, ArrayKmerIndexer
 from ..common import (
     find_polyt_start, reverese_complement,
-    find_candidate_with_max_score_ssw, detect_exact_positions,
+    find_candidate_with_max_score_ssw, detect_exact_positions, str_to_2bit,
 )
 from .base import LinkerBarcodeDetectionResult
 
@@ -41,8 +42,7 @@ class CurioBarcodeDetector:
     TERMINAL_MATCH_DELTA = 2
     STRICT_TERMINAL_MATCH_DELTA = 1
 
-    def __init__(self, barcode_list: List[str], umi_list: Optional[List[str]] = None,
-                 min_score: int = 13):
+    def __init__(self, barcode_list: List[str]):
         """
         Initialize Curio barcode detector.
 
@@ -61,13 +61,15 @@ def __init__(self, barcode_list: List[str], umi_list: Optional[List[str]] = None
                     joint_barcode_list.append(b1 + b2)
         else:
             joint_barcode_list = barcode_list
-        self.barcode_indexer = ArrayKmerIndexer(joint_barcode_list, kmer_size=6)
-        self.umi_set = None
-        if umi_list:
-            self.umi_set = set(umi_list)
-            logger.debug("Loaded %d UMIs" % len(umi_list))
-            self.umi_indexer = KmerIndexer(umi_list, kmer_size=5)
-        self.min_score = min_score
+        if len(joint_barcode_list) <= 100000:
+            self.barcode_indexer = ArrayKmerIndexer(joint_barcode_list, kmer_size=6)
+            self.max_barcodes_hits = 20
+            self.min_matching_kmers = 1
+        else:
+            self.barcode_indexer = Array2BitKmerIndexer([str_to_2bit(b) for b in joint_barcode_list], kmer_size=6, seq_len=self.BC_LENGTH)
+            self.max_barcodes_hits = 10
+            self.min_matching_kmers = 2
+        self.min_score = 13
 
     def find_barcode_umi(self, read_id: str, sequence: str) -> LinkerBarcodeDetectionResult:
         """
@@ -149,7 +151,9 @@ def _find_barcode_umi_fwd(self, read_id: str, sequence: str) -> LinkerBarcodeDet
         barcode_end = linker_end + self.RIGHT_BC_LENGTH + 1
         potential_barcode = sequence[barcode_start:linker_start] + sequence[linker_end + 1:barcode_end + 1]
         logger.debug("Barcode: %s" % (potential_barcode))
-        matching_barcodes = self.barcode_indexer.get_occurrences(potential_barcode)
+        matching_barcodes = self.barcode_indexer.get_occurrences(potential_barcode,
+                                                                 max_hits=self.max_barcodes_hits,
+                                                                 min_kmers=self.min_matching_kmers)
         barcode, bc_score, bc_start, bc_end = \
             find_candidate_with_max_score_ssw(matching_barcodes, potential_barcode, min_score=self.min_score)
 
@@ -167,19 +171,11 @@ def _find_barcode_umi_fwd(self, read_id: str, sequence: str) -> LinkerBarcodeDet
         potential_umi = sequence[potential_umi_start:potential_umi_end + 1]
         logger.debug("Potential UMI: %s" % potential_umi)
 
-        umi = None
         good_umi = False
-        if self.umi_set:
-            matching_umis = self.umi_indexer.get_occurrences(potential_umi)
-            umi, umi_score, umi_start, umi_end = \
-                find_candidate_with_max_score_ssw(matching_umis, potential_umi, min_score=7)
-            logger.debug("Found UMI %s %d-%d" % (umi, umi_start, umi_end))
-
-        if not umi:
-            umi = potential_umi
-            if self.UMI_LEN - self.UMI_LEN_DELTA <= len(umi) <= self.UMI_LEN + self.UMI_LEN_DELTA and \
-                    all(x != "T" for x in umi[-self.NON_T_UMI_BASES:]):
-                good_umi = True
+        umi = potential_umi
+        if self.UMI_LEN - self.UMI_LEN_DELTA <= len(umi) <= self.UMI_LEN + self.UMI_LEN_DELTA and \
+                all(x != "T" for x in umi[-self.NON_T_UMI_BASES:]):
+            good_umi = True
 
         if not umi:
             return LinkerBarcodeDetectionResult(read_id, barcode, BC_score=bc_score,
@@ -193,6 +189,10 @@ def _find_barcode_umi_fwd(self, read_id: str, sequence: str) -> LinkerBarcodeDet
     def result_type():
         return LinkerBarcodeDetectionResult
 
+    @classmethod
+    def header(cls):
+        return cls.result_type().header()
+
 
 class CurioIlluminaDetector:
     """
@@ -215,8 +215,7 @@ class CurioIlluminaDetector:
     TERMINAL_MATCH_DELTA = 1
     STRICT_TERMINAL_MATCH_DELTA = 0
 
-    def __init__(self, joint_barcode_list: List[str], umi_list: Optional[List[str]] = None,
-                 min_score: int = 14):
+    def __init__(self, joint_barcode_list: List[str]):
         """
         Initialize Illumina Curio detector.
 
@@ -227,13 +226,15 @@ def __init__(self, joint_barcode_list: List[str], umi_list: Optional[List[str]]
         """
         self.pcr_primer_indexer = KmerIndexer([CurioBarcodeDetector.PCR_PRIMER], kmer_size=6)
         self.linker_indexer = KmerIndexer([CurioBarcodeDetector.LINKER], kmer_size=5)
-        self.barcode_indexer = KmerIndexer(joint_barcode_list, kmer_size=5)
-        self.umi_set = None
-        if umi_list:
-            self.umi_set = set(umi_list)
-            logger.debug("Loaded %d UMIs" % len(umi_list))
-            self.umi_indexer = KmerIndexer(umi_list, kmer_size=5)
-        self.min_score = min_score
+        if len(joint_barcode_list) <= 100000:
+            self.barcode_indexer = ArrayKmerIndexer(joint_barcode_list, kmer_size=5)
+            self.max_barcodes_hits = 20
+            self.min_matching_kmers = 2
+        else:
+            self.barcode_indexer = Array2BitKmerIndexer([str_to_2bit(b) for b in joint_barcode_list], kmer_size=5, seq_len=self.BC_LENGTH)
+            self.max_barcodes_hits = 10
+            self.min_matching_kmers = 2
+        self.min_score = 14
 
     def find_barcode_umi(self, read_id: str, sequence: str) -> LinkerBarcodeDetectionResult:
         """Detect barcode and UMI."""
@@ -285,7 +286,9 @@ def _find_barcode_umi_fwd(self, read_id: str, sequence: str) -> LinkerBarcodeDet
         logger.debug("Barcode: %s" % (potential_barcode))
         if len(potential_barcode) < self.MIN_BC_LEN:
             return LinkerBarcodeDetectionResult(read_id, linker_start=linker_start, linker_end=linker_end)
-        matching_barcodes = self.barcode_indexer.get_occurrences(potential_barcode)
+        matching_barcodes = self.barcode_indexer.get_occurrences(potential_barcode,
+                                                                 max_hits=self.max_barcodes_hits,
+                                                                 min_kmers=self.min_matching_kmers)
         barcode, bc_score, bc_start, bc_end = \
             find_candidate_with_max_score_ssw(matching_barcodes, potential_barcode,
                                               min_score=len(potential_barcode) - 1, score_diff=self.SCORE_DIFF)
@@ -304,19 +307,11 @@ def _find_barcode_umi_fwd(self, read_id: str, sequence: str) -> LinkerBarcodeDet
         potential_umi = sequence[potential_umi_start:min(potential_umi_end + 1, len(sequence))]
         logger.debug("Potential UMI: %s" % potential_umi)
 
-        umi = None
         good_umi = False
-        if self.umi_set:
-            matching_umis = self.umi_indexer.get_occurrences(potential_umi)
-            umi, umi_score, umi_start, umi_end = \
-                find_candidate_with_max_score_ssw(matching_umis, potential_umi, min_score=7)
-            logger.debug("Found UMI %s %d-%d" % (umi, umi_start, umi_end))
-
-        if not umi:
-            umi = potential_umi
-            if self.UMI_LEN - self.UMI_LEN_DELTA <= len(umi) <= self.UMI_LEN + self.UMI_LEN_DELTA and \
-                    all(x != "T" for x in umi[-self.NON_T_UMI_BASES:]):
-                good_umi = True
+        umi = potential_umi
+        if self.UMI_LEN - self.UMI_LEN_DELTA <= len(umi) <= self.UMI_LEN + self.UMI_LEN_DELTA and \
+                all(x != "T" for x in umi[-self.NON_T_UMI_BASES:]):
+            good_umi = True
 
         if not umi:
             return LinkerBarcodeDetectionResult(read_id, barcode, BC_score=bc_score,
@@ -329,3 +324,8 @@ def _find_barcode_umi_fwd(self, read_id: str, sequence: str) -> LinkerBarcodeDet
     @staticmethod
     def result_type():
         return LinkerBarcodeDetectionResult
+
+    @classmethod
+    def header(cls):
+        return cls.result_type().header()
+
diff --git a/src/barcode_calling/callers/extraction_result.py b/src/barcode_calling/callers/extraction_result.py
new file mode 100644
index 00000000..2c3fb4b7
--- /dev/null
+++ b/src/barcode_calling/callers/extraction_result.py
@@ -0,0 +1,333 @@
+############################################################################
+# Copyright (c) 2023-2026 University of Helsinki
+# # All Rights Reserved
+# See file LICENSE for details.
+############################################################################
+
+"""
+Dict-based flexible detection result for universal molecule extraction.
+
+Unlike the attribute-based result classes in base.py, ExtractionResult stores
+detected elements in a dictionary, allowing flexible molecule structures.
+Implements the BarcodeResult protocol.
+"""
+
+import logging
+from collections import defaultdict
+from typing import List
+
+from .molecule_structure import MoleculeStructure, ElementType
+
+logger = logging.getLogger('IsoQuant')
+
+
+class DetectedElement:
+    """Container for a single detected element's coordinates and sequence."""
+
+    def __init__(self, start: int = -1, end: int = -1, score: int = -1, seq: str = None):
+        self.start = start
+        self.end = end
+        self.score = score
+        self.seq = seq
+
+
+class ExtractionResult:
+    """
+    Dict-based flexible detection result for universal molecule extraction.
+
+    Unlike the attribute-based result classes (BarcodeDetectionResult, etc.),
+    ExtractionResult stores detected elements in a dictionary, allowing
+    flexible molecule structures defined by MoleculeStructure.
+
+    Implements the BarcodeResult protocol.
+
+    Barcode/UMI elements are identified by naming convention:
+    - Elements with prefix "barcode" (case-insensitive) are barcodes
+    - Elements with prefix "umi" (case-insensitive) are UMIs
+    """
+
+    NOSEQ = "*"  # Sentinel for missing/undetected sequence (matches BarcodeDetectionResult.NOSEQ)
+
+    def __init__(self, molecule_structure: MoleculeStructure, read_id: str, strand: str, detected_results: dict):
+        """
+        Initialize extraction result.
+
+        Args:
+            molecule_structure: Molecule structure definition
+            read_id: Read identifier
+            strand: Detected strand ('+', '-', or '.')
+            detected_results: Dict mapping element names to DetectedElement objects
+        """
+        self.molecule_structure = molecule_structure
+        self.read_id = read_id
+        self.strand = strand
+        self.detected_results = detected_results
+
+    @property
+    def _barcode_elements(self) -> List[str]:
+        """Get barcode element names from molecule structure."""
+        return self.molecule_structure.barcode_elements
+
+    @property
+    def _umi_elements(self) -> List[str]:
+        """Get UMI element names from molecule structure."""
+        return self.molecule_structure.umi_elements
+
+    def get_barcode(self) -> str:
+        """
+        Get the detected barcode sequence.
+
+        For molecule structures with multiple barcode elements (e.g., barcode1, barcode2),
+        returns concatenated barcodes using '|' delimiter only when ALL parts are detected.
+        Returns NOSEQ if any part is missing (partial barcodes are not usable).
+
+        Returns:
+            str: Concatenated barcode sequence or NOSEQ if any part is not detected.
+        """
+        parts = []
+        for el_name in self._barcode_elements:
+            if el_name in self.detected_results:
+                detected = self.detected_results[el_name]
+                if detected.seq and detected.seq != self.NOSEQ:
+                    parts.append(detected.seq)
+                    continue
+            return self.NOSEQ
+        return "|".join(parts) if parts else self.NOSEQ
+
+    def get_umi(self) -> str:
+        """
+        Get the detected UMI sequence.
+
+        For molecule structures with multiple UMI elements,
+        returns all parts using '|' delimiter, with NOSEQ ('*') for undetected elements.
+
+        Returns:
+            str: Concatenated UMI sequence or NOSEQ if not detected.
+        """
+        if len(self._umi_elements) <= 1:
+            for el_name in self._umi_elements:
+                if el_name in self.detected_results:
+                    detected = self.detected_results[el_name]
+                    if detected.seq and detected.seq != self.NOSEQ:
+                        return detected.seq
+            return self.NOSEQ
+
+        parts = []
+        for el_name in self._umi_elements:
+            if el_name in self.detected_results:
+                detected = self.detected_results[el_name]
+                if detected.seq and detected.seq != self.NOSEQ:
+                    parts.append(detected.seq)
+                    continue
+            parts.append(self.NOSEQ)
+        return "|".join(parts)
+
+    def is_valid(self) -> bool:
+        """Check if a valid barcode was detected (all parts must be present)."""
+        return self.get_barcode() != self.NOSEQ
+
+    def has_barcode(self) -> bool:
+        """Check if a barcode was detected (alias for is_valid())."""
+        return self.get_barcode() != self.NOSEQ
+
+    def has_umi(self) -> bool:
+        """Check if any valid UMI was detected."""
+        return self.get_umi() != self.NOSEQ
+
+    def get_barcode_score(self) -> int:
+        """
+        Get the best barcode alignment score.
+
+        Returns the maximum score from all barcode elements, or -1 if none detected.
+        """
+        max_score = -1
+        for el_name in self._barcode_elements:
+            if el_name in self.detected_results:
+                detected = self.detected_results[el_name]
+                if detected.score > max_score:
+                    max_score = detected.score
+        return max_score
+
+    def update_coordinates(self, delta: int) -> None:
+        """
+        Shift all genomic coordinates by delta.
+
+        Used when processing read subsequences.
+
+        Args:
+            delta: Amount to shift coordinates
+        """
+        for detected in self.detected_results.values():
+            if detected.start != -1:
+                detected.start += delta
+            if detected.end != -1:
+                detected.end += delta
+
+    def more_informative_than(self, other: 'ExtractionResult') -> bool:
+        """
+        Compare two results to determine which is more informative.
+
+        Compares by number of detected elements, then total score.
+
+        Args:
+            other: Another detection result
+
+        Returns:
+            True if this result is more informative than other
+        """
+        self_count = sum(1 for d in self.detected_results.values() if d.start != -1)
+        other_count = sum(1 for d in other.detected_results.values() if d.start != -1)
+        if self_count != other_count:
+            return self_count > other_count
+        self_score = sum(d.score for d in self.detected_results.values() if d.score > 0)
+        other_score = sum(d.score for d in other.detected_results.values() if d.score > 0)
+        return self_score > other_score
+
+    def get_additional_attributes(self) -> List[str]:
+        """
+        Get list of detected supplementary features (excluding barcode/UMI).
+
+        Barcode and UMI elements are excluded because they are already
+        tracked by ReadStats.bc_count and ReadStats.umi_count respectively.
+
+        Returns:
+            List of detected element names with " detected" suffix.
+        """
+        barcode_umi_set = set(self._barcode_elements + self._umi_elements)
+        return ["%s detected" % el_name for el_name, d in self.detected_results.items()
+                if d.start != -1 and el_name not in barcode_umi_set]
+
+    def set_strand(self, strand: str) -> None:
+        """Set the detected strand."""
+        self.strand = strand
+
+    def _get_supplementary_elements(self) -> List[str]:
+        """Get list of element names that are not barcode, UMI, or concatenated/duplicated parts."""
+        supplementary = []
+        barcode_umi_set = set(self._barcode_elements + self._umi_elements)
+        for el in self.molecule_structure:
+            if el.element_type == ElementType.cDNA:
+                continue
+            if el.element_name in self.molecule_structure.elements_to_concatenate:
+                continue
+            if el.element_name in self.molecule_structure.duplicated_elements:
+                continue
+            if el.element_name not in barcode_umi_set:
+                supplementary.append(el.element_name)
+        return supplementary
+
+    def __str__(self) -> str:
+        """
+        Format result as TSV line in standard format.
+
+        Format: read_id, barcode, UMI, BC_score, valid_UMI, strand, [supplementary elements]
+        This matches the format expected by the rest of the IsoQuant pipeline.
+        """
+        # Standard fields: read_id, barcode, UMI, BC_score, valid_UMI, strand
+        res_str = "%s\t%s\t%s\t%d\t%s\t%s" % (
+            self.read_id,
+            self.get_barcode(),
+            self.get_umi(),
+            self.get_barcode_score(),
+            self.has_umi(),
+            self.strand
+        )
+
+        # Supplementary elements (non-barcode, non-UMI)
+        barcode_umi_set = set(self._barcode_elements + self._umi_elements)
+        for el in self.molecule_structure:
+            if el.element_type == ElementType.cDNA:
+                continue
+            # Skip concatenated/duplicated parts (data lives under base name)
+            if el.element_name in self.molecule_structure.elements_to_concatenate:
+                continue
+            if el.element_name in self.molecule_structure.duplicated_elements:
+                continue
+            # Skip barcode and UMI elements (already printed above)
+            if el.element_name in barcode_umi_set:
+                continue
+
+            if el.element_type == ElementType.PolyT:
+                if ElementType.PolyT.name not in self.detected_results:
+                    res_str += "\t-1\t-1"
+                    continue
+                detected_element = self.detected_results[ElementType.PolyT.name]
+                res_str += "\t%d\t%d" % (detected_element.start, detected_element.end)
+            elif el.element_type.is_constant():
+                if el.element_name not in self.detected_results:
+                    res_str += "\t-1\t-1"
+                    continue
+                detected_element = self.detected_results[el.element_name]
+                res_str += "\t%d\t%d" % (detected_element.start, detected_element.end)
+            elif el.element_type.needs_sequence_extraction() or el.element_type.needs_correction():
+                if el.element_name not in self.detected_results:
+                    res_str += "\t-1\t-1\t%s" % self.NOSEQ
+                    continue
+                detected_element = self.detected_results[el.element_name]
+                seq = detected_element.seq if detected_element.seq else self.NOSEQ
+                res_str += "\t%d\t%d\t%s" % (detected_element.start, detected_element.end, seq)
+        return res_str
+
+    def header(self) -> str:
+        """
+        Get TSV header for result output in standard format.
+
+        Format: #read_id, barcode, UMI, BC_score, valid_UMI, strand, [supplementary elements]
+        """
+        # Standard header fields
+        header = "#read_id\tbarcode\tUMI\tBC_score\tvalid_UMI\tstrand"
+
+        # Supplementary elements (non-barcode, non-UMI)
+        barcode_umi_set = set(self._barcode_elements + self._umi_elements)
+        for el in self.molecule_structure:
+            if el.element_type == ElementType.cDNA:
+                continue
+            # Skip concatenated/duplicated parts (data lives under base name)
+            if el.element_name in self.molecule_structure.elements_to_concatenate:
+                continue
+            if el.element_name in self.molecule_structure.duplicated_elements:
+                continue
+            # Skip barcode and UMI elements (already in standard header)
+            if el.element_name in barcode_umi_set:
+                continue
+
+            if el.element_type == ElementType.PolyT:
+                header += "\tpolyT_start\tpolyT_end"
+            elif el.element_type.needs_only_coordinates():
+                header += "\t%s_start\t%s_end" % (el.element_name, el.element_name)
+            elif el.element_type.needs_sequence_extraction() or el.element_type.needs_correction():
+                header += "\t%s_start\t%s_end\t%s_seq" % (el.element_name, el.element_name, el.element_name)
+        return header
+
+    # TODO: add simple serialization
+
+
+class ReadStats:
+    """Statistics tracker for ExtractionResult objects."""
+
+    def __init__(self):
+        self.read_count = 0
+        self.bc_count = 0
+        self.umi_count = 0
+        self.pattern_counts = defaultdict(int)
+
+    def add_read(self, barcode_detection_result: ExtractionResult) -> None:
+        """Add an ExtractionResult to statistics."""
+        self.read_count += 1
+        # Count valid barcodes and UMIs
+        if barcode_detection_result.has_barcode():
+            self.bc_count += 1
+        if barcode_detection_result.has_umi():
+            self.umi_count += 1
+        # Count individual detected elements
+        for el in barcode_detection_result.detected_results:
+            if barcode_detection_result.detected_results[el].start != -1:
+                self.pattern_counts[el] += 1
+
+    def __str__(self) -> str:
+        human_readable_str = "Total reads:\t%d\n" % self.read_count
+        human_readable_str += "Barcode detected:\t%d\n" % self.bc_count
+        human_readable_str += "UMI detected:\t%d\n" % self.umi_count
+        for a in self.pattern_counts:
+            human_readable_str += "%s:\t%d\n" % (a, self.pattern_counts[a])
+        return human_readable_str
diff --git a/src/barcode_calling/callers/molecule_structure.py b/src/barcode_calling/callers/molecule_structure.py
new file mode 100644
index 00000000..478bc28e
--- /dev/null
+++ b/src/barcode_calling/callers/molecule_structure.py
@@ -0,0 +1,277 @@
+###########################################################################
+# Copyright (c) 2026 University of Helsinki
+# # All Rights Reserved
+# See file LICENSE for details.
+############################################################################
+
+import logging
+import sys
+from enum import unique, Enum
+from typing import Iterator, List
+from collections import defaultdict
+
+from ...error_codes import IsoQuantExitCode
+from ..common import load_barcodes
+
+logger = logging.getLogger('IsoQuant')
+
+
+@unique
+class ElementType(Enum):
+    PolyT = 1
+    cDNA = 2
+    CONST = 10
+    VAR_ANY = 20
+    VAR_LIST = 21
+    VAR_FILE = 22
+    VAR_ANY_SEPARATOR = 31
+    VAR_ANY_NON_T_SEPARATOR = 32
+
+    def needs_correction(self):
+        return self in (ElementType.VAR_FILE, ElementType.VAR_LIST)
+
+    def needs_sequence_extraction(self):
+        return self == ElementType.VAR_ANY
+
+    def is_base_separator(self):
+        return self in (ElementType.VAR_ANY_SEPARATOR, ElementType.VAR_ANY_NON_T_SEPARATOR)
+
+    def is_variable(self):
+        return self in (ElementType.VAR_ANY, ElementType.VAR_LIST, ElementType.VAR_FILE)
+
+    def is_constant(self):
+        return self == ElementType.CONST
+
+    def needs_only_coordinates(self):
+        """Return True if element only needs start/end coordinates (no sequence extraction)."""
+        return self.is_constant()
+
+
+class MoleculeElement:
+    def __init__(self, element_name: str, element_type: ElementType, element_value_1 = None, element_value_2 = None):
+        self.element_name = element_name
+        self.element_type = element_type
+
+        if self.element_type in [ElementType.PolyT, ElementType.cDNA]:
+            self.element_value = None
+            self.element_length = -1
+        elif self.element_type == ElementType.CONST:
+            self.element_value = element_value_1
+            self.element_length = len(self.element_value)
+        elif self.element_type == ElementType.VAR_FILE:
+            self.element_value = load_barcodes(element_value_1, needs_iterator=False)
+            if element_value_2 is None:
+                self.element_length = len(self.element_value[0])
+            else:
+                try:
+                    self.element_length = int(element_value_2)
+                except ValueError:
+                    logger.critical("Incorrectly specified length for element %s: %s" % (self.element_name, element_value_2))
+                    sys.exit(IsoQuantExitCode.INVALID_FILE_FORMAT)
+        elif self.element_type == ElementType.VAR_LIST:
+            self.element_value = element_value_1.split(',')
+            if element_value_2 is None:
+                self.element_length = len(self.element_value[0])
+            else:
+                try:
+                    self.element_length = int(element_value_2)
+                except ValueError:
+                    logger.critical(
+                        "Incorrectly specified length for element %s: %s" % (self.element_name, element_value_2))
+                    sys.exit(IsoQuantExitCode.INVALID_FILE_FORMAT)
+        elif self.element_type in (ElementType.VAR_ANY, ElementType.VAR_ANY_SEPARATOR, ElementType.VAR_ANY_NON_T_SEPARATOR):
+            self.element_value = None
+            try:
+                self.element_length = int(element_value_1)
+            except ValueError:
+                logger.critical("Incorrectly specified length for element %s: %s" % (self.element_name, element_value_1))
+                sys.exit(IsoQuantExitCode.INVALID_FILE_FORMAT)
+        else:
+            logger.critical("Wrong element type %s" % element_type)
+            sys.exit(IsoQuantExitCode.INVALID_FILE_FORMAT)
+
+
+class MoleculeStructure:
+    """
+    Defines the structure of a molecule for barcode/UMI extraction.
+
+    Parses molecule definition files (MDF) and identifies which elements
+    are barcodes and UMIs based on naming convention:
+    - Elements with prefix "barcode" (case-insensitive) are barcodes
+    - Elements with prefix "umi" (case-insensitive) are UMIs
+    """
+
+    CONCAT_DELIM = "|"
+    DUPL_DELIM = "/"
+
+    def __init__(self, str_iterator):
+        self.ordered_elements: List[MoleculeElement] = []
+        self.elements_to_concatenate = {}
+        self.concatenated_elements_counts = {}
+        self.duplicated_elements = {}
+        self.duplicated_elements_counts = {}
+        # Barcode/UMI element names identified by naming convention
+        self._barcode_elements: List[str] = []
+        self._umi_elements: List[str] = []
+
+        l = next(str_iterator)
+        elements = list(map(lambda x: x.strip(), l.strip().split(':')))
+        element_properties = {}
+        for l in str_iterator:
+            v = l.strip().split()
+            if not v:
+                continue
+            element_name = v[0]
+            if element_name in element_properties:
+                logger.critical("Duplicated element name %s" % element_name)
+                sys.exit(IsoQuantExitCode.INVALID_FILE_FORMAT)
+
+            if len(v) == 3:
+                element_properties[element_name] = (v[1], v[2], None)
+            elif len(v) == 4:
+                element_properties[element_name] = (v[1], v[2], v[3])
+            else:
+                logger.critical("Incorrect number of properties in line %s" % l.strip())
+                sys.exit(IsoQuantExitCode.INVALID_FILE_FORMAT)
+
+        element_name: str
+        for element_name in elements:
+            if element_name not in element_properties:
+                if element_name not in (ElementType.cDNA.name, ElementType.PolyT.name):
+                    logger.critical("Molecule element %s was not described in the format file" % element_name)
+                    sys.exit(IsoQuantExitCode.INVALID_FILE_FORMAT)
+                element_type = ElementType[element_name]
+                self.ordered_elements.append(MoleculeElement(element_name, element_type))
+            else:
+                element_type, element_val1, element_val2 = element_properties[element_name]
+                if element_type not in ElementType.__dict__:
+                    logger.critical("Molecule element type %s is not among the possible types" % element_type)
+                    sys.exit(IsoQuantExitCode.INVALID_FILE_FORMAT)
+                element_type = ElementType[element_type]
+                self.ordered_elements.append(MoleculeElement(element_name, element_type, element_val1, element_val2))
+
+            if self.CONCAT_DELIM in element_name:
+                if not (element_type.needs_sequence_extraction() or element_type.needs_correction()):
+                    logger.critical("Concatenated elements must be variable (fix element %s)" % element_name)
+                    sys.exit(IsoQuantExitCode.INVALID_FILE_FORMAT)
+                v = element_name.split(self.CONCAT_DELIM)
+                if len(v) != 2:
+                    logger.critical("Incorrect concatenated element %s" % element_name)
+                    sys.exit(IsoQuantExitCode.INVALID_FILE_FORMAT)
+                try:
+                    self.elements_to_concatenate[element_name] = (v[0], int(v[1]))
+                except ValueError:
+                    logger.critical("Incorrectly specified index for concatenated element %s: %s" % (element_name, v[1]))
+                    sys.exit(IsoQuantExitCode.INVALID_FILE_FORMAT)
+
+            elif self.DUPL_DELIM in element_name:
+                if not (element_type.needs_sequence_extraction() or element_type.needs_correction()):
+                    logger.critical("Concatenated elements must be variable (fix element %s)" % element_name)
+                    sys.exit(IsoQuantExitCode.INVALID_FILE_FORMAT)
+                v = element_name.split(self.DUPL_DELIM)
+                if len(v) != 2:
+                    logger.critical("Incorrect duplicated element %s" % element_name)
+                    sys.exit(IsoQuantExitCode.INVALID_FILE_FORMAT)
+                try:
+                    self.duplicated_elements[element_name] = (v[0], int(v[1]))
+                except ValueError:
+                    logger.critical("Incorrectly specified index for duplicated element %s: %s" % (element_name, v[1]))
+                    sys.exit(IsoQuantExitCode.INVALID_FILE_FORMAT)
+
+        self._check_linked_elements(self.elements_to_concatenate)
+        self.concatenated_elements_counts = self._check_indices(self.elements_to_concatenate)
+        self._check_linked_elements(self.duplicated_elements)
+        self.duplicated_elements_counts = self._check_indices(self.duplicated_elements)
+        self._identify_barcode_umi_elements()
+
+    def _check_linked_elements(self, element_dict):
+        # check that duplicated or concatenated elements have the same values (e.g. barcode whitelist)
+        # base element name -> List[MoleculeElement]
+        linked_elements = defaultdict(list)
+        for el in self.ordered_elements:
+            if el.element_name in element_dict:
+                base_element_name = element_dict[el.element_name][0]
+                linked_elements[base_element_name].append(el)
+
+        for element_list in linked_elements.values():
+            if len(set(el.element_type for el in element_list)) != 1:
+                logger.critical("Linked elements must have the same type (fix elements: %s)" % ", ".join(el.element_name for el in element_list))
+                sys.exit(IsoQuantExitCode.INVALID_FILE_FORMAT)
+            first_value = element_list[0].element_value
+            for el in element_list[1:]:
+                if el.element_value != first_value:
+                    logger.critical("Linked elements must have the same values (fix elements: %s)" % ", ".join(e.element_name for e in element_list))
+                    sys.exit(IsoQuantExitCode.INVALID_FILE_FORMAT)
+
+    def _check_indices(self, element_dict):
+        # check for consecutive indices
+        index_dict = defaultdict(list)
+        for el in element_dict:
+            base_element_name = element_dict[el][0]
+            index_dict[base_element_name].append(element_dict[el][1])
+
+        for el in index_dict:
+            if sorted(index_dict[el]) != list(range(1, len(index_dict[el]) + 1)):
+                logger.critical("Concatenated elements must be consecutive from 1 to %d (fix element %s, indices: %s)" % (len(index_dict[el]), el, str(index_dict[el])))
+                sys.exit(IsoQuantExitCode.INVALID_FILE_FORMAT)
+
+        return {el: len(val) for el, val in index_dict.items()}
+
+    def _identify_barcode_umi_elements(self) -> None:
+        """
+        Identify barcode/UMI elements by naming convention.
+
+        Elements with prefix "barcode" (case-insensitive) are barcodes.
+        Elements with prefix "umi" (case-insensitive) are UMIs.
+
+        For concatenated/duplicated elements (e.g., barcode|1, barcode|2),
+        the base name (e.g., "barcode") is used and deduplicated.
+        """
+        self._barcode_elements = []
+        self._umi_elements = []
+        seen = set()
+        for el in self.ordered_elements:
+            # For concatenated/duplicated elements, use the base name
+            if el.element_name in self.elements_to_concatenate:
+                name = self.elements_to_concatenate[el.element_name][0]
+            elif el.element_name in self.duplicated_elements:
+                name = self.duplicated_elements[el.element_name][0]
+            else:
+                name = el.element_name
+
+            if name in seen:
+                continue
+            seen.add(name)
+
+            name_lower = name.lower()
+            if name_lower.startswith("barcode"):
+                self._barcode_elements.append(name)
+            elif name_lower.startswith("umi"):
+                self._umi_elements.append(name)
+
+    @property
+    def barcode_elements(self) -> List[str]:
+        """Get list of barcode element names (identified by 'barcode' prefix)."""
+        return self._barcode_elements
+
+    @property
+    def umi_elements(self) -> List[str]:
+        """Get list of UMI element names (identified by 'umi' prefix)."""
+        return self._umi_elements
+
+    @classmethod
+    def from_element_list(cls, element_list):
+        """Create MoleculeStructure from a list of MoleculeElement objects."""
+        ms = cls.__new__(cls)
+        ms.ordered_elements = element_list
+        ms.elements_to_concatenate = {}
+        ms.duplicated_elements = {}
+        ms._barcode_elements = []
+        ms._umi_elements = []
+        ms._identify_barcode_umi_elements()
+        return ms
+
+    def __iter__(self) -> Iterator[MoleculeElement]:
+        for e in self.ordered_elements:
+            yield e
+
diff --git a/src/barcode_calling/callers/protocol.py b/src/barcode_calling/callers/protocol.py
new file mode 100644
index 00000000..a76f6717
--- /dev/null
+++ b/src/barcode_calling/callers/protocol.py
@@ -0,0 +1,191 @@
+############################################################################
+# Copyright (c) 2023-2026 University of Helsinki
+# # All Rights Reserved
+# See file LICENSE for details.
+############################################################################
+
+"""
+Protocol definition for barcode detection results.
+
+This module defines the BarcodeResult protocol that all barcode detection
+result classes implement. Using Protocol (structural typing) allows
+existing classes to automatically conform without explicit inheritance.
+"""
+
+from typing import Protocol, List, ClassVar, runtime_checkable
+
+
+@runtime_checkable
+class BarcodeResult(Protocol):
+    """
+    Protocol defining the interface for barcode detection results.
+
+    All barcode detection result classes (BarcodeDetectionResult,
+    LinkerBarcodeDetectionResult, TSOBarcodeDetectionResult,
+    TenXBarcodeDetectionResult, ExtractionResult) implement this interface.
+
+    Using Protocol (PEP 544) enables structural subtyping - classes
+    don't need to explicitly inherit from this protocol to be
+    considered compatible; they just need to implement the required
+    methods and attributes.
+
+    Attributes:
+        NOSEQ: Class variable sentinel for missing/undetected sequence ("*").
+        read_id: The unique identifier of the read being analyzed.
+        strand: The detected strand orientation ('+', '-', or '.').
+
+    Example:
+        def process_result(result: BarcodeResult) -> None:
+            if result.is_valid():
+                barcode = result.get_barcode()
+                umi = result.get_umi()
+                print(f"{result.read_id}: {barcode}/{umi}")
+    """
+
+    NOSEQ: ClassVar[str]
+    read_id: str
+    strand: str
+
+    def get_barcode(self) -> str:
+        """
+        Get the detected barcode sequence.
+
+        Returns the barcode sequence if detected, or NOSEQ ("*") if no
+        barcode was found. For platforms with multiple barcode segments
+        (e.g., Visium HD with part1|part2), returns the concatenated
+        barcode with "|" delimiter.
+
+        Returns:
+            str: Detected barcode sequence or NOSEQ if not found.
+        """
+        ...
+
+    def get_umi(self) -> str:
+        """
+        Get the detected UMI (Unique Molecular Identifier) sequence.
+
+        Returns the UMI sequence if detected, or NOSEQ ("*") if no
+        UMI was found. For platforms with multiple UMI elements,
+        returns concatenated UMIs with "|" delimiter.
+
+        Returns:
+            str: Detected UMI sequence or NOSEQ if not found.
+        """
+        ...
+
+    def is_valid(self) -> bool:
+        """
+        Check if a valid barcode was detected.
+
+        A result is considered valid if get_barcode() returns something
+        other than NOSEQ. This is the primary method to check if barcode
+        detection was successful.
+
+        Returns:
+            bool: True if a valid barcode was detected, False otherwise.
+        """
+        ...
+
+    def has_barcode(self) -> bool:
+        """
+        Check if a barcode was detected (alias for is_valid()).
+
+        Used by ReadStats for counting detected barcodes.
+
+        Returns:
+            bool: True if barcode was detected.
+        """
+        ...
+
+    def has_umi(self) -> bool:
+        """
+        Check if a valid UMI was detected.
+
+        Returns True if get_umi() returns something other than NOSEQ.
+        Used by ReadStats for counting detected UMIs.
+
+        Returns:
+            bool: True if UMI was detected.
+        """
+        ...
+
+    def set_strand(self, strand: str) -> None:
+        """
+        Set the detected strand orientation.
+
+        Args:
+            strand: Strand orientation. Should be '+' for forward,
+                   '-' for reverse, or '.' for unknown/unstranded.
+        """
+        ...
+
+    def update_coordinates(self, delta: int) -> None:
+        """
+        Shift all genomic coordinates by a delta value.
+
+        Used when processing read subsequences (e.g., after splitting
+        a concatenated read). Implementations should update all position
+        attributes (polyT, primer, linker_start, linker_end, etc.)
+        by adding delta. Coordinates that are -1 (invalid/not detected)
+        should remain -1.
+
+        Args:
+            delta: Amount to add to all coordinate values.
+        """
+        ...
+
+    def more_informative_than(self, other: 'BarcodeResult') -> bool:
+        """
+        Compare two results to determine which is more informative.
+
+        Used when both forward and reverse complement orientations
+        produce results, to select the better one. The comparison
+        criteria vary by platform but typically consider:
+        - Barcode alignment score
+        - Presence of additional features (polyT, primer, linker)
+        - Feature positions
+
+        Args:
+            other: Another detection result to compare against.
+
+        Returns:
+            bool: True if this result is more informative than `other`.
+        """
+        ...
+
+    def get_additional_attributes(self) -> List[str]:
+        """
+        Get list of detected additional features.
+
+        Returns human-readable strings describing which optional
+        features were detected (e.g., "PolyT detected", "Primer detected",
+        "Linker detected", "TSO detected", "R1 detected").
+
+        Used for statistics collection and quality reporting.
+
+        Returns:
+            List[str]: Names of detected features. Empty list if none.
+        """
+        ...
+
+    def __str__(self) -> str:
+        """
+        Format result as a tab-separated string for TSV output.
+
+        The format should match what header() returns.
+
+        Returns:
+            str: Tab-separated values for TSV output.
+        """
+        ...
+
+    def header(self) -> str:
+        """
+        Get TSV header line for result output.
+
+        Returns the column names matching the fields output by __str__().
+
+        Returns:
+            str: Tab-separated column names with leading '#'.
+        """
+        ...
diff --git a/src/barcode_calling/callers/stereo.py b/src/barcode_calling/callers/stereo.py
index bdcbe8c0..58a65b96 100644
--- a/src/barcode_calling/callers/stereo.py
+++ b/src/barcode_calling/callers/stereo.py
@@ -36,13 +36,12 @@ class StereoBarcodeDetector:
     TERMINAL_MATCH_DELTA = 3
     STRICT_TERMINAL_MATCH_DELTA = 1
 
-    def __init__(self, barcodes: List[str], min_score: int = 21):
+    def __init__(self, barcodes: List[str]):
         """
         Initialize Stereo-seq detector.
 
         Args:
             barcodes: List of known barcode sequences
-            min_score: Minimum alignment score for barcode matching
         """
         self.main_primer = StereoBarcodeDetector.PC1_PRIMER
         self.pcr_primer_indexer = ArrayKmerIndexer([self.main_primer], kmer_size=6)
@@ -55,8 +54,7 @@ def __init__(self, barcodes: List[str], min_score: int = 21):
             self.barcode_indexer = Dict2BitKmerIndexer(bit_barcodes, kmer_size=14, seq_len=self.BC_LENGTH)
             logger.info("Indexed %d barcodes" % len(bit_barcodes))
 
-        self.umi_set = None
-        self.min_score = min_score
+        self.min_score = 21
 
     def find_barcode_umi_multiple(self, read_id: str, sequence: str) -> List[LinkerBarcodeDetectionResult]:
         """Find multiple barcodes in a single read."""
@@ -203,14 +201,18 @@ def _find_barcode_umi_fwd(self, read_id: str, sequence: str) -> LinkerBarcodeDet
     def result_type():
         return LinkerBarcodeDetectionResult
 
+    @classmethod
+    def header(cls):
+        return cls.result_type().header()
+
 
 class SharedMemoryStereoBarcodeDetector(StereoBarcodeDetector):
     """Stereo-seq detector with shared memory for large barcode sets."""
 
     MIN_BARCODES_FOR_SHARED_MEM = 1000000
 
-    def __init__(self, barcodes: List[str], min_score: int = 21):
-        super().__init__([], min_score=min_score)
+    def __init__(self, barcodes: List[str]):
+        super().__init__([])
         # Convert to 2-bit encoding first (memory efficient)
         bit_barcodes = batch_str_to_2bit_chunked(iter(barcodes), seq_len=self.BC_LENGTH)
         self.barcode_count = len(bit_barcodes)
@@ -238,7 +240,7 @@ def __getstate__(self):
 
     def __setstate__(self, state):
         self.min_score = state[0]
-        super().__init__([], min_score=self.min_score)
+        super().__init__([])
         self.bit_barcodes = None
         self.barcode_count = state[1]
         if self.barcode_count < self.MIN_BARCODES_FOR_SHARED_MEM:
@@ -262,13 +264,12 @@ class StereoSplittingBarcodeDetector:
     TERMINAL_MATCH_DELTA = 3
     STRICT_TERMINAL_MATCH_DELTA = 1
 
-    def __init__(self, barcodes: List[str], min_score: int = 21):
+    def __init__(self, barcodes: List[str]):
         """
         Initialize splitting detector.
 
         Args:
             barcodes: List of known barcode sequences
-            min_score: Minimum alignment score
         """
         self.main_primer = self.PC1_PRIMER
         self.tso5_indexer = ArrayKmerIndexer([self.TSO5], kmer_size=8)
@@ -281,8 +282,7 @@ def __init__(self, barcodes: List[str], min_score: int = 21):
             bit_barcodes = batch_str_to_2bit_chunked(iter(barcodes), seq_len=self.BC_LENGTH)
             self.barcode_indexer = Dict2BitKmerIndexer(bit_barcodes, kmer_size=14, seq_len=self.BC_LENGTH)
             logger.info("Indexed %d barcodes" % len(bit_barcodes))
-        self.umi_set = None
-        self.min_score = min_score
+        self.min_score = 21
 
     def find_barcode_umi(self, read_id: str, sequence: str) -> SplittingBarcodeDetectionResult:
         """Find multiple barcodes in a concatenated read."""
@@ -490,14 +490,18 @@ def _find_barcode_umi_fwd(self, read_id: str, sequence: str) -> TSOBarcodeDetect
     def result_type():
         return SplittingBarcodeDetectionResult
 
+    @classmethod
+    def header(cls):
+        return cls.result_type().header()
+
 
 class SharedMemoryStereoSplittingBarcodeDetector(StereoSplittingBarcodeDetector):
     """Stereo-seq splitting detector with shared memory for large barcode sets."""
 
     MIN_BARCODES_FOR_SHARED_MEM = 1000000
 
-    def __init__(self, barcodes: List[str], min_score: int = 21):
-        super().__init__([], min_score=min_score)
+    def __init__(self, barcodes: List[str]):
+        super().__init__([])
         # Convert to 2-bit encoding first (memory efficient)
         bit_barcodes = batch_str_to_2bit_chunked(iter(barcodes), seq_len=self.BC_LENGTH)
         self.barcode_count = len(bit_barcodes)
@@ -525,7 +529,7 @@ def __getstate__(self):
 
     def __setstate__(self, state):
         self.min_score = state[0]
-        super().__init__([], min_score=self.min_score)
+        super().__init__([])
         self.bit_barcodes = None
         self.barcode_count = state[1]
         if self.barcode_count < self.MIN_BARCODES_FOR_SHARED_MEM:
@@ -538,18 +542,17 @@ def __setstate__(self, state):
 class SharedMemoryWrapper:
     """Generic wrapper to add shared memory support to any barcode detector."""
 
-    def __init__(self, barcode_detector_class, barcodes: List[str], min_score: int = 21):
+    def __init__(self, barcode_detector_class, barcodes: List[str]):
         """
         Initialize wrapper.
 
         Args:
             barcode_detector_class: Detector class to wrap
             barcodes: List of known barcodes
-            min_score: Minimum alignment score
         """
         self.barcode_detector_class = barcode_detector_class
-        self.min_score = min_score
-        self.barcode_detector = self.barcode_detector_class([], self.min_score)
+        self.barcode_detector = self.barcode_detector_class([])
+        self.min_score = self.barcode_detector.min_score
         # Convert to 2-bit and use shared memory indexer
         barcode_iter = iter(barcodes) if isinstance(barcodes, list) else barcodes
         bit_barcodes = batch_str_to_2bit_chunked(barcode_iter, seq_len=self.barcode_detector_class.BC_LENGTH)
@@ -566,5 +569,6 @@ def __getstate__(self):
     def __setstate__(self, state):
         self.barcode_detector_class = state[0]
         self.min_score = state[1]
-        self.barcode_detector = self.barcode_detector_class([], self.min_score)
+        self.barcode_detector = self.barcode_detector_class([])
+        self.barcode_detector.min_score = self.min_score
         self.barcode_detector.barcode_indexer = SharedMemoryArray2BitKmerIndexer.from_sharable_info(state[2])
diff --git a/src/barcode_calling/callers/tenx.py b/src/barcode_calling/callers/tenx.py
index 449b7798..5732466d 100644
--- a/src/barcode_calling/callers/tenx.py
+++ b/src/barcode_calling/callers/tenx.py
@@ -13,11 +13,12 @@
 import logging
 from typing import List, Optional
 
+from .. import ArrayKmerIndexer, Array2BitKmerIndexer
 from ..indexers import KmerIndexer
 from ..common import (
     find_polyt_start, reverese_complement,
     find_candidate_with_max_score_ssw, find_candidate_with_max_score_ssw_var_len,
-    detect_exact_positions,
+    detect_exact_positions, str_to_2bit,
 )
 from .base import TenXBarcodeDetectionResult
 
@@ -36,7 +37,7 @@ class TenXBarcodeDetector:
     TERMINAL_MATCH_DELTA = 2
     STRICT_TERMINAL_MATCH_DELTA = 1
 
-    def __init__(self, barcode_list: List[str], umi_list: Optional[List[str]] = None):
+    def __init__(self, barcode_list: List[str]):
         """
         Initialize 10x detector.
 
@@ -45,15 +46,19 @@ def __init__(self, barcode_list: List[str], umi_list: Optional[List[str]] = None
             umi_list: Optional list of known UMIs for validation
         """
         self.r1_indexer = KmerIndexer([TenXBarcodeDetector.R1], kmer_size=7)
-        self.barcode_indexer = KmerIndexer(barcode_list, kmer_size=6)
-        self.umi_set = None
-        if umi_list:
-            self.umi_set = set(umi_list)
-            logger.debug("Loaded %d UMIs" % len(umi_list))
-            self.umi_indexer = KmerIndexer(umi_list, kmer_size=5)
-        self.min_score = 14
-        if len(self.barcode_indexer.seq_list) > 100000:
-            self.min_score = 16
+        if len(barcode_list) <= 100000:
+            self.barcode_indexer = ArrayKmerIndexer(barcode_list, kmer_size=6)
+            self.max_barcodes_hits = 20
+            self.min_matching_kmers = 1
+            self.min_score = 14
+            self.score_diff = 0
+        else:
+            self.barcode_indexer = Array2BitKmerIndexer([str_to_2bit(b) for b in barcode_list], kmer_size=6, seq_len=self.BARCODE_LEN_10X)
+            self.max_barcodes_hits = 10
+            self.min_matching_kmers = 2
+            self.min_score = 15
+            self.score_diff = 1
+
         logger.debug("Min score set to %d" % self.min_score)
 
     def find_barcode_umi(self, read_id: str, sequence: str) -> TenXBarcodeDetectionResult:
@@ -121,9 +126,13 @@ def _find_barcode_umi_fwd(self, read_id: str, sequence: str) -> TenXBarcodeDetec
         barcode_end = r1_end + self.BARCODE_LEN_10X + 1
         potential_barcode = sequence[barcode_start:barcode_end + 1]
         logger.debug("Barcode: %s" % (potential_barcode))
-        matching_barcodes = self.barcode_indexer.get_occurrences(potential_barcode)
+        matching_barcodes = self.barcode_indexer.get_occurrences(potential_barcode,
+                                                                 max_hits=self.max_barcodes_hits,
+                                                                 min_kmers=self.min_matching_kmers)
         barcode, bc_score, bc_start, bc_end = \
-            find_candidate_with_max_score_ssw(matching_barcodes, potential_barcode, min_score=self.min_score)
+            find_candidate_with_max_score_ssw(matching_barcodes, potential_barcode,
+                                              min_score=self.min_score,
+                                              score_diff=self.score_diff)
 
         if barcode is None:
             return TenXBarcodeDetectionResult(read_id, polyT=polyt_start, r1=r1_end)
@@ -137,18 +146,10 @@ def _find_barcode_umi_fwd(self, read_id: str, sequence: str) -> TenXBarcodeDetec
         potential_umi = sequence[potential_umi_start:potential_umi_end + 1]
         logger.debug("Potential UMI: %s" % potential_umi)
 
-        umi = None
         good_umi = False
-        if self.umi_set:
-            matching_umis = self.umi_indexer.get_occurrences(potential_umi)
-            umi, umi_score, umi_start, umi_end = \
-                find_candidate_with_max_score_ssw(matching_umis, potential_umi, min_score=7)
-            logger.debug("Found UMI %s %d-%d" % (umi, umi_start, umi_end))
-
-        if not umi:
-            umi = potential_umi
-            if self.UMI_LEN - self.UMI_LEN_DELTA <= len(umi) <= self.UMI_LEN + self.UMI_LEN_DELTA:
-                good_umi = True
+        umi = potential_umi
+        if self.UMI_LEN - self.UMI_LEN_DELTA <= len(umi) <= self.UMI_LEN + self.UMI_LEN_DELTA:
+            good_umi = True
 
         if not umi:
             return TenXBarcodeDetectionResult(read_id, barcode, BC_score=bc_score, polyT=polyt_start, r1=r1_end)
@@ -175,6 +176,10 @@ def find_barcode_umi_no_polya(self, read_id: str, sequence: str) -> TenXBarcodeD
     def result_type():
         return TenXBarcodeDetectionResult
 
+    @classmethod
+    def header(cls):
+        return cls.result_type().header()
+
 
 class VisiumHDBarcodeDetector:
     """Visium HD spatial transcriptomics barcode detector."""
@@ -201,10 +206,13 @@ def __init__(self, barcode_pair_list: List[List[str]]):
         self.r1_indexer = KmerIndexer([VisiumHDBarcodeDetector.R1], kmer_size=7)
         self.part1_list = barcode_pair_list[0]
         self.part2_list = barcode_pair_list[1]
-        self.part1_barcode_indexer = KmerIndexer(self.part1_list, kmer_size=7)
-        self.part2_barcode_indexer = KmerIndexer(self.part2_list, kmer_size=7)
-        self.umi_set = None
+
+        self.part1_barcode_indexer = ArrayKmerIndexer(self.part1_list, kmer_size=7)
+        self.part2_barcode_indexer = ArrayKmerIndexer(self.part2_list, kmer_size=7)
+        self.max_barcodes_hits = 20
+        self.min_matching_kmers = 2
         self.min_score = 13
+
         logger.debug("Min score set to %d" % self.min_score)
 
     def find_barcode_umi(self, read_id: str, sequence: str) -> TenXBarcodeDetectionResult:
@@ -344,3 +352,7 @@ def find_barcode_umi_no_polya(self, read_id: str, sequence: str) -> TenXBarcodeD
     @staticmethod
     def result_type():
         return TenXBarcodeDetectionResult
+
+    @classmethod
+    def header(cls):
+        return cls.result_type().header()
diff --git a/src/barcode_calling/callers/universal_extraction.py b/src/barcode_calling/callers/universal_extraction.py
new file mode 100644
index 00000000..de774ae7
--- /dev/null
+++ b/src/barcode_calling/callers/universal_extraction.py
@@ -0,0 +1,579 @@
+###########################################################################
+# Copyright (c) 2026 University of Helsinki
+# # All Rights Reserved
+# See file LICENSE for details.
+############################################################################
+
+import logging
+import math
+import sys
+
+from ..indexers import ArrayKmerIndexer, Array2BitKmerIndexer, Dict2BitKmerIndexer, KmerIndexer
+from ..common import batch_str_to_2bit
+from ...error_codes import IsoQuantExitCode
+from .extraction_result import DetectedElement, ExtractionResult
+from ..common import find_polyt, reverese_complement, detect_exact_positions, find_candidate_with_max_score_ssw
+from .molecule_structure import ElementType, MoleculeStructure, MoleculeElement
+
+logger = logging.getLogger('IsoQuant')
+
+
+class UniversalSingleMoleculeExtractor:
+    MIN_SCORE_COEFF = 0.75
+    MIN_SCORE_COEFF_TERMMINAL = 0.5
+    TERMINAL_MATCH_DELTA = 3
+    MAX_LEN_DIFF = 0.25
+    DEFAULT_ERROR_RATE = 0.03 # rought estimate for modern nanopore, doesn't matter that much
+    MAX_HITS = 10
+    SCORE_DIFF = 1
+
+    def __init__(self, molecule_structure: MoleculeStructure):
+        self.correct_sequences = True
+        self.molecule_structure = molecule_structure
+        self.index_dict = {}
+        self.min_scores = {}
+        self.has_polyt = False
+        self.has_cdna = False
+        self.constant_elements_to_detect = set()
+        self.elements_to_extract = set()
+        self.elements_to_correct = set()
+
+        for el in self.molecule_structure:
+            if el.element_type == ElementType.PolyT:
+                if self.has_polyt:
+                    logger.critical(
+                        "Current version only supports a single polyT, extraction results may be suboptimal")
+                    sys.exit(IsoQuantExitCode.INVALID_PARAMETER)
+                self.has_polyt = True
+            elif el.element_type == ElementType.cDNA:
+                if self.has_cdna:
+                    logger.critical(
+                        "Current version only supports a single cDNA, reads will not be split into molecules")
+                    sys.exit(IsoQuantExitCode.INVALID_PARAMETER)
+                self.has_cdna = True
+            elif el.element_type.is_constant():
+                self.constant_elements_to_detect.add(el.element_name)
+                self.index_dict[el.element_name] = KmerIndexer([el.element_value],
+                                                               max(6, int(el.element_length / 2) - 2))
+            elif el.element_type.needs_sequence_extraction():
+                self.elements_to_extract.add(el.element_name)
+            elif el.element_type.needs_correction():
+                if el.element_name in self.molecule_structure.elements_to_concatenate:
+                    # Concatenated parts: skip individual index, handled after loop
+                    pass
+                elif el.element_name in self.molecule_structure.duplicated_elements:
+                    # Duplicated parts: skip individual index, handled after loop
+                    pass
+                elif self.correct_sequences:
+                    self.prepare_barcode_index_for_element(el)
+                    self.elements_to_correct.add(el.element_name)
+                else:
+                    self.elements_to_extract.add(el.element_name)
+
+        # Build one index per concatenated base element using the full whitelist
+        if self.correct_sequences:
+            for base_name, count in self.molecule_structure.concatenated_elements_counts.items():
+                first_part_el = None
+                total_length = 0
+                for el in self.molecule_structure:
+                    if el.element_name in self.molecule_structure.elements_to_concatenate:
+                        if self.molecule_structure.elements_to_concatenate[el.element_name][0] == base_name:
+                            if first_part_el is None:
+                                first_part_el = el
+                            total_length += el.element_length
+                if first_part_el is not None and first_part_el.element_value is not None:
+                    barcode_list = first_part_el.element_value
+                    self.prepare_barcode_index(base_name, barcode_list, total_length)
+
+        # Build one index per duplicated base element using the full whitelist
+        if self.correct_sequences:
+            for base_name, count in self.molecule_structure.duplicated_elements_counts.items():
+                first_part_el = None
+                for el in self.molecule_structure:
+                    if el.element_name in self.molecule_structure.duplicated_elements:
+                        if self.molecule_structure.duplicated_elements[el.element_name][0] == base_name:
+                            first_part_el = el
+                            break
+                if first_part_el is not None and first_part_el.element_value is not None:
+                    barcode_list = first_part_el.element_value
+                    self.prepare_barcode_index(base_name, barcode_list, first_part_el.element_length)
+
+        if not self.has_cdna:
+            logger.critical("Molecule must include a cDNA")
+            sys.exit(IsoQuantExitCode.INVALID_PARAMETER)
+
+    def prepare_barcode_index(self, base_name, barcode_list, barcode_length):
+        # Check whether all barcodes have the same length
+        barcode_lengths = set(len(b) for b in barcode_list)
+        variable_length = len(barcode_lengths) > 1
+        if variable_length:
+            logger.warning("Barcodes for element %s have variable lengths (%s), "
+                           "performance may be suboptimal" %
+                           (base_name, ", ".join(str(l) for l in sorted(barcode_lengths))))
+            barcode_length = min(barcode_lengths)
+
+        barcode_count = len(barcode_list)
+        error_rate = self.DEFAULT_ERROR_RATE
+        barcode_sparsity = math.pow(4, barcode_length) / barcode_count
+        filling_edit_distance = math.ceil(math.log(barcode_sparsity, 8 * barcode_length))
+        error_probability = math.pow(1 - error_rate, barcode_length - filling_edit_distance) * math.pow(error_rate, filling_edit_distance) * math.comb(barcode_length, filling_edit_distance)
+        density = barcode_count * math.pow(8 * barcode_length, filling_edit_distance) / math.pow(4, barcode_length)
+        if error_probability > 0.01 or density > 1:
+            filling_edit_distance -= 1
+
+        if filling_edit_distance == 0:
+            self.min_scores[base_name] = barcode_length - 1
+        self.min_scores[base_name] = barcode_length - filling_edit_distance
+        if variable_length:
+            self.min_scores[base_name] = min(self.min_scores[base_name] + 1, barcode_length)
+        logger.info("Minimal score for element %s is set to %d" % (base_name, self.min_scores[base_name]))
+
+        if barcode_count > 1000000:
+            logger.warning("The number of barcodes for element %s is too large: %d, barcode calling may take substantial amount of time and RAM", (base_name, barcode_count))
+
+        kmer_size = min(max(6, int(barcode_length / 2) - 1), 15)
+        if kmer_size <= 8:
+            if barcode_count < 100000 or variable_length:
+                self.index_dict[base_name] = ArrayKmerIndexer(barcode_list, kmer_size)
+            else:
+                self.index_dict[base_name] = Array2BitKmerIndexer(batch_str_to_2bit(barcode_list, barcode_length),
+                                                                  kmer_size, seq_len=barcode_length)
+        else:
+            if barcode_count < 100000 or variable_length:
+                self.index_dict[base_name] = KmerIndexer(barcode_list, kmer_size)
+            else:
+                self.index_dict[base_name] = Dict2BitKmerIndexer(batch_str_to_2bit(barcode_list, barcode_length),
+                                                                 kmer_size, seq_len=barcode_length)
+        logger.info("Indexed %d barcodes for element %s" % (barcode_count, base_name))
+
+    def prepare_barcode_index_for_element(self, element: MoleculeElement):
+        if element.element_type not in (ElementType.VAR_LIST, ElementType.VAR_FILE):
+            return
+        barcode_list = element.element_value
+        self.prepare_barcode_index(element.element_name, barcode_list, element.element_length)
+
+    def result_type(self):
+        return ExtractionResult
+
+    def header(self):
+        """Get header matching ExtractionResult output format."""
+        # Create a dummy result to get the correct header format
+        dummy_result = ExtractionResult(self.molecule_structure, "", ".", {})
+        return dummy_result.header()
+
+    def find_barcode_umi(self, read_id, sequence):
+        detected_elements_fwd = self._find_patterns_fwd(read_id, sequence)
+        rev_seq = reverese_complement(sequence)
+        detected_elements_rev = self._find_patterns_fwd(read_id, rev_seq)
+
+        if self.has_polyt:
+            fwd_has_polyt = ElementType.PolyT.name in detected_elements_fwd
+            rev_has_polyt = ElementType.PolyT.name in detected_elements_rev
+            if fwd_has_polyt and not rev_has_polyt:
+                return ExtractionResult(self.molecule_structure, read_id, '+', detected_elements_fwd)
+            if not fwd_has_polyt and rev_has_polyt:
+                return ExtractionResult(self.molecule_structure, read_id, '-', detected_elements_rev)
+
+        if len(detected_elements_fwd) >= len(detected_elements_rev):
+            return ExtractionResult(self.molecule_structure, read_id, '+', detected_elements_fwd)
+        return ExtractionResult(self.molecule_structure, read_id, '-', detected_elements_rev)
+
+    def _find_patterns_fwd(self, read_id, sequence):
+        logger.debug("== read id %s ==" % read_id)
+        # TODO: make a list of DetectedElement rather that dict of str->DetectedElement for speed
+        detected_elements = {}
+        polyt_start, polyt_end = -1, -1
+        if self.has_polyt:
+            polyt_start, polyt_end = find_polyt(sequence)
+            detected_elements[ElementType.PolyT.name] = DetectedElement(polyt_start, polyt_end, 0)
+        logger.debug("PolyT %d" % polyt_start)
+
+        self.detect_const_elements(sequence, polyt_start, polyt_end, detected_elements)
+        concat_element_storage = {}
+        dupl_element_storage = {}
+        self.extract_variable_elements5(sequence, detected_elements, concat_element_storage, dupl_element_storage)
+        self.extract_variable_elements3(sequence, detected_elements, concat_element_storage, dupl_element_storage)
+        self.process_concatenated_elements(concat_element_storage, detected_elements, sequence)
+        self.process_duplicated_elements(dupl_element_storage, detected_elements, sequence)
+        logger.debug("== end read id %s ==" % read_id)
+        return detected_elements
+
+    def process_concatenated_elements(self, concatenated_element_storage, detected_results, sequence):
+        for base_name, parts in concatenated_element_storage.items():
+            # Check all parts were detected
+            if any(start == -1 for start, end in parts):
+                detected_results[base_name] = DetectedElement(-1, -1, -1, ExtractionResult.NOSEQ)
+                continue
+
+            # Concatenate sequences from all parts
+            concatenated_seq = ""
+            for start, end in parts:
+                concatenated_seq += sequence[start:end + 1]
+
+            # If correction is enabled and an index exists, correct against the whitelist
+            if self.correct_sequences and base_name in self.index_dict:
+                matching_sequences = self.index_dict[base_name].get_occurrences(concatenated_seq, max_hits=self.MAX_HITS)
+                corrected_seq, seq_score, _, _ = find_candidate_with_max_score_ssw(
+                    matching_sequences, concatenated_seq,
+                    min_score=self.min_scores[base_name], score_diff=self.SCORE_DIFF)
+                if corrected_seq is not None:
+                    detected_results[base_name] = DetectedElement(
+                        parts[0][0], parts[-1][1], seq_score, corrected_seq)
+                else:
+                    detected_results[base_name] = DetectedElement(-1, -1, -1, ExtractionResult.NOSEQ)
+            else:
+                # No correction: just store the raw concatenated sequence
+                detected_results[base_name] = DetectedElement(
+                    parts[0][0], parts[-1][1], 0, concatenated_seq)
+
+    def process_duplicated_elements(self, duplicated_element_storage, detected_results, sequence):
+        for base_name, parts in duplicated_element_storage.items():
+            # Extract sequences for detected parts
+            part_seqs = []
+            part_coords = []
+            for start, end in parts:
+                if start != -1:
+                    part_seqs.append(sequence[start:end + 1])
+                    part_coords.append((start, end))
+                else:
+                    part_seqs.append(None)
+                    part_coords.append((-1, -1))
+
+            detected_seqs = [s for s in part_seqs if s is not None]
+            if not detected_seqs:
+                detected_results[base_name] = DetectedElement(-1, -1, -1, ExtractionResult.NOSEQ)
+                continue
+
+            # Use first detected copy's coordinates for reporting
+            first_detected_idx = next(i for i, s in enumerate(part_seqs) if s is not None)
+
+            if self.correct_sequences and base_name in self.index_dict:
+                # Correct each copy independently
+                corrected = []
+                for seq in part_seqs:
+                    if seq is None:
+                        corrected.append((None, -1))
+                        continue
+                    matching = self.index_dict[base_name].get_occurrences(seq, max_hits=self.MAX_HITS)
+                    corrected_seq, score, _, _ = find_candidate_with_max_score_ssw(
+                        matching, seq, min_score=self.min_scores[base_name], score_diff=self.SCORE_DIFF)
+                    corrected.append((corrected_seq, score))
+
+                successful = [(seq, score) for seq, score in corrected if seq is not None]
+
+                if not successful:
+                    # Neither copy corrected
+                    detected_results[base_name] = DetectedElement(-1, -1, -1, ExtractionResult.NOSEQ)
+                elif len(successful) == 1:
+                    # One succeeded
+                    seq, score = successful[0]
+                    detected_results[base_name] = DetectedElement(
+                        part_coords[first_detected_idx][0],
+                        part_coords[first_detected_idx][1],
+                        score, seq)
+                else:
+                    # Multiple succeeded - check agreement
+                    unique_seqs = set(seq for seq, _ in successful)
+                    if len(unique_seqs) == 1:
+                        # All agree
+                        best_score = max(score for _, score in successful)
+                        detected_results[base_name] = DetectedElement(
+                            part_coords[first_detected_idx][0],
+                            part_coords[first_detected_idx][1],
+                            best_score, successful[0][0])
+                    else:
+                        # Copies disagree after correction
+                        detected_results[base_name] = DetectedElement(-1, -1, -1, ExtractionResult.NOSEQ)
+            else:
+                # No correction: compare raw sequences
+                if len(set(detected_seqs)) == 1:
+                    # All match
+                    detected_results[base_name] = DetectedElement(
+                        part_coords[first_detected_idx][0],
+                        part_coords[first_detected_idx][1],
+                        0, detected_seqs[0])
+                else:
+                    # Differ: report all comma-separated
+                    comma_sep = ",".join(detected_seqs)
+                    detected_results[base_name] = DetectedElement(
+                        part_coords[first_detected_idx][0],
+                        part_coords[first_detected_idx][1],
+                        0, comma_sep)
+
+    def correct_element(self, element: MoleculeElement, sequence: str, potential_start: int, potential_end: int) -> DetectedElement:
+        potential_seq = sequence[potential_start:potential_end + 1]
+        matching_sequences = self.index_dict[element.element_name].get_occurrences(potential_seq, max_hits=self.MAX_HITS)
+        corrected_seq, seq_score, seq_start, seq_end = \
+            find_candidate_with_max_score_ssw(matching_sequences, potential_seq,
+                                              min_score=self.min_scores[element.element_name], score_diff=self.SCORE_DIFF)
+        if corrected_seq is not None:
+            read_seq_start = potential_start + seq_start
+            read_seq_end = potential_start + seq_end - 1
+            return DetectedElement(read_seq_start, read_seq_end, seq_score, corrected_seq)
+        else:
+            return DetectedElement(-1, -1, -1, ExtractionResult.NOSEQ)
+
+    def _backup_concatenated_element(self, element, potential_start, potential_end, element_storage):
+        base_name, part_index = self.molecule_structure.elements_to_concatenate[element.element_name]
+        if base_name not in element_storage:
+            element_storage[base_name] = [(-1, -1) for _ in range(
+                self.molecule_structure.concatenated_elements_counts[base_name])]
+        # part_index is 1-based in MDF, convert to 0-based
+        element_storage[base_name][part_index - 1] = (potential_start, potential_end)
+
+    def _backup_duplicated_element(self, element, potential_start, potential_end, element_storage):
+        base_name, part_index = self.molecule_structure.duplicated_elements[element.element_name]
+        if base_name not in element_storage:
+            element_storage[base_name] = [(-1, -1) for _ in range(
+                self.molecule_structure.duplicated_elements_counts[base_name])]
+        # part_index is 1-based in MDF, convert to 0-based
+        element_storage[base_name][part_index - 1] = (potential_start, potential_end)
+
+    def _detect_variable_element(self, el, sequence, potential_start, potential_end,
+                                 detected_elements, concat_element_storage, dupl_element_storage,
+                                 len_diff_threshold=None):
+        potential_len = potential_end - potential_start + 1
+        if len_diff_threshold is None or abs(potential_len - el.element_length) <= len_diff_threshold * el.element_length:
+            if el.element_name in self.molecule_structure.elements_to_concatenate:
+                self._backup_concatenated_element(el, potential_start, potential_end, concat_element_storage)
+                return potential_start, potential_end
+
+            if el.element_name in self.molecule_structure.duplicated_elements:
+                self._backup_duplicated_element(el, potential_start, potential_end, dupl_element_storage)
+                return potential_start, potential_end
+
+            if el.element_name in self.elements_to_extract:
+                detected_elements[el.element_name] = DetectedElement(potential_start, potential_end, 0,
+                                                                     seq=sequence[potential_start:potential_end + 1])
+
+            elif el.element_name in self.elements_to_correct:
+                detected_element = self.correct_element(el, sequence, potential_start, potential_end)
+                if detected_element.start != -1:
+                    detected_elements[el.element_name] = detected_element
+                    return detected_element.start, detected_element.end
+        else:
+            detected_elements[el.element_name] = DetectedElement(-1, -1, -1, ExtractionResult.NOSEQ)
+
+        return potential_start, potential_end
+
+    def _detect_potential_start(self, element, element_index, potential_end, detected_elements):
+        if element_index == 0:
+            potential_start = potential_end - element.element_length + 1
+        else:
+            prev_el = self.molecule_structure.ordered_elements[element_index - 1]
+            if prev_el.element_name in detected_elements:
+                potential_start = detected_elements[prev_el].end + 1
+            else:
+                potential_start = potential_end - element.element_length + 1
+        if potential_start < 0:
+            potential_start = 0
+
+        return potential_start
+
+    def _detect_potential_end(self, sequence, element, element_index, potential_start, detected_elements):
+        if element_index + 1 == len(self.molecule_structure.ordered_elements):
+            potential_end = potential_start + element.element_length - 1
+        else:
+            next_el = self.molecule_structure.ordered_elements[element_index + 1]
+            if next_el.element_name in detected_elements:
+                potential_end = detected_elements[next_el.element_name].start - 1
+            else:
+                potential_end = potential_start + element.element_length - 1
+        if potential_end >= len(sequence):
+            potential_end = len(sequence) - 1
+        return potential_end
+
+    def extract_variable_elements5(self, sequence, detected_elements, concat_element_storage, dupl_element_storage):
+        first_detected_const_element = None
+        for i, el in enumerate(self.molecule_structure):
+            if el.element_type == ElementType.cDNA:
+                break
+            if (el.element_type == ElementType.PolyT or el.element_type.is_constant()) and el.element_name in detected_elements:
+                first_detected_const_element = i
+                break
+
+        if first_detected_const_element is None: return
+
+        # extracting elements preceding first detected const element
+        current_pos = detected_elements[
+                          self.molecule_structure.ordered_elements[first_detected_const_element].element_name].start - 1
+        for i in range(first_detected_const_element - 1, -1, -1):
+            el: MoleculeElement = self.molecule_structure.ordered_elements[i]
+
+            if el.element_type.is_base_separator():
+                current_pos -= el.element_length
+                continue
+            if not el.element_type.is_variable():
+                # we do not skip undetected const elements on the open side (e.g. here we go to the left of the first detected const element)
+                break
+
+            potential_end = current_pos
+            potential_start = potential_end - el.element_length + 1
+            if potential_start < 0: break
+
+            el_start, _ = self._detect_variable_element(el, sequence, potential_start, potential_end,
+                                                        detected_elements, concat_element_storage, dupl_element_storage)
+            current_pos = el_start - 1
+
+        current_pos = detected_elements[
+                          self.molecule_structure.ordered_elements[first_detected_const_element].element_name].end + 1
+        for i in range(first_detected_const_element + 1, len(self.molecule_structure.ordered_elements)):
+            if current_pos >= len(sequence):
+                break
+
+            el: MoleculeElement = self.molecule_structure.ordered_elements[i]
+            if el.element_type in [ElementType.cDNA, ElementType.PolyT]:
+                break
+            elif el.element_type.is_base_separator():
+                current_pos += el.element_length
+                continue
+            elif el.element_type.is_constant():
+                if el.element_name in detected_elements:
+                    current_pos = detected_elements[el.element_name].end + 1
+                else:
+                    current_pos += el.element_length
+                continue
+
+            potential_start = current_pos
+            potential_end = self._detect_potential_end(sequence, el, i, potential_start, detected_elements)
+
+            _, el_end = self._detect_variable_element(el, sequence, potential_start, potential_end,
+                                                      detected_elements, concat_element_storage, dupl_element_storage,
+                                                      len_diff_threshold=self.MAX_LEN_DIFF)
+            current_pos = el_end + 1
+
+
+    def extract_variable_elements3(self, sequence, detected_elements, concat_element_storage, dupl_element_storage):
+        last_detected_const_element = None
+        for i in range(len(self.molecule_structure.ordered_elements) - 1, -1, -1):
+            el = self.molecule_structure.ordered_elements[i]
+            if el.element_type == ElementType.cDNA:
+                break
+            if (el.element_type == ElementType.PolyT or el.element_type.is_constant()) and el.element_name in detected_elements:
+                last_detected_const_element = i
+                break
+
+        if last_detected_const_element is None: return
+
+        # extracting elements following the last detected const element
+        current_pos = detected_elements[
+                          self.molecule_structure.ordered_elements[last_detected_const_element].element_name].end + 1
+        for i in range(last_detected_const_element + 1, len(self.molecule_structure.ordered_elements)):
+            el: MoleculeElement = self.molecule_structure.ordered_elements[i]
+
+            if el.element_type.is_base_separator():
+                current_pos += el.element_length
+                continue
+            if not el.element_type.is_variable():
+                # we do not skip undetected const elements on the open side (e.g. here we go to the left of the first detected const element)
+                break
+
+            potential_start = current_pos
+            potential_end = potential_start + el.element_length - 1
+            if potential_end >= len(sequence): break
+
+            _, el_end = self._detect_variable_element(el, sequence, potential_start, potential_end,
+                                                      detected_elements, concat_element_storage, dupl_element_storage)
+            current_pos = el_end + 1
+
+        current_pos = detected_elements[
+                          self.molecule_structure.ordered_elements[last_detected_const_element].element_name].start - 1
+        for i in range(last_detected_const_element - 1, -1, -1):
+            if current_pos <= 0:
+                break
+            el: MoleculeElement = self.molecule_structure.ordered_elements[i]
+
+            if el.element_type in [ElementType.cDNA, ElementType.PolyT]:
+                break
+            elif el.element_type.is_base_separator():
+                current_pos -= el.element_length
+                continue
+            elif el.element_type.is_constant():
+                if el.element_name in detected_elements:
+                    current_pos = detected_elements[el.element_name].start - 1
+                else:
+                    current_pos -= el.element_length
+                continue
+
+            potential_end = current_pos
+            potential_start = self._detect_potential_start(el, i, potential_end, detected_elements)
+
+            el_start, _ = self._detect_variable_element(el, sequence, potential_start, potential_end,
+                                                        detected_elements, concat_element_storage, dupl_element_storage,
+                                                        len_diff_threshold=self.MAX_LEN_DIFF)
+            current_pos = el_start - 1
+
+    def detect_const_elements(self, sequence, polyt_start, polyt_end, detected_elements):
+        # searching left of cDNA
+        first_element_detected = False
+        current_search_start = 0
+        for el in self.molecule_structure:
+            if el.element_type in [ElementType.cDNA, ElementType.PolyT]:
+                break
+            if el.element_name not in self.constant_elements_to_detect:
+                continue
+
+            search_start = current_search_start
+            search_end = len(sequence) if polyt_start == -1 else polyt_start + 1
+            element_occurrences = self.index_dict[el.element_name].get_occurrences_substr(sequence,
+                                                                                          search_start,
+                                                                                          search_end)
+            if not first_element_detected:
+                min_score = int(el.element_length * self.MIN_SCORE_COEFF_TERMMINAL)
+                start_delta = -1
+            else:
+                min_score = int(el.element_length * self.MIN_SCORE_COEFF)
+                start_delta = self.TERMINAL_MATCH_DELTA
+
+            element_start, element_end = detect_exact_positions(sequence,
+                                                                search_start, search_end,
+                                                                self.index_dict[el.element_name].k,
+                                                                el.element_value,
+                                                                element_occurrences,
+                                                                min_score=min_score,
+                                                                start_delta=start_delta,
+                                                                end_delta=self.TERMINAL_MATCH_DELTA)
+
+
+            if element_start is not None:
+                current_search_start = element_end
+                if not first_element_detected:
+                    first_element_detected = True
+
+                detected_elements[el.element_name] = DetectedElement(element_start, element_end)
+
+        last_element_detected = False
+        current_search_end = len(sequence)
+        for i in range(len(self.molecule_structure.ordered_elements) - 1, -1, -1):
+            el = self.molecule_structure.ordered_elements[i]
+            if el.element_type in [ElementType.cDNA, ElementType.PolyT]:
+                break
+            if el.element_name not in self.constant_elements_to_detect:
+                continue
+
+            search_start = 0 if polyt_start == -1 else polyt_end + 1
+            search_end = current_search_end
+            element_occurrences = self.index_dict[el.element_name].get_occurrences_substr(sequence, search_start,
+                                                                                          search_end)
+
+            if not last_element_detected:
+                min_score = int(el.element_length * self.MIN_SCORE_COEFF_TERMMINAL)
+                end_delta = -1
+            else:
+                min_score = int(el.element_length * self.MIN_SCORE_COEFF)
+                end_delta = self.TERMINAL_MATCH_DELTA
+
+            element_start, element_end = detect_exact_positions(sequence,
+                                                                search_start, search_end,
+                                                                self.index_dict[el.element_name].k,
+                                                                el.element_value,
+                                                                element_occurrences,
+                                                                min_score=min_score,
+                                                                start_delta=self.TERMINAL_MATCH_DELTA,
+                                                                end_delta=end_delta)
+            if element_start is not None:
+                current_search_end = element_end
+                if not last_element_detected:
+                    last_element_detected = True
+
+                detected_elements[el.element_name] = DetectedElement(element_start, element_end)
diff --git a/src/barcode_calling/common.py b/src/barcode_calling/common.py
index f1b88d7f..6ed04bfc 100644
--- a/src/barcode_calling/common.py
+++ b/src/barcode_calling/common.py
@@ -4,11 +4,16 @@
 # See file LICENSE for details.
 ############################################################################
 import copy
+import os
+import sys
 import logging
-
-import numpy as np
+import gzip
+import numpy
 from ssw import AlignmentMgr
 
+from ..error_codes import IsoQuantExitCode
+
+
 logger = logging.getLogger('IsoQuant')
 
 # Try to import numba for JIT compilation of barcode conversion
@@ -23,11 +28,11 @@
     @njit(cache=True, parallel=True)
     def _compute_2bit_numba(encoded, shifts, n_barcodes, seq_len):
         """Numba JIT-compiled 2-bit encoding computation with parallel execution."""
-        result = np.zeros(n_barcodes, dtype=np.uint64)
+        result = numpy.zeros(n_barcodes, dtype=numpy.uint64)
         for i in prange(n_barcodes):
-            val = np.uint64(0)
+            val = numpy.uint64(0)
             for pos in range(seq_len):
-                val |= np.uint64(encoded[i, pos]) << shifts[pos]
+                val |= numpy.uint64(encoded[i, pos]) << shifts[pos]
             result[i] = val
         return result
 
@@ -56,6 +61,48 @@ def find_polyt_start(seq, window_size = 16, polya_fraction = 0.75):
     return i + max(0, seq[i:].find('TTTT'))
 
 
+def find_polyt(seq, window_size = 16, polya_fraction = 0.75):
+    polyA_count = int(window_size * polya_fraction)
+
+    if len(seq) < window_size:
+        return -1, -1
+    i = 0
+    a_count = seq[0:window_size].count('T')
+    while i < len(seq) - window_size:
+        if a_count >= polyA_count:
+            break
+        first_base_a = seq[i] == 'T'
+        new_base_a = i + window_size < len(seq) and seq[i + window_size] == 'T'
+        if first_base_a and not new_base_a:
+            a_count -= 1
+        elif not first_base_a and new_base_a:
+            a_count += 1
+        i += 1
+
+    if i >= len(seq) - window_size:
+        return -1, -1
+    polyt_start = i + max(0, seq[i:].find('TTTT'))
+
+    while i < len(seq) - window_size:
+        if a_count < polyA_count:
+            break
+        first_base_a = seq[i] == 'T'
+        new_base_a = i + window_size < len(seq) and seq[i + window_size] == 'T'
+        if first_base_a and not new_base_a:
+            a_count -= 1
+        elif not first_base_a and new_base_a:
+            a_count += 1
+        i += 1
+
+    if i >= len(seq) - window_size:
+        polyt_end = len(seq) - 1
+    else:
+        last_t_pos = seq[i:i + window_size].rfind('TTTT')
+        polyt_end = i + window_size - 1 if last_t_pos == -1 else i + last_t_pos + 4
+
+    return polyt_start, polyt_end
+
+
 base_comp = {'A': 'T', 'C': 'G', 'G': 'C', 'T': 'A', 'N': 'N', " ": " "}
 
 
@@ -181,7 +228,7 @@ def detect_exact_positions(sequence, start, end, kmer_size, pattern, pattern_occ
         return None, None
     leftover_bases = len(pattern) - pattern_end - 1
     skipped_bases = pattern_start
-    return start_pos - skipped_bases, end_pos + leftover_bases
+    return max(0, start_pos - skipped_bases), min(end_pos + leftover_bases, len(sequence) - 1)
 
 
 def detect_first_exact_positions(sequence, start, end, kmer_size, pattern, pattern_occurrences: list,
@@ -234,7 +281,7 @@ def str_to_2bit(seq):
     return kmer_idx
 
 
-def batch_str_to_2bit(barcodes, seq_len=25):
+def batch_str_to_2bit(barcodes, seq_len):
     """
     Convert a list/iterator of barcode strings to 2-bit encoded integers.
 
@@ -249,7 +296,7 @@ def batch_str_to_2bit(barcodes, seq_len=25):
         numpy.ndarray of uint64 containing 2-bit encoded barcodes
     """
     # Convert iterator to list if needed, reading in chunks to show progress
-    if not isinstance(barcodes, (list, np.ndarray)):
+    if not isinstance(barcodes, (list, numpy.ndarray)):
         logger.info("Loading barcodes from iterator...")
         barcode_list = []
         chunk_size = 10_000_000
@@ -262,36 +309,36 @@ def batch_str_to_2bit(barcodes, seq_len=25):
 
     n_barcodes = len(barcodes)
     if n_barcodes == 0:
-        return np.array([], dtype=np.uint64)
+        return numpy.array([], dtype=numpy.uint64)
 
     logger.info("Converting %d barcodes to 2-bit encoding..." % n_barcodes)
 
     # Fast path: join all barcodes into single string, convert to bytes at once
     # This avoids Python loop overhead for encode()
     all_bytes = ''.join(barcodes).encode('ascii')
-    byte_array = np.frombuffer(all_bytes, dtype=np.uint8).reshape(n_barcodes, seq_len)
+    byte_array = numpy.frombuffer(all_bytes, dtype=numpy.uint8).reshape(n_barcodes, seq_len)
 
     # Apply the encoding: (ord(char) & 6) >> 1
     # A=65 -> 0, C=67 -> 1, T=84 -> 2, G=71 -> 3
     encoded = (byte_array & 6) >> 1
 
     # Compute bit shifts for each position (highest position = leftmost)
-    shifts = np.arange(seq_len - 1, -1, -1, dtype=np.uint64) * 2
+    shifts = numpy.arange(seq_len - 1, -1, -1, dtype=numpy.uint64) * 2
 
     # Compute 2-bit encoding - use Numba if available for parallel execution
     if NUMBA_AVAILABLE:
         result = _compute_2bit_numba(encoded, shifts, n_barcodes, seq_len)
     else:
         # Fallback: vectorized NumPy loop
-        result = np.zeros(n_barcodes, dtype=np.uint64)
+        result = numpy.zeros(n_barcodes, dtype=numpy.uint64)
         for pos in range(seq_len):
-            result |= encoded[:, pos].astype(np.uint64) << shifts[pos]
+            result |= encoded[:, pos].astype(numpy.uint64) << shifts[pos]
 
     logger.info("Barcode conversion complete")
     return result
 
 
-def batch_str_to_2bit_chunked(barcode_iterator, seq_len=25, chunk_size=50_000_000):
+def batch_str_to_2bit_chunked(barcode_iterator, seq_len, chunk_size=50_000_000):
     """
     Convert barcodes to 2-bit encoding in memory-efficient chunks.
 
@@ -331,33 +378,33 @@ def batch_str_to_2bit_chunked(barcode_iterator, seq_len=25, chunk_size=50_000_00
     logger.info("Barcode conversion complete: %d barcodes" % total_processed)
 
     if not all_results:
-        return np.array([], dtype=np.uint64)
-    return np.concatenate(all_results)
+        return numpy.array([], dtype=numpy.uint64)
+    return numpy.concatenate(all_results)
 
 
 def _convert_chunk_to_2bit(barcodes, seq_len):
     """Convert a chunk of barcodes to 2-bit encoding (internal helper)."""
     n_barcodes = len(barcodes)
     if n_barcodes == 0:
-        return np.array([], dtype=np.uint64)
+        return numpy.array([], dtype=numpy.uint64)
 
     # Fast path: join all barcodes into single string, convert to bytes at once
     all_bytes = ''.join(barcodes).encode('ascii')
-    byte_array = np.frombuffer(all_bytes, dtype=np.uint8).reshape(n_barcodes, seq_len)
+    byte_array = numpy.frombuffer(all_bytes, dtype=numpy.uint8).reshape(n_barcodes, seq_len)
 
     # Apply encoding: (ord(char) & 6) >> 1
     encoded = (byte_array & 6) >> 1
 
     # Compute shifts
-    shifts = np.arange(seq_len - 1, -1, -1, dtype=np.uint64) * 2
+    shifts = numpy.arange(seq_len - 1, -1, -1, dtype=numpy.uint64) * 2
 
     # Compute result - use Numba if available for parallel execution
     if NUMBA_AVAILABLE:
         result = _compute_2bit_numba(encoded, shifts, n_barcodes, seq_len)
     else:
-        result = np.zeros(n_barcodes, dtype=np.uint64)
+        result = numpy.zeros(n_barcodes, dtype=numpy.uint64)
         for pos in range(seq_len):
-            result |= encoded[:, pos].astype(np.uint64) << shifts[pos]
+            result |= encoded[:, pos].astype(numpy.uint64) << shifts[pos]
 
     return result
 
@@ -369,3 +416,36 @@ def bit_to_str(seq, seq_len):
     for i in range(seq_len):
         str_seq += BIN2NUCL[(seq >> ((seq_len - i - 1) * 2)) & 3]
     return str_seq
+
+
+
+def load_h5_barcodes_bit(h5_file_path, dataset_name='bpMatrix_1'):
+    raise NotImplementedError()
+    import h5py
+    barcode_list = []
+    with h5py.File(h5_file_path, 'r') as h5_file:
+        dataset = numpy.array(h5_file[dataset_name])
+        for row in dataset:
+            for col in row:
+                barcode_list.append(bit_to_str(int(col[0])))
+    return barcode_list
+
+
+def load_barcodes(inf, needs_iterator=False):
+    if not os.path.isfile(inf):
+        logger.critical("Barcode file '%s' does not exist.", inf)
+        sys.exit(IsoQuantExitCode.INPUT_FILE_NOT_FOUND)
+
+    if inf.endswith("h5") or inf.endswith("hdf5"):
+        return load_h5_barcodes_bit(inf)
+
+    if inf.endswith("gz") or inf.endswith("gzip"):
+        handle = gzip.open(inf, "rt")
+    else:
+        handle = open(inf, "r")
+
+    barcode_iterator = iter(l.strip().split()[0] for l in handle)
+    if needs_iterator:
+        return barcode_iterator
+
+    return list(barcode_iterator)
diff --git a/src/barcode_calling/indexers/base.py b/src/barcode_calling/indexers/base.py
index 1f190f7b..5e3ba232 100644
--- a/src/barcode_calling/indexers/base.py
+++ b/src/barcode_calling/indexers/base.py
@@ -108,6 +108,57 @@ def get_occurrences(self, sequence: str, max_hits: int = 0, min_kmers: int = 1,
             return result
         return result[:max_hits]
 
+    def _get_kmers_substr(self, seq, start, end):
+        if end - start + 1 < self.k:
+            return
+        kmer = seq[start:start + self.k]
+        yield kmer
+        for i in range(start + self.k, min(len(seq), end + 1)):
+            kmer = kmer[1:] + seq[i]
+            yield kmer
+
+    # [start, end]
+    def get_occurrences_substr(self, sequence, start, end, max_hits=0, min_kmers=1, hits_delta=1) -> List[Tuple[str, int, List[int]]]:
+        """
+        Find indexed sequences with shared k-mers.
+
+        Args:
+            sequence: Query sequence to search
+            max_hits: Maximum number of results (0 = unlimited)
+            min_kmers: Minimum shared k-mers required
+            hits_delta: Include results within this many k-mers of top hit
+            ignore_equal: Skip exact matches
+
+        Returns:
+            List of (sequence, shared_kmer_count, kmer_positions) tuples,
+            sorted by shared k-mer count (descending)
+        """
+        barcode_counts: DefaultDict[int, int] = defaultdict(int)
+        barcode_positions: DefaultDict[int, List[int]] = defaultdict(list)
+
+        for pos, kmer in enumerate(self._get_kmers_substr(sequence, start, end)):
+            for i in self.index[kmer]:
+                barcode_counts[i] += 1
+                barcode_positions[i].append(pos)
+
+        result = []
+        for i in barcode_counts.keys():
+            count = barcode_counts[i]
+            if count < min_kmers:
+                continue
+            result.append((self.seq_list[i], count, barcode_positions[i]))
+
+        if not result:
+            return []
+
+        top_hits = max(result, key=lambda x: x[1])[1]
+        result = filter(lambda x: x[1] >= top_hits - hits_delta, result)
+        result = list(sorted(result, reverse=True, key=lambda x: x[1]))
+
+        if max_hits == 0:
+            return result
+        return result[:max_hits]
+
 
 class ArrayKmerIndexer:
     """
diff --git a/src/barcode_calling/indexers/two_bit.py b/src/barcode_calling/indexers/two_bit.py
index 2c5d5290..7c9dbbb9 100644
--- a/src/barcode_calling/indexers/two_bit.py
+++ b/src/barcode_calling/indexers/two_bit.py
@@ -26,7 +26,7 @@ class Dict2BitKmerIndexer:
     Best for medium to large barcode sets.
     """
 
-    def __init__(self, known_bit_seqs: Iterable[int], kmer_size: int = 6, seq_len: int = 25):
+    def __init__(self, known_bit_seqs: Iterable[int], kmer_size: int, seq_len: int):
         """
         Initialize k-mer index with 2-bit encoded sequences.
 
@@ -131,7 +131,7 @@ class Array2BitKmerIndexer:
     Best for large barcode sets (e.g., single-cell whitelists).
     """
 
-    def __init__(self, known_bin_seq: Iterable[int], kmer_size: int = 12, seq_len: int = 25):
+    def __init__(self, known_bin_seq: Iterable[int], kmer_size: int, seq_len: int):
         """
         Initialize 2-bit k-mer index.
 
diff --git a/src/dataset_processor.py b/src/dataset_processor.py
index 9182c06c..026b6f6f 100644
--- a/src/dataset_processor.py
+++ b/src/dataset_processor.py
@@ -133,10 +133,11 @@ def process_sample(self, sample):
                                          len(sample.file_list) > 1 and
                                          self.args.read_group is not None and
                                          "file_name" in self.args.read_group)
-        if sample.use_technical_replicas:
-            logger.info("Technical replicas filtering enabled: novel transcripts must be confirmed by at least 2 files")
-        elif len(sample.file_list) > 1 and not self.args.use_replicas:
-            logger.info("Technical replicas filtering disabled by --use_replicas false")
+        if not self.args.no_model_construction:
+            if sample.use_technical_replicas:
+                logger.info("Technical replicas filtering enabled: novel transcripts must be confirmed by at least 2 files")
+            elif len(sample.file_list) > 1 and not self.args.use_replicas:
+                logger.info("Technical replicas filtering disabled by --use_replicas false")
 
         # Initialize all_read_groups as list of sets (one per grouping strategy)
         self.all_read_groups = [set() for _ in self.grouping_strategy_names]
@@ -496,7 +497,8 @@ def filter_umis(self, sample):
                        IsoQuantMode.curio: [3],
                        IsoQuantMode.visium_hd: [4],
                        IsoQuantMode.stereoseq: [4],
-                       IsoQuantMode.stereoseq_nosplit: [4]}
+                       IsoQuantMode.stereoseq_nosplit: [4],
+                       IsoQuantMode.custom_sc: [4]}
 
         for i, edit_distance in enumerate(umi_ed_dict[self.args.mode]):
             logger.info("Filtering PCR duplicates with edit distance %d" % edit_distance)
diff --git a/src/modes.py b/src/modes.py
index 632667f7..ff83ce83 100644
--- a/src/modes.py
+++ b/src/modes.py
@@ -16,6 +16,7 @@ class IsoQuantMode(Enum):
     stereoseq = 5
     visium_hd = 6
     visium_5prime = 7
+    custom_sc = 10
 
     def needs_barcode_calling(self):
         return self in [IsoQuantMode.tenX_v3,
@@ -23,7 +24,8 @@ def needs_barcode_calling(self):
                         IsoQuantMode.stereoseq_nosplit,
                         IsoQuantMode.stereoseq,
                         IsoQuantMode.visium_hd,
-                        IsoQuantMode.visium_5prime]
+                        IsoQuantMode.visium_5prime,
+                        IsoQuantMode.custom_sc]
 
     def needs_pcr_deduplication(self):
         return self in [IsoQuantMode.tenX_v3,
@@ -31,7 +33,8 @@ def needs_pcr_deduplication(self):
                         IsoQuantMode.stereoseq_nosplit,
                         IsoQuantMode.stereoseq,
                         IsoQuantMode.visium_hd,
-                        IsoQuantMode.visium_5prime]
+                        IsoQuantMode.visium_5prime,
+                        IsoQuantMode.custom_sc]
 
     def produces_new_fasta(self):
         return self in [IsoQuantMode.stereoseq]
diff --git a/tests/github/barcode_test_templates/stereo_single.cfg b/tests/github/barcode_test_templates/stereo_single.cfg
index dd2b4b42..9a9ae74e 100644
--- a/tests/github/barcode_test_templates/stereo_single.cfg
+++ b/tests/github/barcode_test_templates/stereo_single.cfg
@@ -12,14 +12,14 @@ run_type	single
 reads	/path/to/simulated_reads.fq.gz
 barcodes	/path/to/whitelist.txt
 
-# Barcode calling mode (for detect_barcodes.py)
+# Barcode calling mode (for isoquant_detect_barcodes.py)
 mode	stereoseq
 
 # Optional: number of threads
 threads	8
 
-# Optional: additional detect_barcodes.py options
-# detect_options	"--min_score 21"
+# Optional: additional isoquant_detect_barcodes.py options
+# detect_options	""
 
 # Quality assessment mode (for assess_barcode_quality.py)
 # If not specified, derived from 'mode'
diff --git a/tests/github/barcode_test_templates/stereo_split.cfg b/tests/github/barcode_test_templates/stereo_split.cfg
index 5d3061cb..38959b69 100644
--- a/tests/github/barcode_test_templates/stereo_split.cfg
+++ b/tests/github/barcode_test_templates/stereo_split.cfg
@@ -18,7 +18,7 @@ mode	stereoseq_split
 # Optional: number of threads
 threads	8
 
-# Optional: additional detect_barcodes.py options
+# Optional: additional isoquant_detect_barcodes.py options
 # detect_options	""
 
 # Quality assessment mode
diff --git a/tests/github/run_barcode_test.py b/tests/github/run_barcode_test.py
index 6a49b845..a08d45dc 100644
--- a/tests/github/run_barcode_test.py
+++ b/tests/github/run_barcode_test.py
@@ -8,7 +8,7 @@
 """
 Test runner for barcode calling quality assessment.
 
-Takes a configuration file as input, runs detect_barcodes.py on simulated data,
+Takes a configuration file as input, runs isoquant_detect_barcodes.py on simulated data,
 then runs quality assessment and compares results against etalon values.
 
 Usage:
@@ -91,7 +91,7 @@ def parse_args():
     parser.add_argument("--output", "-o", type=str,
                         help="Output directory (overrides config)")
     parser.add_argument("--additional_options", "-a", type=str,
-                        help="Additional options for detect_barcodes.py")
+                        help="Additional options for isoquant_detect_barcodes.py")
     parser.add_argument("--debug", "-d", action="store_true",
                         help="Enable debug output")
 
@@ -99,13 +99,13 @@ def parse_args():
 
 
 def run_detect_barcodes(args, config_dict):
-    """Run detect_barcodes.py with configuration options."""
+    """Run isoquant_detect_barcodes.py with configuration options."""
     source_dir = os.path.dirname(os.path.realpath(__file__))
     isoquant_dir = os.path.join(source_dir, "../../")
     config_file = args.config_file
 
     run_name = config_dict["name"]
-    # detect_barcodes.py uses -o as a file prefix, not a directory
+    # isoquant_detect_barcodes.py uses -o as a file prefix, not a directory
     # Output will be: {output_folder}/{run_name}.barcoded_reads.tsv
     output_folder = args.output if args.output else config_dict["output"]
     output_prefix = os.path.join(output_folder, run_name)
@@ -113,11 +113,11 @@ def run_detect_barcodes(args, config_dict):
     if not os.path.exists(output_folder):
         os.makedirs(output_folder)
 
-    log.info('== Running detect_barcodes.py ==')
+    log.info('== Running isoquant_detect_barcodes.py ==')
 
     # Build command
     detect_command = [
-        "python3", os.path.join(isoquant_dir, "detect_barcodes.py"),
+        "python3", os.path.join(isoquant_dir, "isoquant_detect_barcodes.py"),
         "-o", output_prefix,
         "--mode", config_dict["mode"]
     ]
@@ -139,6 +139,11 @@ def run_detect_barcodes(args, config_dict):
         barcode_files = fix_paths(config_file, config_dict["barcodes"])
         detect_command.extend(["--barcodes"] + barcode_files)
 
+    # Add molecule description file (for custom_sc mode)
+    if "molecule" in config_dict:
+        molecule_file = fix_path(config_file, config_dict["molecule"])
+        detect_command.extend(["--molecule", molecule_file])
+
     # Add optional parameters
     if "threads" in config_dict:
         detect_command.extend(["-t", config_dict["threads"]])
@@ -156,7 +161,7 @@ def run_detect_barcodes(args, config_dict):
     result = subprocess.run(detect_command)
 
     if result.returncode != 0:
-        log.error("detect_barcodes.py exited with non-zero status: %d" % result.returncode)
+        log.error("isoquant_detect_barcodes.py exited with non-zero status: %d" % result.returncode)
         return -11
 
     return 0
@@ -173,7 +178,7 @@ def run_barcode_quality(args, config_dict):
     output_prefix = os.path.join(output_folder, run_name)
 
     # Find barcode output file
-    # detect_barcodes.py outputs: {prefix}.barcoded_reads.tsv
+    # isoquant_detect_barcodes.py outputs: {prefix}.barcoded_reads.tsv
     barcode_output = output_prefix + ".barcoded_reads.tsv"
     if not os.path.exists(barcode_output):
         barcode_output = output_prefix + ".barcoded_reads.tsv.gz"
diff --git a/tests/github/run_pipeline.py b/tests/github/run_pipeline.py
index e749932a..3259e77a 100755
--- a/tests/github/run_pipeline.py
+++ b/tests/github/run_pipeline.py
@@ -31,6 +31,7 @@
 RT_QUANTIFICATION_NOVEL = "quantification_novel"
 RT_QUANTIFICATION_GENES = "quantification_genes"
 RT_PERFORMANCE = "performance"
+RT_ALLINFO = "allinfo"
 
 
 log = logging.getLogger('GitHubRunner')
@@ -433,6 +434,78 @@ def run_performance_assessment(args, config_dict):
     return exit_code
 
 
+def run_allinfo_quality(args, config_dict):
+    log.info('== Running allinfo quality assessment ==')
+    config_file = args.config_file
+    source_dir = os.path.dirname(os.path.realpath(__file__))
+    isoquant_dir = os.path.join(source_dir, "../../")
+
+    run_name = config_dict["name"]
+    label = config_dict["label"]
+    output_folder = os.path.join(args.output if args.output else config_dict["output"], run_name)
+    internal_output_dir = os.path.join(output_folder, label)
+
+    # Edit distance from config or default (3 for 10x/curio, 4 for stereo/visium_hd)
+    edit_distance = int(config_dict.get("edit_distance", "3"))
+    allinfo_file = os.path.join(internal_output_dir, "%s.UMI_filtered.ED%d.allinfo" % (label, edit_distance))
+    stats_file = os.path.join(internal_output_dir, "%s.UMI_filtered.ED%d.stats.tsv" % (label, edit_distance))
+
+    # Support gzipped allinfo
+    if not os.path.exists(allinfo_file) and os.path.exists(allinfo_file + ".gz"):
+        allinfo_file = allinfo_file + ".gz"
+
+    if not os.path.exists(allinfo_file):
+        log.error("Allinfo file not found: %s" % allinfo_file)
+        return -5
+    if not os.path.exists(stats_file):
+        log.error("Stats file not found: %s" % stats_file)
+        return -5
+
+    quality_report = os.path.join(output_folder, "allinfo_quality_report.tsv")
+
+    qa_command = ["python3", os.path.join(isoquant_dir, "misc/assess_allinfo_quality.py"),
+                  "--allinfo", allinfo_file, "--stats", stats_file,
+                  "--output", quality_report]
+
+    log.info("Running: %s" % " ".join(qa_command))
+    result = subprocess.run(qa_command)
+    if result.returncode != 0:
+        log.error("Allinfo QA script failed with code %d" % result.returncode)
+        return -11
+
+    # Compare against etalon if provided
+    if "etalon_allinfo" not in config_dict:
+        return 0
+
+    etalon_file = fix_path(config_file, config_dict["etalon_allinfo"])
+    if not os.path.exists(etalon_file):
+        log.error("Etalon file not found: %s" % etalon_file)
+        return -35
+
+    etalon_dict = load_tsv_config(etalon_file)
+    quality_dict = load_tsv_config(quality_report)
+
+    exit_code = 0
+    tolerance = float(config_dict.get("tolerance", "0.01"))
+    new_etalon_path = os.path.join(output_folder, "new_allinfo_etalon.tsv")
+    with open(new_etalon_path, "w") as new_etalon_outf:
+        for metric_name, val_str in quality_dict.items():
+            new_etalon_outf.write("%s\t%s\n" % (metric_name, val_str))
+
+    for metric_name, etalon_val_str in etalon_dict.items():
+        etalon_val = float(etalon_val_str)
+        if metric_name not in quality_dict:
+            log.error("Metric %s not found in quality report" % metric_name)
+            exit_code = -36
+            continue
+        output_val = float(quality_dict[metric_name])
+        err = check_value(etalon_val, output_val, metric_name, tolerance)
+        if err != 0:
+            exit_code = err
+
+    return exit_code
+
+
 def check_output_files(out_dir, label, file_list):
     missing_files = []
     internal_output_dir = os.path.join(out_dir, label)
@@ -488,6 +561,8 @@ def main():
         err_codes.append(run_quantification(args, config_dict, "gene"))
     if RT_PERFORMANCE in run_types:
         err_codes.append(run_performance_assessment(args, config_dict))
+    if RT_ALLINFO in run_types:
+        err_codes.append(run_allinfo_quality(args, config_dict))
 
     if "check_input_files" in config_dict:
         files_list = config_dict["check_input_files"].split()
diff --git a/tests/test_barcode_callers.py b/tests/test_barcode_callers.py
index a0b1cbf0..e5fdc5eb 100644
--- a/tests/test_barcode_callers.py
+++ b/tests/test_barcode_callers.py
@@ -100,6 +100,7 @@ def test_str_format(self):
 
     def test_header(self):
         """Test TSV header."""
+        # header() is a static method
         header = BarcodeDetectionResult.header()
         assert "#read_id" in header
         assert "barcode" in header
@@ -407,6 +408,7 @@ def test_add_read_with_barcode(self):
         result = LinkerBarcodeDetectionResult(
             "read_001",
             barcode="ACTG",
+            UMI="GGGGGGGG",  # Need actual UMI for has_umi() to return True
             UMI_good=True,
             polyT=100
         )
@@ -458,7 +460,8 @@ def test_add_custom_stats(self):
     def test_str_format(self):
         """Test string formatting."""
         stats = ReadStats()
-        result = LinkerBarcodeDetectionResult("read_001", barcode="ACTG", UMI_good=True)
+        # Need actual UMI for has_umi() to return True
+        result = LinkerBarcodeDetectionResult("read_001", barcode="ACTG", UMI="GGGGGGGG", UMI_good=True)
         stats.add_read(result)
 
         output = str(stats)
diff --git a/tests/test_barcode_common.py b/tests/test_barcode_common.py
index 19a89533..2e04a94d 100644
--- a/tests/test_barcode_common.py
+++ b/tests/test_barcode_common.py
@@ -303,7 +303,7 @@ class TestBatchStrTo2Bit:
     def test_empty_input(self):
         """Test empty input returns empty array."""
         import numpy as np
-        result = batch_str_to_2bit([])
+        result = batch_str_to_2bit([], 25)
         assert len(result) == 0
         assert result.dtype == np.uint64
 
@@ -361,7 +361,7 @@ class TestBatchStrTo2BitChunked:
     def test_empty_input(self):
         """Test empty iterator returns empty array."""
         import numpy as np
-        result = batch_str_to_2bit_chunked(iter([]))
+        result = batch_str_to_2bit_chunked(iter([]), 25)
         assert len(result) == 0
         assert result.dtype == np.uint64
 
diff --git a/tests/test_barcode_detectors.py b/tests/test_barcode_detectors.py
index fc09bba3..a08b7754 100644
--- a/tests/test_barcode_detectors.py
+++ b/tests/test_barcode_detectors.py
@@ -93,18 +93,18 @@ class TestStereoBarcodeDetector:
 
     def test_init(self):
         """Test basic initialization."""
-        detector = StereoBarcodeDetector(DUMMY_BARCODES_25, min_score=21)
+        detector = StereoBarcodeDetector(DUMMY_BARCODES_25)
         assert detector.barcode_indexer is not None
         assert detector.min_score == 21
 
     def test_init_empty_barcodes(self):
         """Test initialization with empty barcode list."""
-        detector = StereoBarcodeDetector([], min_score=21)
+        detector = StereoBarcodeDetector([])
         assert detector.barcode_indexer is None
 
     def test_no_match_random_sequence(self):
         """Test that random sequence returns no valid barcode."""
-        detector = StereoBarcodeDetector(DUMMY_BARCODES_25, min_score=21)
+        detector = StereoBarcodeDetector(DUMMY_BARCODES_25)
         result = detector.find_barcode_umi("test_read", RANDOM_SEQUENCE)
 
         assert isinstance(result, (LinkerBarcodeDetectionResult, TSOBarcodeDetectionResult))
@@ -141,7 +141,7 @@ def test_real_sequence(self, read_sequence, true_barcode, true_umi):
                     "CATCAAAGGTTCACCGAATCTCCAC",
                     "GTCGCCTTCAACGTATACTGATATA",
                     "ACCAATTAGCAGCCATGGGAACTCC"]
-        detector = StereoBarcodeDetector(barcodes, min_score=21)
+        detector = StereoBarcodeDetector(barcodes)
         result = detector.find_barcode_umi("test_read", read_sequence)
 
         assert isinstance(result, (LinkerBarcodeDetectionResult, TSOBarcodeDetectionResult))
@@ -196,7 +196,7 @@ def test_real_sequence(self, read_sequence, true_barcodes, true_umis):
                     "GGGCACATTCCTCAGAAAGCGAAAA",
                     "GGCAGGCCATTAGTTACTCGTCGAG",
                     "CTCCCCTAATCGTTTGTCGGTGGAC"]
-        detector = StereoSplittingBarcodeDetector(barcodes, min_score=21)
+        detector = StereoSplittingBarcodeDetector(barcodes)
         result = detector.find_barcode_umi("test_read", read_sequence)
 
         assert isinstance(result, SplittingBarcodeDetectionResult)
@@ -214,13 +214,13 @@ class TestSharedMemStereoBarcodeDetector:
 
     def test_init(self):
         """Test basic initialization."""
-        detector = SharedMemoryStereoBarcodeDetector(DUMMY_BARCODES_25, min_score=21)
+        detector = SharedMemoryStereoBarcodeDetector(DUMMY_BARCODES_25)
         assert detector.barcode_indexer is not None
         assert detector.min_score == 21
 
     def test_no_match_random_sequence(self):
         """Test that random sequence returns no valid barcode."""
-        detector = SharedMemoryStereoBarcodeDetector(DUMMY_BARCODES_25, min_score=21)
+        detector = SharedMemoryStereoBarcodeDetector(DUMMY_BARCODES_25)
         result = detector.find_barcode_umi("test_read", RANDOM_SEQUENCE)
 
         assert isinstance(result, (LinkerBarcodeDetectionResult, TSOBarcodeDetectionResult))
@@ -266,7 +266,7 @@ def test_real_sequence(self, read_sequence, true_barcode, true_umi):
                     "CATCAAAGGTTCACCGAATCTCCAC",
                     "GTCGCCTTCAACGTATACTGATATA",
                     "ACCAATTAGCAGCCATGGGAACTCC"]
-        detector = SharedMemoryStereoBarcodeDetector(barcodes, min_score=21)
+        detector = SharedMemoryStereoBarcodeDetector(barcodes)
         result = detector.find_barcode_umi("test_read", read_sequence)
 
         assert isinstance(result, (LinkerBarcodeDetectionResult, TSOBarcodeDetectionResult))
@@ -289,7 +289,7 @@ def test_real_sequence_no_bc(self, read_sequence):
                     "CATCAAAGGTTCACCGAATCTCCAC",
                     "GTCGCCTTCAACGTATACTGATATA",
                     "ACCAATTAGCAGCCATGGGAACTCC"]
-        detector = SharedMemoryStereoBarcodeDetector(barcodes, min_score=21)
+        detector = SharedMemoryStereoBarcodeDetector(barcodes)
         result = detector.find_barcode_umi("test_read", read_sequence)
 
         assert isinstance(result, (LinkerBarcodeDetectionResult, TSOBarcodeDetectionResult))
@@ -338,7 +338,7 @@ def test_real_sequence(self, read_sequence, true_barcodes, true_umis):
                     "GGGCACATTCCTCAGAAAGCGAAAA",
                     "GGCAGGCCATTAGTTACTCGTCGAG",
                     "CTCCCCTAATCGTTTGTCGGTGGAC"]
-        detector = SharedMemoryStereoSplittingBarcodeDetector(barcodes, min_score=21)
+        detector = SharedMemoryStereoSplittingBarcodeDetector(barcodes)
         result = detector.find_barcode_umi("test_read", read_sequence)
 
         assert isinstance(result, SplittingBarcodeDetectionResult)
@@ -370,7 +370,7 @@ def test_real_sequence_no_bc(self, read_sequence):
                     "GGGCACATTCCTCAGAAAGCGAAAA",
                     "GGCAGGCCATTAGTTACTCGTCGAG",
                     "CTCCCCTAATCGTTTGTCGGTGGAC"]
-        detector = SharedMemoryStereoSplittingBarcodeDetector(barcodes, min_score=21)
+        detector = SharedMemoryStereoSplittingBarcodeDetector(barcodes)
         result = detector.find_barcode_umi("test_read", read_sequence)
 
         assert isinstance(result, SplittingBarcodeDetectionResult)
@@ -399,7 +399,7 @@ def test_init_large_whitelist(self):
         """Test that min_score increases for large whitelists."""
         large_barcodes = [f"ACTG{i:012d}"[:16] for i in range(100001)]
         detector = TenXBarcodeDetector(large_barcodes)
-        assert detector.min_score == 16
+        assert detector.min_score == 15
 
     def test_no_match_random_sequence(self):
         """Test that random sequence returns no valid barcode."""
@@ -567,20 +567,20 @@ class TestCurioBarcodeDetector:
 
     def test_init_joint_barcodes(self):
         """Test initialization with joint barcode list."""
-        detector = CurioBarcodeDetector(DUMMY_BARCODES_14, min_score=13)
+        detector = CurioBarcodeDetector(DUMMY_BARCODES_14)
         assert detector.barcode_indexer is not None
         assert detector.min_score == 13
 
     def test_init_split_barcodes(self):
         """Test initialization with separate left/right barcode lists."""
         barcode_tuple = (DUMMY_BARCODES_8, DUMMY_BARCODES_6)
-        detector = CurioBarcodeDetector(barcode_tuple, min_score=13)
+        detector = CurioBarcodeDetector(barcode_tuple)
         # Should have 3*3 = 9 combined barcodes
         assert len(detector.barcode_indexer.seq_list) == 9
 
     def test_no_match_random_sequence(self):
         """Test that random sequence returns no valid barcode."""
-        detector = CurioBarcodeDetector(DUMMY_BARCODES_14, min_score=13)
+        detector = CurioBarcodeDetector(DUMMY_BARCODES_14)
         result = detector.find_barcode_umi("test_read", RANDOM_SEQUENCE)
 
         assert isinstance(result, LinkerBarcodeDetectionResult)
@@ -637,13 +637,13 @@ class TestCurioIlluminaDetector:
 
     def test_init(self):
         """Test basic initialization."""
-        detector = CurioIlluminaDetector(DUMMY_BARCODES_14, min_score=14)
+        detector = CurioIlluminaDetector(DUMMY_BARCODES_14)
         assert detector.barcode_indexer is not None
         assert detector.min_score == 14
 
     def test_no_match_random_sequence(self):
         """Test that random sequence returns no valid barcode."""
-        detector = CurioIlluminaDetector(DUMMY_BARCODES_14, min_score=14)
+        detector = CurioIlluminaDetector(DUMMY_BARCODES_14)
         result = detector.find_barcode_umi("test_read", RANDOM_SEQUENCE)
 
         assert isinstance(result, LinkerBarcodeDetectionResult)
diff --git a/tests/test_molecule_structure.py b/tests/test_molecule_structure.py
new file mode 100644
index 00000000..127d2e71
--- /dev/null
+++ b/tests/test_molecule_structure.py
@@ -0,0 +1,631 @@
+############################################################################
+# Copyright (c) 2025-2026 University of Helsinki
+# All Rights Reserved
+# See file LICENSE for details.
+############################################################################
+
+"""
+Tests for molecule structure parsing and element types.
+
+Tests the universal barcode calling infrastructure:
+- ElementType enum and its method classification
+- MoleculeElement initialization for different element types
+- MoleculeStructure parsing from MDF format
+"""
+
+import os
+import pytest
+from io import StringIO
+
+from src.barcode_calling.callers.molecule_structure import (
+    ElementType,
+    MoleculeElement,
+    MoleculeStructure
+)
+
+# Test data directory
+TEST_DATA_DIR = os.path.join(os.path.dirname(__file__), "universal_data")
+
+
+class TestElementType:
+    """Test ElementType enum methods."""
+
+    def test_needs_correction_var_file(self):
+        """VAR_FILE elements need barcode correction."""
+        assert ElementType.VAR_FILE.needs_correction() is True
+
+    def test_needs_correction_var_list(self):
+        """VAR_LIST elements need barcode correction."""
+        assert ElementType.VAR_LIST.needs_correction() is True
+
+    def test_needs_correction_var_any(self):
+        """VAR_ANY elements do NOT need correction."""
+        assert ElementType.VAR_ANY.needs_correction() is False
+
+    def test_needs_correction_const(self):
+        """CONST elements do NOT need correction."""
+        assert ElementType.CONST.needs_correction() is False
+
+    def test_needs_sequence_extraction_var_any(self):
+        """VAR_ANY elements need sequence extraction."""
+        assert ElementType.VAR_ANY.needs_sequence_extraction() is True
+
+    def test_needs_sequence_extraction_var_file(self):
+        """VAR_FILE elements do NOT need sequence extraction (they get corrected)."""
+        assert ElementType.VAR_FILE.needs_sequence_extraction() is False
+
+    def test_needs_sequence_extraction_const(self):
+        """CONST elements do NOT need sequence extraction."""
+        assert ElementType.CONST.needs_sequence_extraction() is False
+
+    def test_is_constant_const(self):
+        """CONST is a constant element."""
+        assert ElementType.CONST.is_constant() is True
+
+    def test_is_constant_var_any(self):
+        """VAR_ANY is NOT a constant element."""
+        assert ElementType.VAR_ANY.is_constant() is False
+
+    def test_is_constant_polyt(self):
+        """PolyT is NOT a constant element."""
+        assert ElementType.PolyT.is_constant() is False
+
+    def test_is_variable_var_any(self):
+        """VAR_ANY is a variable element."""
+        assert ElementType.VAR_ANY.is_variable() is True
+
+    def test_is_variable_var_list(self):
+        """VAR_LIST is a variable element."""
+        assert ElementType.VAR_LIST.is_variable() is True
+
+    def test_is_variable_var_file(self):
+        """VAR_FILE is a variable element."""
+        assert ElementType.VAR_FILE.is_variable() is True
+
+    def test_is_variable_const(self):
+        """CONST is NOT a variable element."""
+        assert ElementType.CONST.is_variable() is False
+
+    def test_is_variable_polyt(self):
+        """PolyT is NOT a variable element."""
+        assert ElementType.PolyT.is_variable() is False
+
+    def test_is_base_separator_var_any_separator(self):
+        """VAR_ANY_SEPARATOR is a separator."""
+        assert ElementType.VAR_ANY_SEPARATOR.is_base_separator() is True
+
+    def test_is_base_separator_var_any_non_t_separator(self):
+        """VAR_ANY_NON_T_SEPARATOR is a separator."""
+        assert ElementType.VAR_ANY_NON_T_SEPARATOR.is_base_separator() is True
+
+    def test_is_base_separator_var_any(self):
+        """VAR_ANY is NOT a separator."""
+        assert ElementType.VAR_ANY.is_base_separator() is False
+
+
+class TestMoleculeElement:
+    """Test MoleculeElement initialization."""
+
+    def test_init_polyt(self):
+        """Test PolyT element initialization."""
+        element = MoleculeElement("PolyT", ElementType.PolyT)
+
+        assert element.element_name == "PolyT"
+        assert element.element_type == ElementType.PolyT
+        assert element.element_value is None
+        assert element.element_length == -1
+
+    def test_init_cdna(self):
+        """Test cDNA element initialization."""
+        element = MoleculeElement("cDNA", ElementType.cDNA)
+
+        assert element.element_name == "cDNA"
+        assert element.element_type == ElementType.cDNA
+        assert element.element_value is None
+        assert element.element_length == -1
+
+    def test_init_const(self):
+        """Test CONST element initialization."""
+        sequence = "CTACACGACGCTCTTCCGATCT"
+        element = MoleculeElement("R1", ElementType.CONST, sequence)
+
+        assert element.element_name == "R1"
+        assert element.element_type == ElementType.CONST
+        assert element.element_value == sequence
+        assert element.element_length == len(sequence)
+
+    def test_init_var_list(self):
+        """Test VAR_LIST element initialization."""
+        barcodes = "AAAA,CCCC,GGGG,TTTT"
+        element = MoleculeElement("Barcode", ElementType.VAR_LIST, barcodes)
+
+        assert element.element_name == "Barcode"
+        assert element.element_type == ElementType.VAR_LIST
+        assert element.element_value == ["AAAA", "CCCC", "GGGG", "TTTT"]
+        assert element.element_length == 4  # Length of first barcode
+
+    def test_init_var_any(self):
+        """Test VAR_ANY element initialization."""
+        element = MoleculeElement("UMI", ElementType.VAR_ANY, "12")
+
+        assert element.element_name == "UMI"
+        assert element.element_type == ElementType.VAR_ANY
+        assert element.element_value is None
+        assert element.element_length == 12
+
+    def test_init_var_file(self):
+        """Test VAR_FILE element initialization with real file."""
+        barcode_file = os.path.join(TEST_DATA_DIR, "barcodes_small.tsv")
+        element = MoleculeElement("Barcode", ElementType.VAR_FILE, barcode_file)
+
+        assert element.element_name == "Barcode"
+        assert element.element_type == ElementType.VAR_FILE
+        assert len(element.element_value) == 8  # 8 barcodes in test file
+        assert element.element_length == 16  # 16bp barcodes
+
+    def test_init_var_any_separator(self):
+        """Test VAR_ANY_SEPARATOR element initialization."""
+        element = MoleculeElement("Sep", ElementType.VAR_ANY_SEPARATOR, "5")
+
+        assert element.element_name == "Sep"
+        assert element.element_type == ElementType.VAR_ANY_SEPARATOR
+        assert element.element_value is None
+        assert element.element_length == 5
+
+
+class TestMoleculeStructure:
+    """Test MoleculeStructure parsing."""
+
+    def test_parse_simple_structure(self):
+        """Test parsing simple molecule structure."""
+        mdf_content = """Barcode:UMI:PolyT:cDNA
+Barcode\tVAR_LIST\tAAAA,CCCC,GGGG
+UMI\tVAR_ANY\t10
+"""
+        structure = MoleculeStructure(iter(mdf_content.strip().split('\n')))
+
+        elements = list(structure)
+        assert len(elements) == 4
+
+        assert elements[0].element_name == "Barcode"
+        assert elements[0].element_type == ElementType.VAR_LIST
+
+        assert elements[1].element_name == "UMI"
+        assert elements[1].element_type == ElementType.VAR_ANY
+
+        assert elements[2].element_name == "PolyT"
+        assert elements[2].element_type == ElementType.PolyT
+
+        assert elements[3].element_name == "cDNA"
+        assert elements[3].element_type == ElementType.cDNA
+
+    def test_parse_with_const(self):
+        """Test parsing structure with constant element."""
+        mdf_content = """R1:Barcode:UMI:PolyT:cDNA
+R1\tCONST\tCTACACGACGCTCTTCCGATCT
+Barcode\tVAR_LIST\tAAAA,CCCC,GGGG
+UMI\tVAR_ANY\t12
+"""
+        structure = MoleculeStructure(iter(mdf_content.strip().split('\n')))
+
+        elements = list(structure)
+        assert len(elements) == 5
+
+        assert elements[0].element_name == "R1"
+        assert elements[0].element_type == ElementType.CONST
+        assert elements[0].element_value == "CTACACGACGCTCTTCCGATCT"
+
+    def test_parse_from_file_simple(self):
+        """Test parsing simple.mdf file."""
+        mdf_path = os.path.join(TEST_DATA_DIR, "simple.mdf")
+
+        with open(mdf_path) as f:
+            structure = MoleculeStructure(f)
+
+        elements = list(structure)
+
+        # Simple.mdf: Barcode:UMI:PolyT:cDNA
+        assert len(elements) == 4
+        assert elements[0].element_name == "Barcode"
+        assert elements[0].element_type == ElementType.VAR_LIST
+        assert elements[1].element_name == "UMI"
+        assert elements[1].element_type == ElementType.VAR_ANY
+        assert elements[1].element_length == 10
+
+    def test_parse_from_file_10x(self):
+        """Test parsing 10x-like structure.
+
+        Uses inline definition to avoid relative path issues with VAR_FILE.
+        """
+        # Use inline definition to avoid file path issues
+        mdf_content = """R1:Barcode:UMI:PolyT:cDNA:TSO
+R1\tCONST\tCTACACGACGCTCTTCCGATCT
+Barcode\tVAR_LIST\tATCCTTAGTGTTTGTC,CTTAGGAGTGGTTAGC,TGAGGGCCAACGTGCT
+UMI\tVAR_ANY\t12
+TSO\tCONST\tCCCATGTACTCTGCGTTGATACCACTGCTT
+"""
+        structure = MoleculeStructure(iter(mdf_content.strip().split('\n')))
+
+        elements = list(structure)
+
+        # R1:Barcode:UMI:PolyT:cDNA:TSO
+        assert len(elements) == 6
+        assert elements[0].element_name == "R1"
+        assert elements[0].element_type == ElementType.CONST
+        assert elements[1].element_name == "Barcode"
+        assert elements[1].element_type == ElementType.VAR_LIST
+
+    def test_iteration(self):
+        """Test iterating over structure elements."""
+        mdf_content = """Barcode:UMI:cDNA
+Barcode\tVAR_LIST\tAAAA,CCCC
+UMI\tVAR_ANY\t8
+"""
+        structure = MoleculeStructure(iter(mdf_content.strip().split('\n')))
+
+        names = [el.element_name for el in structure]
+        assert names == ["Barcode", "UMI", "cDNA"]
+
+    def test_from_element_list(self):
+        """Test creating structure from element list."""
+        elements = [
+            MoleculeElement("Barcode", ElementType.VAR_LIST, "AAAA,CCCC"),
+            MoleculeElement("UMI", ElementType.VAR_ANY, "10"),
+            MoleculeElement("cDNA", ElementType.cDNA)
+        ]
+
+        structure = MoleculeStructure.from_element_list(elements)
+
+        assert len(list(structure)) == 3
+        assert list(structure)[0].element_name == "Barcode"
+
+    def test_concat_delimiter(self):
+        """Test concatenation delimiter constant."""
+        assert MoleculeStructure.CONCAT_DELIM == "|"
+
+    def test_dupl_delimiter(self):
+        """Test duplication delimiter constant."""
+        assert MoleculeStructure.DUPL_DELIM == "/"
+
+
+class TestMoleculeStructureBarcodeUMI:
+    """Test barcode/UMI element identification."""
+
+    def test_barcode_elements_single(self):
+        """Test single barcode element identification."""
+        mdf_content = """Barcode:UMI:cDNA
+Barcode\tVAR_LIST\tAAAA,CCCC
+UMI\tVAR_ANY\t10
+"""
+        structure = MoleculeStructure(iter(mdf_content.strip().split('\n')))
+
+        assert structure.barcode_elements == ["Barcode"]
+
+    def test_umi_elements_single(self):
+        """Test single UMI element identification."""
+        mdf_content = """Barcode:UMI:cDNA
+Barcode\tVAR_LIST\tAAAA,CCCC
+UMI\tVAR_ANY\t10
+"""
+        structure = MoleculeStructure(iter(mdf_content.strip().split('\n')))
+
+        assert structure.umi_elements == ["UMI"]
+
+    def test_barcode_elements_multiple(self):
+        """Test multiple barcode elements identification."""
+        mdf_content = """barcode1:barcode2:UMI:cDNA
+barcode1\tVAR_LIST\tAAAA,CCCC
+barcode2\tVAR_LIST\tGGGG,TTTT
+UMI\tVAR_ANY\t10
+"""
+        structure = MoleculeStructure(iter(mdf_content.strip().split('\n')))
+
+        assert structure.barcode_elements == ["barcode1", "barcode2"]
+
+    def test_umi_elements_multiple(self):
+        """Test multiple UMI elements identification."""
+        mdf_content = """Barcode:UMI1:UMI2:cDNA
+Barcode\tVAR_LIST\tAAAA,CCCC
+UMI1\tVAR_ANY\t6
+UMI2\tVAR_ANY\t6
+"""
+        structure = MoleculeStructure(iter(mdf_content.strip().split('\n')))
+
+        assert structure.umi_elements == ["UMI1", "UMI2"]
+
+    def test_barcode_case_insensitive(self):
+        """Test that barcode prefix is case-insensitive."""
+        mdf_content = """BARCODE:Barcode2:barcode3:cDNA
+BARCODE\tVAR_LIST\tAAAA,CCCC
+Barcode2\tVAR_LIST\tGGGG,TTTT
+barcode3\tVAR_LIST\tACGT,TGCA
+"""
+        structure = MoleculeStructure(iter(mdf_content.strip().split('\n')))
+
+        assert len(structure.barcode_elements) == 3
+        assert "BARCODE" in structure.barcode_elements
+        assert "Barcode2" in structure.barcode_elements
+        assert "barcode3" in structure.barcode_elements
+
+    def test_umi_case_insensitive(self):
+        """Test that UMI prefix is case-insensitive."""
+        mdf_content = """Barcode:UMI:umi2:cDNA
+Barcode\tVAR_LIST\tAAAA,CCCC
+UMI\tVAR_ANY\t6
+umi2\tVAR_ANY\t6
+"""
+        structure = MoleculeStructure(iter(mdf_content.strip().split('\n')))
+
+        assert len(structure.umi_elements) == 2
+        assert "UMI" in structure.umi_elements
+        assert "umi2" in structure.umi_elements
+
+    def test_no_barcode_elements(self):
+        """Test structure with no barcode elements."""
+        mdf_content = """R1:UMI:cDNA
+R1\tCONST\tACGT
+UMI\tVAR_ANY\t10
+"""
+        structure = MoleculeStructure(iter(mdf_content.strip().split('\n')))
+
+        assert structure.barcode_elements == []
+
+    def test_no_umi_elements(self):
+        """Test structure with no UMI elements."""
+        mdf_content = """Barcode:cDNA
+Barcode\tVAR_LIST\tAAAA,CCCC
+"""
+        structure = MoleculeStructure(iter(mdf_content.strip().split('\n')))
+
+        assert structure.umi_elements == []
+
+    def test_from_element_list_identifies_barcodes(self):
+        """Test that from_element_list also identifies barcode/UMI elements."""
+        elements = [
+            MoleculeElement("Barcode", ElementType.VAR_LIST, "AAAA,CCCC"),
+            MoleculeElement("UMI", ElementType.VAR_ANY, "10"),
+            MoleculeElement("cDNA", ElementType.cDNA)
+        ]
+
+        structure = MoleculeStructure.from_element_list(elements)
+
+        assert structure.barcode_elements == ["Barcode"]
+        assert structure.umi_elements == ["UMI"]
+
+
+class TestMoleculeStructureEdgeCases:
+    """Test edge cases and error handling."""
+
+    def test_element_order_preserved(self):
+        """Test that element order from header line is preserved."""
+        mdf_content = """cDNA:UMI:Barcode:PolyT
+Barcode\tVAR_LIST\tAAAA,CCCC
+UMI\tVAR_ANY\t8
+"""
+        structure = MoleculeStructure(iter(mdf_content.strip().split('\n')))
+
+        names = [el.element_name for el in structure]
+        # Order should match header line, not definition order
+        assert names == ["cDNA", "UMI", "Barcode", "PolyT"]
+
+    def test_whitespace_handling(self):
+        """Test that whitespace in MDF is handled correctly."""
+        mdf_content = """  Barcode : UMI : cDNA
+Barcode    VAR_LIST    AAAA,CCCC
+UMI        VAR_ANY     8
+"""
+        structure = MoleculeStructure(iter(mdf_content.strip().split('\n')))
+
+        elements = list(structure)
+        assert len(elements) == 3
+
+    def test_multiple_var_list_barcodes(self):
+        """Test VAR_LIST with many barcodes."""
+        barcodes = ",".join(["ACGT" * 4] * 100)  # 100 barcodes
+        mdf_content = f"""Barcode:cDNA
+Barcode\tVAR_LIST\t{barcodes}
+"""
+        structure = MoleculeStructure(iter(mdf_content.strip().split('\n')))
+
+        elements = list(structure)
+        assert len(elements[0].element_value) == 100
+
+
+class TestConcatenatedElements:
+    """Test concatenated element parsing and identification."""
+
+    def test_parse_concatenated_elements(self):
+        """Test parsing MDF with concatenated barcode parts."""
+        mdf_content = """Barcode|1:Barcode|2:UMI:PolyT:cDNA
+Barcode|1\tVAR_LIST\tAAAACCCC,GGGGTTTT\t4
+Barcode|2\tVAR_LIST\tAAAACCCC,GGGGTTTT\t4
+UMI\tVAR_ANY\t8
+"""
+        structure = MoleculeStructure(iter(mdf_content.strip().split('\n')))
+
+        assert len(structure.ordered_elements) == 5
+        assert "Barcode|1" in structure.elements_to_concatenate
+        assert "Barcode|2" in structure.elements_to_concatenate
+        assert structure.elements_to_concatenate["Barcode|1"] == ("Barcode", 1)
+        assert structure.elements_to_concatenate["Barcode|2"] == ("Barcode", 2)
+
+    def test_concatenated_elements_counts(self):
+        """Test that concatenated_elements_counts has correct base name and count."""
+        mdf_content = """Barcode|1:Barcode|2:cDNA
+Barcode|1\tVAR_LIST\tAAAACCCC,GGGGTTTT\t4
+Barcode|2\tVAR_LIST\tAAAACCCC,GGGGTTTT\t4
+"""
+        structure = MoleculeStructure(iter(mdf_content.strip().split('\n')))
+
+        assert "Barcode" in structure.concatenated_elements_counts
+        assert structure.concatenated_elements_counts["Barcode"] == 2
+
+    def test_concatenated_three_parts(self):
+        """Test three-part concatenated element."""
+        mdf_content = """Barcode|1:Barcode|2:Barcode|3:cDNA
+Barcode|1\tVAR_LIST\tAAAACCCCGGGG,TTTTAAAACCCC\t4
+Barcode|2\tVAR_LIST\tAAAACCCCGGGG,TTTTAAAACCCC\t4
+Barcode|3\tVAR_LIST\tAAAACCCCGGGG,TTTTAAAACCCC\t4
+"""
+        structure = MoleculeStructure(iter(mdf_content.strip().split('\n')))
+
+        assert structure.concatenated_elements_counts["Barcode"] == 3
+
+    def test_concatenated_barcode_umi_identification(self):
+        """Test that concatenated barcode elements are identified by base name."""
+        mdf_content = """Barcode|1:Barcode|2:UMI:cDNA
+Barcode|1\tVAR_LIST\tAAAACCCC,GGGGTTTT\t4
+Barcode|2\tVAR_LIST\tAAAACCCC,GGGGTTTT\t4
+UMI\tVAR_ANY\t8
+"""
+        structure = MoleculeStructure(iter(mdf_content.strip().split('\n')))
+
+        # Base name "Barcode" should be in barcode_elements, not "Barcode|1", "Barcode|2"
+        assert structure.barcode_elements == ["Barcode"]
+        assert "Barcode|1" not in structure.barcode_elements
+        assert "Barcode|2" not in structure.barcode_elements
+        assert structure.umi_elements == ["UMI"]
+
+    def test_concatenated_umi_identification(self):
+        """Test that concatenated UMI elements are identified by base name."""
+        mdf_content = """Barcode:UMI|1:UMI|2:cDNA
+Barcode\tVAR_LIST\tAAAA,CCCC
+UMI|1\tVAR_ANY\t4
+UMI|2\tVAR_ANY\t4
+"""
+        structure = MoleculeStructure(iter(mdf_content.strip().split('\n')))
+
+        assert structure.barcode_elements == ["Barcode"]
+        assert structure.umi_elements == ["UMI"]
+
+    def test_concatenated_explicit_length(self):
+        """Test that explicit length (4th field) is correctly parsed for concatenated parts."""
+        mdf_content = """Barcode|1:Barcode|2:cDNA
+Barcode|1\tVAR_LIST\tAAAACCCC,GGGGTTTT\t4
+Barcode|2\tVAR_LIST\tAAAACCCC,GGGGTTTT\t4
+"""
+        structure = MoleculeStructure(iter(mdf_content.strip().split('\n')))
+
+        # Each part should have length 4, not the full barcode length 8
+        for el in structure:
+            if el.element_name == "Barcode|1":
+                assert el.element_length == 4
+            elif el.element_name == "Barcode|2":
+                assert el.element_length == 4
+
+    def test_concatenated_elements_must_be_variable(self):
+        """Test that non-variable concatenated elements are rejected."""
+        mdf_content = """Adapter|1:Adapter|2:cDNA
+Adapter|1\tCONST\tACGT
+Adapter|2\tCONST\tTGCA
+"""
+        with pytest.raises(SystemExit):
+            MoleculeStructure(iter(mdf_content.strip().split('\n')))
+
+    def test_concatenated_nonconsecutive_indices_rejected(self):
+        """Test that non-consecutive indices (e.g., 1 and 3) are rejected."""
+        mdf_content = """Barcode|1:Barcode|3:cDNA
+Barcode|1\tVAR_LIST\tAAAA,CCCC\t4
+Barcode|3\tVAR_LIST\tAAAA,CCCC\t4
+"""
+        with pytest.raises(SystemExit):
+            MoleculeStructure(iter(mdf_content.strip().split('\n')))
+
+
+class TestDuplicatedElements:
+    """Test duplicated element parsing and identification."""
+
+    def test_parse_duplicated_elements(self):
+        """Test parsing MDF with duplicated barcode parts."""
+        mdf_content = """Barcode/1:Barcode/2:UMI:PolyT:cDNA
+Barcode/1\tVAR_LIST\tAAAACCCC,GGGGTTTT
+Barcode/2\tVAR_LIST\tAAAACCCC,GGGGTTTT
+UMI\tVAR_ANY\t8
+"""
+        structure = MoleculeStructure(iter(mdf_content.strip().split('\n')))
+
+        assert len(structure.ordered_elements) == 5
+        assert "Barcode/1" in structure.duplicated_elements
+        assert "Barcode/2" in structure.duplicated_elements
+        assert structure.duplicated_elements["Barcode/1"] == ("Barcode", 1)
+        assert structure.duplicated_elements["Barcode/2"] == ("Barcode", 2)
+
+    def test_duplicated_elements_counts(self):
+        """Test that duplicated_elements_counts has correct base name and count."""
+        mdf_content = """Barcode/1:Barcode/2:cDNA
+Barcode/1\tVAR_LIST\tAAAACCCC,GGGGTTTT
+Barcode/2\tVAR_LIST\tAAAACCCC,GGGGTTTT
+"""
+        structure = MoleculeStructure(iter(mdf_content.strip().split('\n')))
+
+        assert "Barcode" in structure.duplicated_elements_counts
+        assert structure.duplicated_elements_counts["Barcode"] == 2
+
+    def test_duplicated_three_parts(self):
+        """Test three-part duplicated element."""
+        mdf_content = """Barcode/1:Barcode/2:Barcode/3:cDNA
+Barcode/1\tVAR_LIST\tAAAACCCC,GGGGTTTT
+Barcode/2\tVAR_LIST\tAAAACCCC,GGGGTTTT
+Barcode/3\tVAR_LIST\tAAAACCCC,GGGGTTTT
+"""
+        structure = MoleculeStructure(iter(mdf_content.strip().split('\n')))
+
+        assert structure.duplicated_elements_counts["Barcode"] == 3
+
+    def test_duplicated_barcode_umi_identification(self):
+        """Test that duplicated barcode elements are identified by base name."""
+        mdf_content = """Barcode/1:Barcode/2:UMI:cDNA
+Barcode/1\tVAR_LIST\tAAAACCCC,GGGGTTTT
+Barcode/2\tVAR_LIST\tAAAACCCC,GGGGTTTT
+UMI\tVAR_ANY\t8
+"""
+        structure = MoleculeStructure(iter(mdf_content.strip().split('\n')))
+
+        # Base name "Barcode" should be in barcode_elements, not "Barcode/1", "Barcode/2"
+        assert structure.barcode_elements == ["Barcode"]
+        assert "Barcode/1" not in structure.barcode_elements
+        assert "Barcode/2" not in structure.barcode_elements
+        assert structure.umi_elements == ["UMI"]
+
+    def test_duplicated_umi_identification(self):
+        """Test that duplicated UMI elements are identified by base name."""
+        mdf_content = """Barcode:UMI/1:UMI/2:cDNA
+Barcode\tVAR_LIST\tAAAA,CCCC
+UMI/1\tVAR_ANY\t8
+UMI/2\tVAR_ANY\t8
+"""
+        structure = MoleculeStructure(iter(mdf_content.strip().split('\n')))
+
+        assert structure.barcode_elements == ["Barcode"]
+        assert structure.umi_elements == ["UMI"]
+
+    def test_duplicated_elements_must_be_variable(self):
+        """Test that non-variable duplicated elements are rejected."""
+        mdf_content = """Adapter/1:Adapter/2:cDNA
+Adapter/1\tCONST\tACGT
+Adapter/2\tCONST\tACGT
+"""
+        with pytest.raises(SystemExit):
+            MoleculeStructure(iter(mdf_content.strip().split('\n')))
+
+    def test_duplicated_nonconsecutive_indices_rejected(self):
+        """Test that non-consecutive indices (e.g., 1 and 3) are rejected."""
+        mdf_content = """Barcode/1:Barcode/3:cDNA
+Barcode/1\tVAR_LIST\tAAAA,CCCC
+Barcode/3\tVAR_LIST\tAAAA,CCCC
+"""
+        with pytest.raises(SystemExit):
+            MoleculeStructure(iter(mdf_content.strip().split('\n')))
+
+    def test_duplicated_different_types_rejected(self):
+        """Test that duplicated elements with different types are rejected."""
+        mdf_content = """Barcode/1:Barcode/2:cDNA
+Barcode/1\tVAR_LIST\tAAAA,CCCC
+Barcode/2\tVAR_ANY\t4
+"""
+        with pytest.raises(SystemExit):
+            MoleculeStructure(iter(mdf_content.strip().split('\n')))
+
+
+if __name__ == '__main__':
+    pytest.main([__file__, '-v'])
diff --git a/tests/test_universal_extraction.py b/tests/test_universal_extraction.py
new file mode 100644
index 00000000..e9363393
--- /dev/null
+++ b/tests/test_universal_extraction.py
@@ -0,0 +1,1104 @@
+############################################################################
+# Copyright (c) 2025-2026 University of Helsinki
+# All Rights Reserved
+# See file LICENSE for details.
+############################################################################
+
+"""
+Tests for universal barcode extraction.
+
+Tests the universal barcode calling infrastructure:
+- DetectedElement container class
+- ExtractionResult dict-based detection result
+- UniversalSingleMoleculeExtractor for pattern detection
+- BarcodeResult protocol compliance
+"""
+
+import os
+import pytest
+
+from src.barcode_calling.callers.extraction_result import (
+    DetectedElement,
+    ExtractionResult,
+    ReadStats
+)
+from src.barcode_calling.callers.molecule_structure import (
+    ElementType,
+    MoleculeElement,
+    MoleculeStructure
+)
+from src.barcode_calling.callers.universal_extraction import (
+    UniversalSingleMoleculeExtractor
+)
+from src.barcode_calling.callers.protocol import BarcodeResult
+
+# Test data directory
+TEST_DATA_DIR = os.path.join(os.path.dirname(__file__), "universal_data")
+
+
+# =============================================================================
+# Test Fixtures
+# =============================================================================
+
+def create_simple_structure():
+    """Create simple molecule structure: Barcode:UMI:PolyT:cDNA"""
+    mdf_content = """Barcode:UMI:PolyT:cDNA
+Barcode\tVAR_LIST\tATCCTTAGTGTTTGTC,CTTAGGAGTGGTTAGC,TGAGGGCCAACGTGCT
+UMI\tVAR_ANY\t12
+"""
+    return MoleculeStructure(iter(mdf_content.strip().split('\n')))
+
+
+def create_10x_structure():
+    """Create 10x-like molecule structure with R1 adapter and TSO.
+
+    Uses inline barcodes instead of file reference to avoid path issues.
+    """
+    # Use inline barcodes to avoid relative path issues with VAR_FILE
+    mdf_content = """R1:Barcode:UMI:PolyT:cDNA:TSO
+R1\tCONST\tCTACACGACGCTCTTCCGATCT
+Barcode\tVAR_LIST\tATCCTTAGTGTTTGTC,CTTAGGAGTGGTTAGC,TGAGGGCCAACGTGCT,AAACCCGGGTTTAAAC
+UMI\tVAR_ANY\t12
+TSO\tCONST\tCCCATGTACTCTGCGTTGATACCACTGCTT
+"""
+    return MoleculeStructure(iter(mdf_content.strip().split('\n')))
+
+
+def create_detected_elements():
+    """Create sample detected elements for testing."""
+    return {
+        "Barcode": DetectedElement(22, 37, 16, "ATCCTTAGTGTTTGTC"),
+        "UMI": DetectedElement(38, 49, 0, "AACCTTAGACCG"),
+        "PolyT": DetectedElement(50, 79, 0)
+    }
+
+
+# =============================================================================
+# Test DetectedElement
+# =============================================================================
+
+class TestDetectedElement:
+    """Test DetectedElement container class."""
+
+    def test_default_init(self):
+        """Test initialization with default values."""
+        element = DetectedElement()
+
+        assert element.start == -1
+        assert element.end == -1
+        assert element.score == -1
+        assert element.seq is None
+
+    def test_custom_init(self):
+        """Test initialization with custom values."""
+        element = DetectedElement(start=10, end=25, score=15, seq="ACTGACTG")
+
+        assert element.start == 10
+        assert element.end == 25
+        assert element.score == 15
+        assert element.seq == "ACTGACTG"
+
+    def test_partial_init(self):
+        """Test initialization with some values."""
+        element = DetectedElement(start=100, end=150)
+
+        assert element.start == 100
+        assert element.end == 150
+        assert element.score == -1
+        assert element.seq is None
+
+    def test_coordinates_only(self):
+        """Test element with only coordinates (no sequence)."""
+        element = DetectedElement(50, 70, 0)
+
+        assert element.start == 50
+        assert element.end == 70
+        assert element.seq is None
+
+
+# =============================================================================
+# Test ExtractionResult
+# =============================================================================
+
+class TestExtractionResult:
+    """Test ExtractionResult dict-based detection result."""
+
+    def test_init(self):
+        """Test initialization."""
+        structure = create_simple_structure()
+        detected = create_detected_elements()
+
+        result = ExtractionResult(structure, "read_001", "+", detected)
+
+        assert result.read_id == "read_001"
+        assert result.strand == "+"
+        assert result.molecule_structure is structure
+        assert "Barcode" in result.detected_results
+
+    def test_barcode_identification_by_prefix(self):
+        """Test barcode elements are identified by 'barcode' prefix."""
+        structure = create_simple_structure()
+        detected = create_detected_elements()
+
+        result = ExtractionResult(structure, "read_001", "+", detected)
+
+        # Should identify 'Barcode' element as barcode (case-insensitive prefix)
+        assert "Barcode" in result._barcode_elements
+
+    def test_umi_identification_by_prefix(self):
+        """Test UMI elements are identified by 'umi' prefix."""
+        structure = create_simple_structure()
+        detected = create_detected_elements()
+
+        result = ExtractionResult(structure, "read_001", "+", detected)
+
+        # Should identify 'UMI' element as UMI (case-insensitive prefix)
+        assert "UMI" in result._umi_elements
+
+    def test_get_barcode_single(self):
+        """Test getting single barcode."""
+        structure = create_simple_structure()
+        detected = create_detected_elements()
+
+        result = ExtractionResult(structure, "read_001", "+", detected)
+
+        assert result.get_barcode() == "ATCCTTAGTGTTTGTC"
+
+    def test_get_barcode_missing(self):
+        """Test getting barcode when not detected."""
+        structure = create_simple_structure()
+        detected = {
+            "UMI": DetectedElement(38, 49, 0, "AACCTTAGACCG"),
+            "PolyT": DetectedElement(50, 79, 0)
+        }
+
+        result = ExtractionResult(structure, "read_001", "+", detected)
+
+        assert result.get_barcode() == ExtractionResult.NOSEQ
+
+    def test_get_umi(self):
+        """Test getting UMI."""
+        structure = create_simple_structure()
+        detected = create_detected_elements()
+
+        result = ExtractionResult(structure, "read_001", "+", detected)
+
+        assert result.get_umi() == "AACCTTAGACCG"
+
+    def test_get_umi_missing(self):
+        """Test getting UMI when not detected."""
+        structure = create_simple_structure()
+        detected = {
+            "Barcode": DetectedElement(22, 37, 16, "ATCCTTAGTGTTTGTC"),
+            "PolyT": DetectedElement(50, 79, 0)
+        }
+
+        result = ExtractionResult(structure, "read_001", "+", detected)
+
+        assert result.get_umi() == ExtractionResult.NOSEQ
+
+    def test_is_valid_with_barcode(self):
+        """Test is_valid returns True when barcode detected."""
+        structure = create_simple_structure()
+        detected = create_detected_elements()
+
+        result = ExtractionResult(structure, "read_001", "+", detected)
+
+        assert result.is_valid() is True
+
+    def test_is_valid_without_barcode(self):
+        """Test is_valid returns False when no barcode."""
+        structure = create_simple_structure()
+        detected = {"PolyT": DetectedElement(50, 79, 0)}
+
+        result = ExtractionResult(structure, "read_001", "+", detected)
+
+        assert result.is_valid() is False
+
+    def test_has_barcode(self):
+        """Test has_barcode method."""
+        structure = create_simple_structure()
+        detected = create_detected_elements()
+
+        result = ExtractionResult(structure, "read_001", "+", detected)
+
+        assert result.has_barcode() is True
+
+    def test_has_umi(self):
+        """Test has_umi method."""
+        structure = create_simple_structure()
+        detected = create_detected_elements()
+
+        result = ExtractionResult(structure, "read_001", "+", detected)
+
+        assert result.has_umi() is True
+
+    def test_has_umi_false(self):
+        """Test has_umi returns False when no UMI."""
+        structure = create_simple_structure()
+        detected = {
+            "Barcode": DetectedElement(22, 37, 16, "ATCCTTAGTGTTTGTC"),
+            "PolyT": DetectedElement(50, 79, 0)
+        }
+
+        result = ExtractionResult(structure, "read_001", "+", detected)
+
+        assert result.has_umi() is False
+
+    def test_get_barcode_score(self):
+        """Test getting barcode score."""
+        structure = create_simple_structure()
+        detected = create_detected_elements()
+
+        result = ExtractionResult(structure, "read_001", "+", detected)
+
+        assert result.get_barcode_score() == 16
+
+    def test_get_barcode_score_no_barcode(self):
+        """Test barcode score when no barcode detected."""
+        structure = create_simple_structure()
+        detected = {"PolyT": DetectedElement(50, 79, 0)}
+
+        result = ExtractionResult(structure, "read_001", "+", detected)
+
+        assert result.get_barcode_score() == -1
+
+    def test_set_strand(self):
+        """Test setting strand."""
+        structure = create_simple_structure()
+        detected = create_detected_elements()
+
+        result = ExtractionResult(structure, "read_001", ".", detected)
+        result.set_strand("-")
+
+        assert result.strand == "-"
+
+    def test_update_coordinates(self):
+        """Test shifting all coordinates."""
+        structure = create_simple_structure()
+        detected = {
+            "Barcode": DetectedElement(22, 37, 16, "ATCCTTAGTGTTTGTC"),
+            "UMI": DetectedElement(38, 49, 0, "AACCTTAGACCG"),
+        }
+
+        result = ExtractionResult(structure, "read_001", "+", detected)
+        result.update_coordinates(100)
+
+        assert result.detected_results["Barcode"].start == 122
+        assert result.detected_results["Barcode"].end == 137
+        assert result.detected_results["UMI"].start == 138
+        assert result.detected_results["UMI"].end == 149
+
+    def test_update_coordinates_invalid(self):
+        """Test coordinate shift preserves -1 values."""
+        structure = create_simple_structure()
+        detected = {
+            "Barcode": DetectedElement(-1, -1, -1, ExtractionResult.NOSEQ),
+        }
+
+        result = ExtractionResult(structure, "read_001", "+", detected)
+        result.update_coordinates(100)
+
+        # -1 values (invalid/not detected) should remain -1
+        assert result.detected_results["Barcode"].start == -1
+        assert result.detected_results["Barcode"].end == -1
+
+    def test_more_informative_than_by_count(self):
+        """Test comparison by detected element count."""
+        structure = create_simple_structure()
+
+        detected1 = {
+            "Barcode": DetectedElement(22, 37, 16, "ATCCTTAGTGTTTGTC"),
+            "UMI": DetectedElement(38, 49, 0, "AACCTTAGACCG"),
+            "PolyT": DetectedElement(50, 79, 0),
+        }
+        detected2 = {
+            "Barcode": DetectedElement(22, 37, 16, "ATCCTTAGTGTTTGTC"),
+        }
+
+        result1 = ExtractionResult(structure, "read_001", "+", detected1)
+        result2 = ExtractionResult(structure, "read_001", "+", detected2)
+
+        assert result1.more_informative_than(result2) is True
+        assert result2.more_informative_than(result1) is False
+
+    def test_more_informative_than_by_score(self):
+        """Test comparison by total score when counts equal."""
+        structure = create_simple_structure()
+
+        detected1 = {
+            "Barcode": DetectedElement(22, 37, 20, "ATCCTTAGTGTTTGTC"),
+        }
+        detected2 = {
+            "Barcode": DetectedElement(22, 37, 10, "ATCCTTAGTGTTTGTC"),
+        }
+
+        result1 = ExtractionResult(structure, "read_001", "+", detected1)
+        result2 = ExtractionResult(structure, "read_001", "+", detected2)
+
+        assert result1.more_informative_than(result2) is True
+
+    def test_str_format(self):
+        """Test string formatting for TSV output."""
+        structure = create_simple_structure()
+        detected = create_detected_elements()
+
+        result = ExtractionResult(structure, "read_001", "+", detected)
+        output = str(result)
+
+        # Standard format: read_id, barcode, UMI, BC_score, valid_UMI, strand
+        assert "read_001" in output
+        assert "ATCCTTAGTGTTTGTC" in output
+        assert "AACCTTAGACCG" in output
+        assert "16" in output
+        assert "True" in output
+        assert "+" in output
+
+    def test_header_format(self):
+        """Test header generation."""
+        structure = create_simple_structure()
+        detected = create_detected_elements()
+
+        result = ExtractionResult(structure, "read_001", "+", detected)
+        header = result.header()
+
+        assert "#read_id" in header
+        assert "barcode" in header
+        assert "UMI" in header
+        assert "BC_score" in header
+        assert "valid_UMI" in header
+        assert "strand" in header
+
+    def test_get_additional_attributes(self):
+        """Test getting additional attributes excludes barcode/UMI elements."""
+        structure = create_simple_structure()
+        detected = create_detected_elements()
+
+        result = ExtractionResult(structure, "read_001", "+", detected)
+        attrs = result.get_additional_attributes()
+
+        # Barcode and UMI are excluded (tracked separately by ReadStats)
+        assert "Barcode detected" not in attrs
+        assert "UMI detected" not in attrs
+        # Supplementary elements are included
+        assert "PolyT detected" in attrs
+
+    def test_noseq_constant(self):
+        """Test NOSEQ constant."""
+        assert ExtractionResult.NOSEQ == "*"
+
+
+class TestExtractionResultProtocolCompliance:
+    """Test that ExtractionResult conforms to BarcodeResult protocol."""
+
+    def test_isinstance_check(self):
+        """Test that ExtractionResult is recognized as BarcodeResult."""
+        structure = create_simple_structure()
+        detected = create_detected_elements()
+
+        result = ExtractionResult(structure, "read_001", "+", detected)
+
+        # Protocol is runtime_checkable
+        assert isinstance(result, BarcodeResult)
+
+    def test_has_required_attributes(self):
+        """Test that all protocol attributes exist."""
+        structure = create_simple_structure()
+        detected = create_detected_elements()
+
+        result = ExtractionResult(structure, "read_001", "+", detected)
+
+        # Required attributes
+        assert hasattr(result, 'NOSEQ')
+        assert hasattr(result, 'read_id')
+        assert hasattr(result, 'strand')
+
+    def test_has_required_methods(self):
+        """Test that all protocol methods exist."""
+        structure = create_simple_structure()
+        detected = create_detected_elements()
+
+        result = ExtractionResult(structure, "read_001", "+", detected)
+
+        # Required methods
+        assert callable(getattr(result, 'get_barcode', None))
+        assert callable(getattr(result, 'get_umi', None))
+        assert callable(getattr(result, 'is_valid', None))
+        assert callable(getattr(result, 'has_barcode', None))
+        assert callable(getattr(result, 'has_umi', None))
+        assert callable(getattr(result, 'set_strand', None))
+        assert callable(getattr(result, 'update_coordinates', None))
+        assert callable(getattr(result, 'more_informative_than', None))
+        assert callable(getattr(result, 'get_additional_attributes', None))
+        assert callable(getattr(result, 'header', None))
+
+
+# =============================================================================
+# Test ExtractionResult ReadStats
+# =============================================================================
+
+class TestExtractionReadStats:
+    """Test ReadStats class for ExtractionResult."""
+
+    def test_init(self):
+        """Test initialization."""
+        stats = ReadStats()
+
+        assert stats.read_count == 0
+        assert stats.bc_count == 0
+        assert stats.umi_count == 0
+        assert len(stats.pattern_counts) == 0
+
+    def test_add_read_with_barcode(self):
+        """Test adding read with barcode detected."""
+        stats = ReadStats()
+        structure = create_simple_structure()
+        detected = create_detected_elements()
+        result = ExtractionResult(structure, "read_001", "+", detected)
+
+        stats.add_read(result)
+
+        assert stats.read_count == 1
+        assert stats.bc_count == 1
+        assert stats.umi_count == 1
+        assert stats.pattern_counts["Barcode"] == 1
+        assert stats.pattern_counts["UMI"] == 1
+        assert stats.pattern_counts["PolyT"] == 1
+
+    def test_add_read_without_barcode(self):
+        """Test adding read without barcode."""
+        stats = ReadStats()
+        structure = create_simple_structure()
+        detected = {"PolyT": DetectedElement(50, 79, 0)}
+        result = ExtractionResult(structure, "read_001", "+", detected)
+
+        stats.add_read(result)
+
+        assert stats.read_count == 1
+        assert stats.bc_count == 0
+        assert stats.umi_count == 0
+        assert stats.pattern_counts["PolyT"] == 1
+
+    def test_add_multiple_reads(self):
+        """Test adding multiple reads."""
+        stats = ReadStats()
+        structure = create_simple_structure()
+
+        # Read 1: has barcode, UMI, polyT
+        detected1 = create_detected_elements()
+        result1 = ExtractionResult(structure, "read_001", "+", detected1)
+
+        # Read 2: has only barcode
+        detected2 = {"Barcode": DetectedElement(22, 37, 16, "ATCCTTAGTGTTTGTC")}
+        result2 = ExtractionResult(structure, "read_002", "+", detected2)
+
+        # Read 3: no barcode
+        detected3 = {"PolyT": DetectedElement(50, 79, 0)}
+        result3 = ExtractionResult(structure, "read_003", "+", detected3)
+
+        stats.add_read(result1)
+        stats.add_read(result2)
+        stats.add_read(result3)
+
+        assert stats.read_count == 3
+        assert stats.bc_count == 2  # reads 1 and 2
+        assert stats.umi_count == 1  # only read 1
+        assert stats.pattern_counts["Barcode"] == 2
+        assert stats.pattern_counts["PolyT"] == 2
+
+    def test_str_format(self):
+        """Test string formatting."""
+        stats = ReadStats()
+        structure = create_simple_structure()
+        detected = create_detected_elements()
+        result = ExtractionResult(structure, "read_001", "+", detected)
+
+        stats.add_read(result)
+        output = str(stats)
+
+        assert "Total reads:\t1" in output
+        assert "Barcode detected:\t1" in output
+        assert "UMI detected:\t1" in output
+
+
+# =============================================================================
+# Test UniversalSingleMoleculeExtractor
+# =============================================================================
+
+class TestUniversalSingleMoleculeExtractor:
+    """Test UniversalSingleMoleculeExtractor class."""
+
+    def test_init_simple_structure(self):
+        """Test initialization with simple structure."""
+        structure = create_simple_structure()
+        extractor = UniversalSingleMoleculeExtractor(structure)
+
+        assert extractor.has_polyt is True
+        assert extractor.has_cdna is True
+        assert "UMI" in extractor.elements_to_extract
+
+    def test_init_10x_structure(self):
+        """Test initialization with 10x structure."""
+        structure = create_10x_structure()
+        extractor = UniversalSingleMoleculeExtractor(structure)
+
+        assert extractor.has_polyt is True
+        assert extractor.has_cdna is True
+        assert "R1" in extractor.constant_elements_to_detect
+        assert "TSO" in extractor.constant_elements_to_detect
+
+    def test_result_type(self):
+        """Test result_type returns ExtractionResult."""
+        structure = create_simple_structure()
+        extractor = UniversalSingleMoleculeExtractor(structure)
+
+        assert extractor.result_type() is ExtractionResult
+
+    def test_header(self):
+        """Test header generation."""
+        structure = create_simple_structure()
+        extractor = UniversalSingleMoleculeExtractor(structure)
+
+        header = extractor.header()
+
+        assert "#read_id" in header
+        assert "strand" in header
+
+
+class TestPatternDetection:
+    """Test actual barcode/UMI detection with synthetic sequences."""
+
+    def test_find_barcode_umi_forward(self):
+        """Test detection in forward strand read."""
+        structure = create_10x_structure()
+        extractor = UniversalSingleMoleculeExtractor(structure)
+
+        # Test read with known structure (from test_reads.fq READ_1)
+        # Structure: R1(22bp) + Barcode(16bp) + UMI(12bp) + PolyT + cDNA
+        read_id = "READ_1"
+        sequence = "TACACGACGCTCTTCCGATCTATCCTTAGTGTTTGTCAACCTTAGACCGTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTGGGCCTCAGCGTAATCCTATTGTGCATG"
+
+        result = extractor.find_barcode_umi(read_id, sequence)
+
+        assert result.read_id == read_id
+        assert result.strand == "+"
+        # Should detect polyT
+        assert "PolyT" in result.detected_results
+        polyt = result.detected_results["PolyT"]
+        assert polyt.start != -1
+
+    def test_find_barcode_umi_reverse(self):
+        """Test detection in reverse strand read."""
+        structure = create_10x_structure()
+        extractor = UniversalSingleMoleculeExtractor(structure)
+
+        # Test read with TSO at beginning (from test_reads.fq READ_2)
+        # This indicates reverse strand - TSO is at 3' end
+        read_id = "READ_2"
+        # Starts with TSO adapter, ends with R1
+        sequence = "AAGCAGTGGTATCAACGCAGAGTACATGGGAAGCAAGGAGGCAAGGCCCGCGCCAAGGCCAAGTCGCGGTCTTCCCGGGCCCGGGCTACCTATTCCCGGTGGGGCGTGTGCACCGGCTGCTGCGCAAGGGCAACTACGCGGAGCGTGTGGGCGCCGGCGCGCCGGTATACATGGCGGCGGTGCTGGAGTACCTAACGGCCGAGATCCTGGAGCTGGCGGGCAACGCGGCCCGCGACAACAAGAAGACTCGCATCATCCCGCGCCACCTGCAGCTGGCCATCCGCAACGACGAGGAGCTCAACAAGCTGCTGGGCAAAGTGACGATCGCGCAGGGCGGCGTCCTGCCCAACATCCAGGCCGTGCTGCTGCCCAAAAAAAAAAAAAAAAAAAAAAAAAAAAAATCAAGGAGGCTCGCTAACCACTCCTAAGAGATCGGAAGAGCGTCGTGTA"
+
+        result = extractor.find_barcode_umi(read_id, sequence)
+
+        assert result.read_id == read_id
+        # Should detect in reverse orientation (TSO at start means polyA/polyT would be internal)
+
+    def test_no_match_random_sequence(self):
+        """Test detection with random sequence (no patterns)."""
+        structure = create_simple_structure()
+        extractor = UniversalSingleMoleculeExtractor(structure)
+
+        read_id = "RANDOM"
+        sequence = "ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT" * 5
+
+        result = extractor.find_barcode_umi(read_id, sequence)
+
+        assert result.read_id == read_id
+        # May or may not detect anything, but should not crash
+
+
+class TestMultipleBarcodeNaming:
+    """Test barcode/UMI identification with various naming conventions."""
+
+    def test_barcode_prefix_lowercase(self):
+        """Test 'barcode' prefix detection (lowercase)."""
+        mdf_content = """barcode1:cDNA
+barcode1\tVAR_LIST\tAAAA,CCCC,GGGG
+"""
+        structure = MoleculeStructure(iter(mdf_content.strip().split('\n')))
+        detected = {"barcode1": DetectedElement(0, 3, 10, "AAAA")}
+
+        result = ExtractionResult(structure, "read_001", "+", detected)
+
+        assert result.get_barcode() == "AAAA"
+        assert "barcode1" in result._barcode_elements
+
+    def test_barcode_prefix_mixedcase(self):
+        """Test 'Barcode' prefix detection (mixed case)."""
+        mdf_content = """Barcode:cDNA
+Barcode\tVAR_LIST\tAAAA,CCCC,GGGG
+"""
+        structure = MoleculeStructure(iter(mdf_content.strip().split('\n')))
+        detected = {"Barcode": DetectedElement(0, 3, 10, "CCCC")}
+
+        result = ExtractionResult(structure, "read_001", "+", detected)
+
+        assert result.get_barcode() == "CCCC"
+
+    def test_umi_prefix_lowercase(self):
+        """Test 'umi' prefix detection (lowercase)."""
+        mdf_content = """umi:cDNA
+umi\tVAR_ANY\t10
+"""
+        structure = MoleculeStructure(iter(mdf_content.strip().split('\n')))
+        detected = {"umi": DetectedElement(0, 9, 0, "ACGTACGTAC")}
+
+        result = ExtractionResult(structure, "read_001", "+", detected)
+
+        assert result.get_umi() == "ACGTACGTAC"
+        assert "umi" in result._umi_elements
+
+    def test_multiple_barcodes_concatenated(self):
+        """Test multiple barcode elements are concatenated with '|'."""
+        mdf_content = """Barcode1:Barcode2:cDNA
+Barcode1\tVAR_LIST\tAAAA,CCCC
+Barcode2\tVAR_LIST\tGGGG,TTTT
+"""
+        structure = MoleculeStructure(iter(mdf_content.strip().split('\n')))
+        detected = {
+            "Barcode1": DetectedElement(0, 3, 10, "AAAA"),
+            "Barcode2": DetectedElement(4, 7, 10, "GGGG")
+        }
+
+        result = ExtractionResult(structure, "read_001", "+", detected)
+
+        assert result.get_barcode() == "AAAA|GGGG"
+
+    def test_multiple_umis_concatenated(self):
+        """Test multiple UMI elements are concatenated with '|'."""
+        mdf_content = """UMI1:UMI2:cDNA
+UMI1\tVAR_ANY\t6
+UMI2\tVAR_ANY\t6
+"""
+        structure = MoleculeStructure(iter(mdf_content.strip().split('\n')))
+        detected = {
+            "UMI1": DetectedElement(0, 5, 0, "AAAAAA"),
+            "UMI2": DetectedElement(6, 11, 0, "CCCCCC")
+        }
+
+        result = ExtractionResult(structure, "read_001", "+", detected)
+
+        assert result.get_umi() == "AAAAAA|CCCCCC"
+
+
+class TestIntegration:
+    """Integration tests with test data files."""
+
+    def test_process_test_reads_file(self):
+        """Test processing reads from test_reads.fq."""
+        structure = create_10x_structure()
+        extractor = UniversalSingleMoleculeExtractor(structure)
+
+        # Load test reads
+        test_file = os.path.join(TEST_DATA_DIR, "test_reads.fq")
+        reads = []
+        with open(test_file) as f:
+            lines = f.readlines()
+            for i in range(0, len(lines), 4):
+                if i + 1 < len(lines):
+                    read_id = lines[i].strip()[1:].split()[0]  # Remove @ and take first part
+                    sequence = lines[i + 1].strip()
+                    reads.append((read_id, sequence))
+
+        # Process each read
+        results = []
+        for read_id, sequence in reads:
+            result = extractor.find_barcode_umi(read_id, sequence)
+            results.append(result)
+
+        # Should process all reads without error
+        assert len(results) == len(reads)
+
+        # Check results are ExtractionResult instances
+        for result in results:
+            assert isinstance(result, ExtractionResult)
+            assert result.read_id in [r[0] for r in reads]
+
+
+class TestConcatenatedElementProcessing:
+    """Test concatenated element extraction and processing."""
+
+    def _create_concat_structure(self):
+        """Create structure with concatenated barcode: Barcode|1:Barcode|2:UMI:PolyT:cDNA"""
+        # Use 8bp barcodes split into two 4bp parts
+        mdf_content = """Barcode|1:Barcode|2:UMI:PolyT:cDNA
+Barcode|1\tVAR_LIST\tAAAACCCC,GGGGTTTT,ACGTACGT\t4
+Barcode|2\tVAR_LIST\tAAAACCCC,GGGGTTTT,ACGTACGT\t4
+UMI\tVAR_ANY\t8
+"""
+        return MoleculeStructure(iter(mdf_content.strip().split('\n')))
+
+    def test_concat_structure_barcode_elements(self):
+        """Test that concatenated barcode is identified by base name."""
+        structure = self._create_concat_structure()
+        assert structure.barcode_elements == ["Barcode"]
+        assert structure.umi_elements == ["UMI"]
+
+    def test_concat_structure_counts(self):
+        """Test concatenated elements counts."""
+        structure = self._create_concat_structure()
+        assert structure.concatenated_elements_counts["Barcode"] == 2
+
+    def test_concat_extractor_init(self):
+        """Test extractor initialization with concatenated elements."""
+        structure = self._create_concat_structure()
+        extractor = UniversalSingleMoleculeExtractor(structure)
+
+        # Should have one index for the base name "Barcode", not individual parts
+        assert "Barcode" in extractor.index_dict
+        assert "Barcode|1" not in extractor.index_dict
+        assert "Barcode|2" not in extractor.index_dict
+
+    def test_concat_extractor_header(self):
+        """Test header output for concatenated elements."""
+        structure = self._create_concat_structure()
+        extractor = UniversalSingleMoleculeExtractor(structure)
+        header = extractor.header()
+
+        # Header should have standard fields, not individual parts
+        assert "barcode" in header
+        assert "UMI" in header
+        # Should NOT contain individual concatenated part names
+        assert "Barcode|1" not in header
+        assert "Barcode|2" not in header
+
+    def test_process_concatenated_elements_all_detected(self):
+        """Test process_concatenated_elements when all parts are found."""
+        structure = self._create_concat_structure()
+        extractor = UniversalSingleMoleculeExtractor(structure)
+
+        # Simulate detected parts: AAAA at [10,13] and CCCC at [14,17]
+        sequence = "XXXXXXXXXX" + "AAAA" + "CCCC" + "XXXXXXXX"
+        concat_storage = {"Barcode": [(10, 13), (14, 17)]}
+        detected_results = {}
+
+        extractor.process_concatenated_elements(concat_storage, detected_results, sequence)
+
+        assert "Barcode" in detected_results
+        result = detected_results["Barcode"]
+        # Should find corrected match for "AAAACCCC" in whitelist
+        assert result.seq == "AAAACCCC"
+        assert result.start == 10
+        assert result.end == 17
+
+    def test_process_concatenated_elements_missing_part(self):
+        """Test process_concatenated_elements when a part is missing."""
+        structure = self._create_concat_structure()
+        extractor = UniversalSingleMoleculeExtractor(structure)
+
+        # Part 2 not detected (start=-1)
+        concat_storage = {"Barcode": [(10, 13), (-1, -1)]}
+        detected_results = {}
+
+        extractor.process_concatenated_elements(concat_storage, detected_results, "AAAA" * 10)
+
+        assert "Barcode" in detected_results
+        assert detected_results["Barcode"].seq == ExtractionResult.NOSEQ
+        assert detected_results["Barcode"].start == -1
+
+    def test_process_concatenated_no_correction(self):
+        """Test process_concatenated_elements without correction (raw concatenation)."""
+        structure = self._create_concat_structure()
+        extractor = UniversalSingleMoleculeExtractor(structure)
+        extractor.correct_sequences = False
+
+        sequence = "XXXXXXXXXX" + "ACGT" + "TGCA" + "XXXXXXXX"
+        concat_storage = {"Barcode": [(10, 13), (14, 17)]}
+        detected_results = {}
+
+        extractor.process_concatenated_elements(concat_storage, detected_results, sequence)
+
+        assert "Barcode" in detected_results
+        result = detected_results["Barcode"]
+        # Without correction, raw concatenated sequence is stored
+        assert result.seq == "ACGTTGCA"
+        assert result.start == 10
+        assert result.end == 17
+
+    def test_concat_extraction_result_get_barcode(self):
+        """Test ExtractionResult.get_barcode() with concatenated element result."""
+        structure = self._create_concat_structure()
+        detected = {"Barcode": DetectedElement(10, 17, 14, "AAAACCCC")}
+        result = ExtractionResult(structure, "read_001", "+", detected)
+
+        assert result.get_barcode() == "AAAACCCC"
+        assert result.has_barcode() is True
+        assert result.is_valid() is True
+
+    def test_concat_extraction_result_str_format(self):
+        """Test ExtractionResult __str__ output with concatenated elements."""
+        structure = self._create_concat_structure()
+        detected = {
+            "Barcode": DetectedElement(10, 17, 14, "AAAACCCC"),
+            "UMI": DetectedElement(18, 25, 0, "ACGTACGT"),
+            "PolyT": DetectedElement(26, 40, 0)
+        }
+        result = ExtractionResult(structure, "read_001", "+", detected)
+        output = str(result)
+
+        fields = output.split('\t')
+        assert fields[0] == "read_001"
+        assert fields[1] == "AAAACCCC"       # barcode
+        assert fields[2] == "ACGTACGT"       # UMI
+        # Should NOT contain individual part names
+
+    def test_concat_extraction_result_no_barcode(self):
+        """Test ExtractionResult when concatenated barcode correction fails."""
+        structure = self._create_concat_structure()
+        detected = {
+            "Barcode": DetectedElement(-1, -1, -1, ExtractionResult.NOSEQ),
+            "UMI": DetectedElement(18, 25, 0, "ACGTACGT")
+        }
+        result = ExtractionResult(structure, "read_001", "+", detected)
+
+        assert result.get_barcode() == ExtractionResult.NOSEQ
+        assert result.has_barcode() is False
+
+    def test_concat_additional_attributes_exclude_barcode(self):
+        """Test that concatenated barcode is excluded from additional attributes."""
+        structure = self._create_concat_structure()
+        detected = {
+            "Barcode": DetectedElement(10, 17, 14, "AAAACCCC"),
+            "PolyT": DetectedElement(26, 40, 0)
+        }
+        result = ExtractionResult(structure, "read_001", "+", detected)
+        attrs = result.get_additional_attributes()
+
+        assert "Barcode detected" not in attrs
+        assert "Barcode|1 detected" not in attrs
+        assert "Barcode|2 detected" not in attrs
+        assert "PolyT detected" in attrs
+
+
+class TestDuplicatedElementProcessing:
+    """Test duplicated element extraction and processing (majority vote)."""
+
+    def _create_dupl_structure(self):
+        """Create structure with duplicated barcode: Barcode/1:Barcode/2:UMI:PolyT:cDNA"""
+        mdf_content = """Barcode/1:Barcode/2:UMI:PolyT:cDNA
+Barcode/1\tVAR_LIST\tAAAACCCC,GGGGTTTT,ACGTACGT
+Barcode/2\tVAR_LIST\tAAAACCCC,GGGGTTTT,ACGTACGT
+UMI\tVAR_ANY\t8
+"""
+        return MoleculeStructure(iter(mdf_content.strip().split('\n')))
+
+    def test_dupl_structure_barcode_elements(self):
+        """Test that duplicated barcode is identified by base name."""
+        structure = self._create_dupl_structure()
+        assert structure.barcode_elements == ["Barcode"]
+        assert structure.umi_elements == ["UMI"]
+
+    def test_dupl_structure_counts(self):
+        """Test duplicated elements counts."""
+        structure = self._create_dupl_structure()
+        assert structure.duplicated_elements_counts["Barcode"] == 2
+
+    def test_dupl_extractor_init(self):
+        """Test extractor initialization with duplicated elements."""
+        structure = self._create_dupl_structure()
+        extractor = UniversalSingleMoleculeExtractor(structure)
+
+        # Should have one index for the base name "Barcode", not individual parts
+        assert "Barcode" in extractor.index_dict
+        assert "Barcode/1" not in extractor.index_dict
+        assert "Barcode/2" not in extractor.index_dict
+
+    def test_dupl_extractor_header(self):
+        """Test header output for duplicated elements."""
+        structure = self._create_dupl_structure()
+        extractor = UniversalSingleMoleculeExtractor(structure)
+        header = extractor.header()
+
+        assert "barcode" in header
+        assert "UMI" in header
+        # Should NOT contain individual duplicated part names
+        assert "Barcode/1" not in header
+        assert "Barcode/2" not in header
+
+    def test_process_duplicated_both_agree(self):
+        """Test process_duplicated_elements when both copies correct to the same barcode."""
+        structure = self._create_dupl_structure()
+        extractor = UniversalSingleMoleculeExtractor(structure)
+
+        # Both copies have AAAACCCC (exact match to whitelist)
+        sequence = "XXXXXXXXXX" + "AAAACCCC" + "XXXXXXXX" + "AAAACCCC" + "XXXXXXXX"
+        dupl_storage = {"Barcode": [(10, 17), (26, 33)]}
+        detected_results = {}
+
+        extractor.process_duplicated_elements(dupl_storage, detected_results, sequence)
+
+        assert "Barcode" in detected_results
+        result = detected_results["Barcode"]
+        assert result.seq == "AAAACCCC"
+        assert result.start == 10
+        assert result.end == 17
+
+    def test_process_duplicated_one_succeeds(self):
+        """Test process_duplicated_elements when one copy corrects, other fails."""
+        structure = self._create_dupl_structure()
+        extractor = UniversalSingleMoleculeExtractor(structure)
+
+        # Copy 1: AAAACCCC (exact match), Copy 2: totally wrong sequence
+        sequence = "XXXXXXXXXX" + "AAAACCCC" + "XXXXXXXX" + "TTTTTTTT" + "XXXXXXXX"
+        dupl_storage = {"Barcode": [(10, 17), (26, 33)]}
+        detected_results = {}
+
+        extractor.process_duplicated_elements(dupl_storage, detected_results, sequence)
+
+        assert "Barcode" in detected_results
+        result = detected_results["Barcode"]
+        assert result.seq == "AAAACCCC"
+
+    def test_process_duplicated_both_fail(self):
+        """Test process_duplicated_elements when neither copy corrects."""
+        structure = self._create_dupl_structure()
+        extractor = UniversalSingleMoleculeExtractor(structure)
+
+        # Both copies have garbage sequences that don't match any whitelist entry
+        sequence = "XXXXXXXXXX" + "TTTTTTTT" + "XXXXXXXX" + "TTTTTTTT" + "XXXXXXXX"
+        dupl_storage = {"Barcode": [(10, 17), (26, 33)]}
+        detected_results = {}
+
+        extractor.process_duplicated_elements(dupl_storage, detected_results, sequence)
+
+        assert "Barcode" in detected_results
+        assert detected_results["Barcode"].seq == ExtractionResult.NOSEQ
+        assert detected_results["Barcode"].start == -1
+
+    def test_process_duplicated_disagree(self):
+        """Test process_duplicated_elements when copies correct to different barcodes."""
+        structure = self._create_dupl_structure()
+        extractor = UniversalSingleMoleculeExtractor(structure)
+
+        # Copy 1: AAAACCCC, Copy 2: GGGGTTTT (both valid but different)
+        sequence = "XXXXXXXXXX" + "AAAACCCC" + "XXXXXXXX" + "GGGGTTTT" + "XXXXXXXX"
+        dupl_storage = {"Barcode": [(10, 17), (26, 33)]}
+        detected_results = {}
+
+        extractor.process_duplicated_elements(dupl_storage, detected_results, sequence)
+
+        assert "Barcode" in detected_results
+        assert detected_results["Barcode"].seq == ExtractionResult.NOSEQ
+        assert detected_results["Barcode"].start == -1
+
+    def test_process_duplicated_none_detected(self):
+        """Test process_duplicated_elements when no copies are detected."""
+        structure = self._create_dupl_structure()
+        extractor = UniversalSingleMoleculeExtractor(structure)
+
+        dupl_storage = {"Barcode": [(-1, -1), (-1, -1)]}
+        detected_results = {}
+
+        extractor.process_duplicated_elements(dupl_storage, detected_results, "AAAA" * 20)
+
+        assert "Barcode" in detected_results
+        assert detected_results["Barcode"].seq == ExtractionResult.NOSEQ
+        assert detected_results["Barcode"].start == -1
+
+    def test_process_duplicated_one_detected(self):
+        """Test process_duplicated_elements when only one copy is detected."""
+        structure = self._create_dupl_structure()
+        extractor = UniversalSingleMoleculeExtractor(structure)
+
+        # Only copy 1 detected with AAAACCCC
+        sequence = "XXXXXXXXXX" + "AAAACCCC" + "XXXXXXXXXXXXXXXX"
+        dupl_storage = {"Barcode": [(10, 17), (-1, -1)]}
+        detected_results = {}
+
+        extractor.process_duplicated_elements(dupl_storage, detected_results, sequence)
+
+        assert "Barcode" in detected_results
+        result = detected_results["Barcode"]
+        assert result.seq == "AAAACCCC"
+        assert result.start == 10
+
+    def test_process_duplicated_no_correction_match(self):
+        """Test process_duplicated_elements without correction when copies match."""
+        structure = self._create_dupl_structure()
+        extractor = UniversalSingleMoleculeExtractor(structure)
+        extractor.correct_sequences = False
+
+        sequence = "XXXXXXXXXX" + "ACGTACGT" + "XXXXXXXX" + "ACGTACGT" + "XXXXXXXX"
+        dupl_storage = {"Barcode": [(10, 17), (26, 33)]}
+        detected_results = {}
+
+        extractor.process_duplicated_elements(dupl_storage, detected_results, sequence)
+
+        assert "Barcode" in detected_results
+        result = detected_results["Barcode"]
+        assert result.seq == "ACGTACGT"
+        assert result.start == 10
+
+    def test_process_duplicated_no_correction_differ(self):
+        """Test process_duplicated_elements without correction when copies differ."""
+        structure = self._create_dupl_structure()
+        extractor = UniversalSingleMoleculeExtractor(structure)
+        extractor.correct_sequences = False
+
+        sequence = "XXXXXXXXXX" + "ACGTACGT" + "XXXXXXXX" + "TGCATGCA" + "XXXXXXXX"
+        dupl_storage = {"Barcode": [(10, 17), (26, 33)]}
+        detected_results = {}
+
+        extractor.process_duplicated_elements(dupl_storage, detected_results, sequence)
+
+        assert "Barcode" in detected_results
+        result = detected_results["Barcode"]
+        # Should be comma-separated when they differ
+        assert result.seq == "ACGTACGT,TGCATGCA"
+        assert result.start == 10
+
+    def test_process_duplicated_no_correction_one_detected(self):
+        """Test process_duplicated_elements without correction, one copy detected."""
+        structure = self._create_dupl_structure()
+        extractor = UniversalSingleMoleculeExtractor(structure)
+        extractor.correct_sequences = False
+
+        sequence = "XXXXXXXXXX" + "ACGTACGT" + "XXXXXXXXXXXXXXXX"
+        dupl_storage = {"Barcode": [(10, 17), (-1, -1)]}
+        detected_results = {}
+
+        extractor.process_duplicated_elements(dupl_storage, detected_results, sequence)
+
+        assert "Barcode" in detected_results
+        result = detected_results["Barcode"]
+        assert result.seq == "ACGTACGT"
+        assert result.start == 10
+
+    def test_dupl_extraction_result_get_barcode(self):
+        """Test ExtractionResult.get_barcode() with duplicated element result."""
+        structure = self._create_dupl_structure()
+        detected = {"Barcode": DetectedElement(10, 17, 14, "AAAACCCC")}
+        result = ExtractionResult(structure, "read_001", "+", detected)
+
+        assert result.get_barcode() == "AAAACCCC"
+        assert result.has_barcode() is True
+        assert result.is_valid() is True
+
+    def test_dupl_additional_attributes_exclude_barcode(self):
+        """Test that duplicated barcode is excluded from additional attributes."""
+        structure = self._create_dupl_structure()
+        detected = {
+            "Barcode": DetectedElement(10, 17, 14, "AAAACCCC"),
+            "PolyT": DetectedElement(26, 40, 0)
+        }
+        result = ExtractionResult(structure, "read_001", "+", detected)
+        attrs = result.get_additional_attributes()
+
+        assert "Barcode detected" not in attrs
+        assert "Barcode/1 detected" not in attrs
+        assert "Barcode/2 detected" not in attrs
+        assert "PolyT detected" in attrs
+
+
+if __name__ == '__main__':
+    pytest.main([__file__, '-v'])
diff --git a/tests/universal_data/10x.mdf b/tests/universal_data/10x.mdf
new file mode 100644
index 00000000..b14e4004
--- /dev/null
+++ b/tests/universal_data/10x.mdf
@@ -0,0 +1,5 @@
+R1:Barcode:UMI:PolyT:cDNA:TSO
+R1	CONST	CTACACGACGCTCTTCCGATCT
+Barcode	VAR_FILE	barcodes_small.tsv
+UMI	VAR_ANY	12
+TSO	CONST	CCCATGTACTCTGCGTTGATACCACTGCTT
diff --git a/tests/universal_data/barcodes_small.tsv b/tests/universal_data/barcodes_small.tsv
new file mode 100644
index 00000000..3fd4bd57
--- /dev/null
+++ b/tests/universal_data/barcodes_small.tsv
@@ -0,0 +1,8 @@
+AAACCCGGGTTTAAAC
+TTTGGGCCCAAATTTG
+GGGGAAAACCCCTTTT
+ATCCTTAGTGTTTGTC
+CTTAGGAGTGGTTAGC
+TGAGGGCCAACGTGCT
+GCGCTTGCATAGCAGG
+ATCATGTCATTGTGAT
diff --git a/tests/universal_data/simple.mdf b/tests/universal_data/simple.mdf
new file mode 100644
index 00000000..f1ce3ecb
--- /dev/null
+++ b/tests/universal_data/simple.mdf
@@ -0,0 +1,3 @@
+Barcode:UMI:PolyT:cDNA
+Barcode	VAR_LIST	AAACCCGGGTTTAAAC,TTTGGGCCCAAATTTG,GGGGAAAACCCCTTTT
+UMI	VAR_ANY	10
diff --git a/tests/universal_data/test_reads.fq b/tests/universal_data/test_reads.fq
new file mode 100644
index 00000000..281eb1e2
--- /dev/null
+++ b/tests/universal_data/test_reads.fq
@@ -0,0 +1,16 @@
+@READ_1_ATCCTTAGTGTTTGTC_AACCTTAGACCG_forward
+TACACGACGCTCTTCCGATCTATCCTTAGTGTTTGTCAACCTTAGACCGTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTGGGCCTCAGCGTAATCCTATTGTGCATGAATTTTATGTCATCACTTGGCTTCAACATTGGGTTGTCATCATTGAACCCGCCCGGTTGCGTTGATACAATGCTT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@READ_2_CTTAGGAGTGGTTAGC_GAGCCTCCTTGA_reverse
+AAGCAGTGGTATCAACGCAGAGTACATGGGAAGCAAGGAGGCAAGGCCCGCGCCAAGGCCAAGTCGCGGTCTTCCCGGGCCCGGGCTACCTATTCCCGGTGGGGCGTGTGCACCGGCTGCTGCGCAAGGGCAACTACGCGGAGCGTGTGGGCGCCGGCGCGCCGGTATACATGGCGGCGGTGCTGGAGTACCTAACGGCCGAGATCCTGGAGCTGGCGGGCAACGCGGCCCGCGACAACAAGAAGACTCGCATCATCCCGCGCCACCTGCAGCTGGCCATCCGCAACGACGAGGAGCTCAACAAGCTGCTGGGCAAAGTGACGATCGCGCAGGGCGGCGTCCTGCCCAACATCCAGGCCGTGCTGCTGCCCAAAAAAAAAAAAAAAAAAAAAAAAAAAAAATCAAGGAGGCTCGCTAACCACTCCTAAGAGATCGGAAGAGCGTCGTGTA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@READ_3_TGAGGGCCAACGTGCT_CAAAGCCTTCCC_forward
+TACACGACGCTCTTCCGATCTTGAGGGCCAACGTGCTCAAAGCCTTCCCTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTGAAGAGCAGGACACAGGAAGATGGACTCTGAGATCCAGCAGGAAGCGGAGAAGGCACTGCAGCAAAGGCTCTGCTCTGGAAGCAAAGGGAAGCAGCTTCCAAGAGAGCAGGGCAGGAGGAAGCCGACTGAGGACTGCTGTGCCGCAGGCGAGGCTTAAAGCCCAGAAAGCAATATGCAAGGTAGAAGAGACCAATGTGTACCGCCGAGAAGCTCAACGGAGGCACCTTGTGCCATGGTCTCAACTTAATCTAGGATGTTTCTGAAGCCGCCCAGACTCAACAAACTTGTCTACAAACTTTGTTTCTATTTGTAATGCTTAATCTTGTCTTCTTATACTTTTCAAATGTTAAGGTTTGTGCATAGGCAAGACTTTTTATATCAAAAGGACAGGAGTCTCTGTATACCCTGCACATGTATGTGAGCCTCTGTACCCTGCGGTACACATGTATGTGTGGCTCTGTACCCTGCAGTGCACACGTGTGTGTGGCTCTGTACCCTGCAGTGCACATGTGTGTGTGGCTCTGTACCCTGGCAGTGCAGAATGGCGGTGTGCGATGTACCCAGCATGCACTATTGTGTGTGGCCTCTAGAAGATCTGATGAAATGAGCATTCTCACCCTGCTTCCAGCTACATTATGATTTAAAAATATTTTTACATTTATTTGTATGTACTTGTATTACTGCATGAAAACTACTGCCACATTTGATCAGAGGACATGTACTTGTATCACAGCATGAAAACTACTGCAACATTTGATCAGAGGACATGTGTGTGTGTGTGCATGTGCGCTTGCATGTTTGTATCCACAGTGTATGTGCAGAAGTCAAAAGACAACTTGCACAAGTCAGCTCTATCTTTCTTTCTACTACATAGGGTCCAGGGGTAAAATTCAGGTCCTCAAATTTGCTGGCGAGCACCTATACTCGAATTGGACCACCTCACTTGGGACTTCCTAAAGACATGCTTAGACACTTACACCCCCGGATCAACATCTACACTAAAAGAATAGAAAGGCCATATTGACATCAACTAATCAGTTCTCCCATTATGACTCACACAAATCCTCAAGTGGATTACAGGATTAGGAGATCTGAATTTTATCTTTCTCCGACCCACTCTACCGAGTTTCCACACTTACCCTGCTCTGGTGGTAGAAGGCTCCCTAGCATATAGCCCAAGAGGCCACTCTTGGAGGAAAAGATTAGTTACAGTCAGTGAACCCCAGGAGAGAAAGTTCCTACATACTCACCCTCCCTGGGATAGCCTTCTCTCCCTGCACACAAAGCCCACCAACTATCCTGCAGCATTTCCTCCCTAAGTTACCTGTCTCCTCCTAATTACTCTCTTCCCAGTTTATTCATACATTCCCCTACAGAAGCTAGGAGGGCTGGGATTTCTGGCTGGCCTGCCCACTGCTGAAGTCCTTGGGAGTTCGTTGGAACATTGGGGTGCAATGTCAACAGAACAGTTTATTTGTCTGTATTCTTTCCATTGCACCCCATCTTCTCTATCACAATAGGTGTTTTTTCTTTCAAAGTAGCTACCACTGGACTCTTTCAAAATCTATCACAAAGATGGCCAGTTCTGCTGTGGGTTGCTAAGTCTGTCGTGTACCCTCAGAGTCTGCCTTATAAGGACTTGACAGAAATTATTTTTAACAACATTCTGTTGTGATATTATATGCAAGGCAGACCACATCTTCATGTATACATTCTGTTAAAACAACTAACCCTAAAGGCCAGGCATGGCCAGATGTTCATAGTCATTTGTGCATCTAGCCCTTGGCCATCCTGTGGTTCATAACTGAAGAAGACAAGACATGACTTGCTTCCCCCTAGACCATTGTGGCTGCTAAGGCCTACTTGGTGCTTGTAAAAATGAATCTCATTTTATTAATTGTGTGACTAAAGGTAAGTCACATAGTCCATCAGACCTTCATGTCTATCAACTGGAAAACAGTATCAATAGTAATACATTAGTGCTATAAGCCATTAGTCACTCTATATAAAACAAGCTCCTAAATGGTACTAAGTGAATAGAAGCTGTAATTATAGCTAGCATGATGACTGGTACATTTAATTGTAGGCTTTTGTCCCTATATTGTTTTGTAACAGGTTTTAGTTTATGCATCTAGACTTAGTTTGTCCAATTAAATCTTGAGTGTAACAACTGCAGAAAGTCTCCATGTTGCAACCACTTTGCAACATGCCTCTGAATGCTGTCTTCACCAGGTAATTCCCTTGGCTAATAGTTCTTAGGTTCATACGATATGCCAGACACAGTCTCTGTTTTGAAGGAGCTGCAGATCTCAAGAGTGAAAGGAAAGTGTCCACATGGATCTGGGGTGCATATACTCCCATGCTCCAACACAGCTGAGAAGATGGACTTTCTAACATTTACCACTTCAAATCCCACGGGAAAAGCACGAATGTATGAGCCACAGATTCTAAATAAAGAACTTAGACTATCTAACACGGAGTCAAGAAATAGGTGGAAGAAGGAAGATAAAGTTTCACCTCCTCAAGTAACGAATCCCATACCAGTCTGTTTGTAGATAGAACTGGTTAGAGTTAGCATCTTCCGTCCCCCTCGTTGGCTTGAGCTGGGGCAACCTTGGGTCTTTGGGTGGGTCCCATGTACTCTGCGTTGATACCACTGCTT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+@READ_4_UNKNOWN_BARCODE_UNKNOWNUMI_random
+ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII