diff --git a/.github/workflows/build-tests.yml b/.github/workflows/build-tests.yml
index 5c4bbb13..ece7eb68 100644
--- a/.github/workflows/build-tests.yml
+++ b/.github/workflows/build-tests.yml
@@ -17,11 +17,11 @@ jobs:
         with:
           repo: 'WGLab/ContextSV'
           version: 'tags/v0.1.0'
-          file: 'TestData.zip'
+          file: 'SampleData.zip'

       - name: Unzip assets
         shell: bash --login {0}
-        run: unzip TestData.zip
+        run: unzip SampleData.zip -d tests

       - name: Set up conda (Miniconda only)
         uses: conda-incubator/setup-miniconda@v2
@@ -47,7 +47,7 @@ jobs:
           ls -l $CONDA_PREFIX/include/htslib
           make CONDA_PREFIX=$CONDA_PREFIX

-      - name: Run unit tests
+      - name: Test installation
        shell: bash --login {0}
        run: |
          source $(conda info --base)/etc/profile.d/conda.sh
@@ -55,6 +55,14 @@ jobs:
          ./build/contextsv --version
          ./build/contextsv --help

+      - name: Run tests
+        shell: bash --login {0}
+        run: |
+          source $(conda info --base)/etc/profile.d/conda.sh
+          conda activate contextsv
+          mkdir -p output
+          python -m pytest -s -v
+
      # run: |
      #   source $(conda info --base)/etc/profile.d/conda.sh
      #   conda activate contextsv
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 00000000..95933f28
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,21 @@
+# Use the miniconda container
+FROM continuumio/miniconda3:main
+
+# Version argument (set during build)
+ARG CONTEXTSV_VERSION
+
+WORKDIR /app
+
+RUN apt-get update
+RUN conda update conda
+
+# Install ContextSV
+RUN conda config --add channels wglab
+RUN conda config --add channels conda-forge
+RUN conda config --add channels bioconda
+RUN conda create -n contextsv python=3.9
+RUN echo "conda activate contextsv" >> ~/.bashrc
+SHELL ["/bin/bash", "--login", "-c"]
+RUN conda install -n contextsv -c wglab -c conda-forge -c bioconda contextsv=${CONTEXTSV_VERSION} && conda clean -afy
+
+ENTRYPOINT ["conda", "run", "--no-capture-output", "-n", "contextsv", "contextsv"]
diff --git a/Makefile b/Makefile
index 207f7e09..ae39b32b 100644
--- a/Makefile
+++ b/Makefile
@@ -4,13 +4,6 @@ SRC_DIR := $(CURDIR)/src
 BUILD_DIR := $(CURDIR)/build
 LIB_DIR := $(CURDIR)/lib

-# Version header
-VERSION := $(shell git describe --tags --always)
-VERSION_HEADER := $(INCL_DIR)/version.h
-.PHONY: $(VERSION_HEADER)
-	@echo "#pragma once" > $@
-	@echo "#define VERSION \"$(VERSION)\"" >> $@
-
 # Conda environment directories
 CONDA_PREFIX := $(shell echo $$CONDA_PREFIX)
 CONDA_INCL_DIR := $(CONDA_PREFIX)/include
@@ -50,4 +43,3 @@ $(BUILD_DIR)/%.o: $(SRC_DIR)/%.cpp
 # Clean the build directory
 clean:
 	rm -rf $(BUILD_DIR)
-
\ No newline at end of file
diff --git a/README.md b/README.md
index 2cb17bf8..3a7e0cda 100644
--- a/README.md
+++ b/README.md
@@ -13,8 +13,32 @@ Class documentation is available at (optional: )")
-        sys.exit(1)
-
-    # The second argument is the input VCF file.
-    input_vcf = sys.argv[2]
-
-    # The third argument is the epsilon value for the DBSCAN clustering. If
-    # empty, set to 34.
-    epsilon = sys.argv[3] if len(sys.argv) >= 4 else 34
-
-    # The fourth argument is the suffix for the output file. If empty, set
-    # to ".merged"
-    suffix = sys.argv[4] if len(sys.argv) >= 5 else ".merged"
-
-    # Run the SV merger.
-    from python import sv_merger
-    sv_merger.sv_merger(input_vcf, mode='dbscan', eps=int(epsilon), suffix=suffix)
-
-# Run the program.
-main()
diff --git a/conda/meta.yaml b/conda/meta.yaml
index 698a62e4..d217ee43 100644
--- a/conda/meta.yaml
+++ b/conda/meta.yaml
@@ -1,46 +1,45 @@
-{% set version = "1.0.0" %}
-{% set sha256 = "" %}
+{% set version = environ.get('GIT_DESCRIBE_TAG').lstrip('v') %}
+{% set revision = environ.get('GIT_FULL_HASH') %}

 package:
-  name: longreadsum
+  name: contextsv
   version: {{ version }}

 source:
-  url: https://github.com/WGLab/LongReadSum/archive/refs/tags/v{{ version }}.tar.gz
-  sha256: '{{ sha256 }}'
+  path: ../
+  git_lfs: false
+
+channels:
+  - conda-forge
+  - bioconda
+  - defaults

 build:
   number: 0
-  skip: true  # [py2k]
-  entry_points:
-    - contextsv = entry_point:main

 requirements:
   build:
     - {{ compiler('cxx') }}
     - make
   host:
-    - python
-    - swig
-    - htslib
+    - htslib=1.20
   run:
-    - python
-    - numpy
-    - htslib
-    - swig
-    - plotly
-    - pandas
-    - scikit-learn
-    - joblib
+    - htslib=1.20

 test:
   commands:
     - contextsv --help
+    - test -f $PREFIX/bin/contextsv
+    - contextsv --version

 about:
   home: https://github.com/WGLab/ContextSV
   license: MIT
-  summary: 'Long read structural variant calling tool'
+  license_family: MIT
+  license_file: LICENSE
+  summary: 'Long-read structural variant caller'
   description: |
-    An alignment-based, generalized structural variant caller for long-read sequencing/mapping data.
+    ContextSV is a tool for calling structural variants from long read
+    sequencing data that integrates important copy number and single-nucleotide
+    allele frequency information to improve accuracy.
   dev_url: https://github.com/WGLab/ContextSV
   doc_url: https://github.com/WGLab/ContextSV#readme
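For context on the recipe change above: when building from a local git checkout, conda-build exposes the current tag to meta.yaml through the GIT_DESCRIBE_TAG environment variable, so the version is now derived at build time instead of being hardcoded. A minimal Python sketch of the Jinja expression (the fallback tag here is illustrative, not part of the recipe):

```python
import os

# conda-build sets GIT_DESCRIBE_TAG (e.g. "v1.0.0") when the recipe source is
# a git checkout; the recipe strips the leading "v" to get the conda version.
tag = os.environ.get("GIT_DESCRIBE_TAG", "v1.0.0")  # fallback for illustration
version = tag.lstrip("v")
print(version)  # 1.0.0
```

Note that str.lstrip("v") strips a character set rather than a single prefix, which is harmless for "v"-prefixed semantic-version tags like v1.0.0.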
diff --git a/include/cnv_caller.h b/include/cnv_caller.h
index afdd78b3..293e547a 100644
--- a/include/cnv_caller.h
+++ b/include/cnv_caller.h
@@ -105,9 +105,6 @@ class CNVCaller {
         void readSNPAlleleFrequencies(std::string chr, uint32_t start_pos, uint32_t end_pos, std::vector& snp_pos, std::unordered_map& snp_baf, std::unordered_map& snp_pfb, const InputData& input_data) const;

-        // Save a TSV with B-allele frequencies, log2 ratios, and copy number predictions
-        void saveSVCopyNumberToTSV(SNPData& snp_data, std::string filepath, std::string chr, uint32_t start, uint32_t end, std::string sv_type, double likelihood) const;
-
         void saveSVCopyNumberToJSON(SNPData& before_sv, SNPData& after_sv, SNPData& snp_data, std::string chr, uint32_t start, uint32_t end, std::string sv_type, double likelihood, const std::string& filepath) const;
 };
diff --git a/include/input_data.h b/include/input_data.h
index 1e2c3c1e..32c78ce2 100644
--- a/include/input_data.h
+++ b/include/input_data.h
@@ -77,11 +77,6 @@ class InputData {
         std::string getChromosome() const;
         bool isSingleChr() const;

-        // Set the region to analyze.
-        void setRegion(std::string region);
-        std::pair<int32_t, int32_t> getRegion() const;
-        bool isRegionSet() const;
-
         // Set the output directory where the results will be written.
         void setOutputDir(std::string dirpath);
         std::string getOutputDir() const;
@@ -116,7 +111,6 @@ class InputData {
         double dbscan_min_pts_pct;
         std::string chr;  // Chromosome to analyze
         std::pair<int32_t, int32_t> start_end;  // Region to analyze
-        bool region_set;  // True if a region is set
         int thread_count;
         std::string hmm_filepath;
         std::string cnv_filepath;
diff --git a/include/utils.h b/include/utils.h
index d95f0a8a..bb6d9847 100644
--- a/include/utils.h
+++ b/include/utils.h
@@ -13,40 +13,6 @@

 /// @endcond

-// Guard to close the BAM file
-// struct BamFileGuard {
-//     samFile* fp_in;
-//     hts_idx_t* idx;
-//     bam_hdr_t* bamHdr;
-
-//     BamFileGuard(samFile* fp_in, hts_idx_t* idx, bam_hdr_t* bamHdr)
-//         : fp_in(fp_in), idx(idx), bamHdr(bamHdr) {}
-
-//     ~BamFileGuard() {
-//         if (idx) {
-//             hts_idx_destroy(idx);
-//             idx = nullptr;
-//         }
-//         if (bamHdr) {
-//             bam_hdr_destroy(bamHdr);
-//             bamHdr = nullptr;
-//         }
-//         if (fp_in) {
-//             sam_close(fp_in);
-//             fp_in = nullptr;
-//         }
-//     }
-
-//     BamFileGuard(const BamFileGuard&) = delete;  // Non-copyable
-//     BamFileGuard& operator=(const BamFileGuard&) = delete;  // Non-assignable
-// };
-
-// Print the progress of a task
-void printProgress(int progress, int total);
-
-// Run bcftools to determine the chr notation of a VCF file
-bool isChrNotation(std::string vcf_filepath);
-
 // Print a message to stdout in a thread-safe manner
 void printMessage(std::string message);

@@ -55,8 +21,6 @@ void printError(std::string message);

 std::string getElapsedTime(std::chrono::high_resolution_clock::time_point start, std::chrono::high_resolution_clock::time_point end);

-std::string removeChrPrefix(std::string chr);
-
 void printMemoryUsage(const std::string &functionName);

 bool fileExists(const std::string &filepath);
diff --git a/include/version.h b/include/version.h
index d38178a8..5a232bc4 100644
--- a/include/version.h
+++ b/include/version.h
@@ -1,2 +1,9 @@
-#pragma once
-#define VERSION "v0.1.0-41-gd62fe12"
+// version.h
+#ifndef CONTEXTSV_VERSION_H
+#define CONTEXTSV_VERSION_H
+
+#define VERSION_MAJOR 1
+#define VERSION_MINOR 0
+#define VERSION_PATCH 0
+
+#endif  // CONTEXTSV_VERSION_H
diff --git a/python/cnv_plots_json.py b/python/cnv_plots_json.py
index 768058e9..9f2c9458 100644
--- a/python/cnv_plots_json.py
+++ b/python/cnv_plots_json.py
@@ -5,7 +5,7 @@ import plotly
 from plotly.subplots import make_subplots

-min_sv_length = 200000  # Minimum SV length in base pairs
+min_sv_length = 60000  # Minimum SV length in base pairs

 # Set up argument parser
 parser = argparse.ArgumentParser(description='Generate CNV plots from JSON data.')
@@ -20,12 +20,12 @@
 # State marker colors
 # https://community.plotly.com/t/plotly-colours-list/11730/6
 state_colors_dict = {
-    '1': 'red',
-    '2': 'darkred',
-    '3': 'darkgreen',
+    '1': 'darkred',
+    '2': 'red',
+    '3': 'gray',
     '4': 'green',
-    '5': 'darkblue',
-    '6': 'blue',
+    '5': 'blue',
+    '6': 'darkblue',
 }

 sv_type_dict = {
@@ -39,6 +39,7 @@
     # If a chromosome is specified, filter the SVs by that chromosome
     if args.chromosome and sv['chromosome'] != args.chromosome:
+        print(f"Skipping SV {sv['chromosome']}:{sv['start']}-{sv['end']} of type {sv['sv_type']} (not on chromosome {args.chromosome})")
         continue

     # Filter out SVs that are smaller than the minimum length
@@ -224,7 +225,7 @@
     # Set the title of the plot.
     fig.update_layout(
-        title_text = f"{sv_type_dict[sv_type]} at {chromosome}:{start}-{end} ({sv_length} bp) (LLH={likelihood})",
+        title_text = f"{sv_type_dict[sv_type]} at {chromosome}:{start}-{end} ({sv_length} bp)",
         title_x = 0.5,
         showlegend = False,
     )
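The plotting script above consumes the CNVCalls.json records written by saveSVCopyNumberToJSON. A minimal reader sketch, assuming the field names seen elsewhere in this diff (chromosome, start, end, sv_type, likelihood, size) and an illustrative file path:

```python
import json

min_sv_length = 60000  # matches the new threshold above

# "output/CNVCalls.json" is an illustrative path; the real file is written
# into the --outdir directory when --save-cnv is enabled.
with open("output/CNVCalls.json", encoding="utf-8") as f:
    svs = json.load(f)

for sv in svs:
    if sv["end"] - sv["start"] < min_sv_length:
        continue  # skip SVs below the minimum plotted length
    print(sv["chromosome"], sv["start"], sv["end"], sv["sv_type"], sv["likelihood"])
```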
diff --git a/setup.py b/setup.py
deleted file mode 100644
index c8591523..00000000
--- a/setup.py
+++ /dev/null
@@ -1,65 +0,0 @@
-"""
-setup.py:
-    This file is used to install the package.
-"""
-
-import os
-import glob
-from setuptools import setup, find_packages, Extension
-
-print("Running setup.py...")
-
-
-# Set the project metadata
-NAME = "contextsv"
-VERSION = "1.0.0"
-AUTHOR = "WGLab"
-DESCRIPTION = "ContextSV: A tool for integrative structural variant detection."
-
-# Get the conda environment's include path
-conda_prefix = os.environ.get("CONDA_PREFIX")
-if conda_prefix is None:
-    raise AssertionError("CONDA_PREFIX is not set.")
-
-conda_include_dir = os.path.join(conda_prefix, "include")
-
-# Get the conda environment's lib path
-conda_lib_dir = os.path.join(conda_prefix, "lib")
-
-# Set the project dependencies
-SRC_DIR = "src"
-# SRC_FILES = glob.glob(os.path.join(SRC_DIR, "*.cpp"))
-SRC_FILES = glob.glob(os.path.join(SRC_DIR, "*.cpp"))
-SRC_FILES = [f for f in SRC_FILES if "main.cpp" not in f]  # Ignore the main.cpp file
-
-INCLUDE_DIR = "include"
-INCLUDE_FILES = glob.glob(os.path.join(INCLUDE_DIR, "*.h"))
-
-# Set up the extension
-ext = Extension(
-    name="_" + NAME,
-    sources=SRC_FILES,
-    include_dirs=[INCLUDE_DIR, conda_include_dir],
-    extra_compile_args=["-std=c++17"],
-    language="c++",
-    libraries=["hts"],
-    library_dirs=[conda_lib_dir]
-)
-
-# Set up the module
-setup(
-    name=NAME,
-    version=VERSION,
-    author=AUTHOR,
-    description=DESCRIPTION,
-    ext_modules=[ext],
-    py_modules=[NAME],
-    packages=find_packages(),
-    test_suite="tests",
-    entry_points={
-        "console_scripts": [
-            "contextsv = contextsv:main"
-        ]
-    }
-)
-
diff --git a/src/cnv_caller.cpp b/src/cnv_caller.cpp
index 66f1f146..c91d1d31 100644
--- a/src/cnv_caller.cpp
+++ b/src/cnv_caller.cpp
@@ -171,22 +171,6 @@ std::tuple<double, SVType, Genotype, int> CNVCaller::runCopyNumberPrediction(std
         printError("ERROR: Invalid SV region for copy number prediction: " + chr + ":" + std::to_string((int)start_pos) + "-" + std::to_string((int)end_pos));
         return std::make_tuple(0.0, SVType::UNKNOWN, Genotype::UNKNOWN, 0);
     }
-    /*
-    // Check that there is no large number of zero-depth positions in the region
-    int zero_depth_count = 0;
-    for (uint32_t pos = start_pos; pos <= end_pos; pos++)
-    {
-        if (pos < pos_depth_map.size() && pos_depth_map[pos] == 0)
-        {
-            zero_depth_count++;
-        }
-    }
-    if (zero_depth_count > 0.1 * (end_pos - start_pos + 1))
-    {
-        printError("WARNING: Too many zero-depth positions in the SV region for copy number prediction, skipping: " + chr + ":" + std::to_string((int)start_pos) + "-" + std::to_string((int)end_pos));
-        return std::make_tuple(0.0, SVType::UNKNOWN, Genotype::UNKNOWN, 0);
-    }
-    */

     // Run the Viterbi algorithm on SNPs in the SV region
     // Only extend the region if "save CNV data" is enabled
@@ -226,27 +210,34 @@ std::tuple<double, SVType, Genotype, int> CNVCaller::runCopyNumberPrediction(std
     std::vector<int>& state_sequence = prediction.first;
     double likelihood = prediction.second;

-    // Determine if there is a majority state within the SV region
-    int max_state = 0;
-    int max_count = 0;
+    // Get state percentages
+    std::unordered_map<int, double> state_pct;
+    double state_count = (double) state_sequence.size();
+    double largest_non_neutral_pct = 0.0;
+    int non_neutral_state = 0;
     for (int i = 0; i < 6; i++)
     {
-        int state_count = std::count(state_sequence.begin(), state_sequence.end(), i+1);
-        if (state_count > max_count)
+        state_pct[i+1] = (double)std::count(state_sequence.begin(), state_sequence.end(), i+1) / state_count;
+        if (i+1 != 3 && state_pct[i+1] > largest_non_neutral_pct)
         {
-            max_state = i+1;
-            max_count = state_count;
+            largest_non_neutral_pct = state_pct[i+1];
+            non_neutral_state = i+1;
         }
     }

-    // If there is no majority state, then set the state to unknown
-    double pct_threshold = 0.50;
-    int state_count = (int) state_sequence.size();
-    if ((double) max_count / (double) state_count < pct_threshold)
+    // Use the state exceeding the threshold if non-neutral
+    double pct_threshold = 0.3;
+    int max_state = 0;  // Unknown state
+    if (largest_non_neutral_pct > pct_threshold)
     {
-        max_state = 0;
+        max_state = non_neutral_state;
+
+    // Use the neutral state if it exceeds the threshold
+    } else if (state_pct[3] > pct_threshold) {
+        max_state = 3;
     }

+    // Determine the genotype and SV type
     Genotype genotype = getGenotypeFromCNState(max_state);
     SVType predicted_cnv_type = getSVTypeFromCNState(max_state);
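To make the new state-selection rule above concrete: the six HMM copy-number states are tallied as fractions of the Viterbi path, state 3 (copy-neutral) is treated specially, and a non-neutral state now wins if it covers more than 30% of the path even without an absolute majority. A Python sketch of the same logic (illustrative, not part of the codebase):

```python
from collections import Counter

def majority_cn_state(state_sequence, pct_threshold=0.3, neutral_state=3):
    """Mirror of the updated C++ selection: prefer the most common
    non-neutral HMM state if its fraction exceeds the threshold, fall
    back to the neutral state, otherwise return 0 (unknown)."""
    if not state_sequence:
        return 0
    total = len(state_sequence)
    counts = Counter(state_sequence)
    pct = {state: counts.get(state, 0) / total for state in range(1, 7)}
    # Most frequent non-neutral state
    non_neutral = max((s for s in pct if s != neutral_state), key=lambda s: pct[s])
    if pct[non_neutral] > pct_threshold:
        return non_neutral
    if pct[neutral_state] > pct_threshold:
        return neutral_state
    return 0

# 50% of the path in state 5 beats 33% neutral under the 0.3 threshold
assert majority_cn_state([3, 3, 5, 5, 5, 6]) == 5
```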
@@ -817,88 +808,6 @@ void CNVCaller::readSNPAlleleFrequencies(std::string chr, uint32_t start_pos, ui
     bcf_sr_destroy(pfb_reader);
 }

-void CNVCaller::saveSVCopyNumberToTSV(SNPData& snp_data, std::string filepath, std::string chr, uint32_t start, uint32_t end, std::string sv_type, double likelihood) const
-{
-    // Open the TSV file for writing
-    std::ofstream tsv_file(filepath);
-    if (!tsv_file.is_open())
-    {
-        std::cerr << "ERROR: Could not open TSV file for writing: " << filepath << std::endl;
-        exit(1);
-    }
-
-    // Ensure all values are valid, and print an error message if not
-    if (chr == "" || start == 0 || end == 0 || sv_type == "")
-    {
-        std::cerr << "ERROR: Invalid SV information for TSV file: " << chr << ":" << start << "-" << end << " " << sv_type << std::endl;
-        exit(1);
-    }
-
-    if (snp_data.pos.size() == 0)
-    {
-        std::cerr << "ERROR: No SNP data available for TSV file: " << chr << ":" << start << "-" << end << " " << sv_type << std::endl;
-        exit(1);
-    }
-
-    if (likelihood == 0.0)
-    {
-        std::cerr << "ERROR: Invalid likelihood value for TSV file: " << chr << ":" << start << "-" << end << " " << sv_type << std::endl;
-        exit(1);
-    }
-
-    if (sv_type.empty())
-    {
-        std::cerr << "ERROR: Invalid SV type for TSV file: " << chr << ":" << start << "-" << end << " " << sv_type << std::endl;
-        exit(1);
-    }
-
-    // Format the size string in kb
-    std::stringstream ss;
-    ss << std::fixed << std::setprecision(6) << likelihood;
-    std::string likelihood_str = ss.str();
-
-    // Print SV information to the TSV file
-    tsv_file << "SVTYPE=" << sv_type << std::endl;
-    tsv_file << "POS=" << chr << ":" << start << "-" << end << std::endl;
-    tsv_file << "HMM_LOGLH=" << likelihood_str << std::endl;
-
-    // Write the header
-    tsv_file << "chromosome\tposition\tsnp\tb_allele_freq\tlog2_ratio\tcnv_state\tpopulation_freq" << std::endl;
-
-    // Write the data
-    int snp_count = (int) snp_data.pos.size();
-    for (int i = 0; i < snp_count; i++)
-    {
-        // Get the SNP data
-        uint32_t pos = snp_data.pos[i];
-        bool is_snp = snp_data.is_snp[i];
-        double pfb = snp_data.pfb[i];
-        double baf = snp_data.baf[i];
-        double log2_ratio = snp_data.log2_cov[i];
-        int cn_state = snp_data.state_sequence[i];
-
-        // If the SNP is not a SNP, then set the BAF to 0.0
-        if (!is_snp)
-        {
-            baf = 0.0;
-        }
-
-        // Write the TSV line (chrom, pos, baf, lrr, state)
-        tsv_file << \
-            chr << "\t" << \
-            pos << "\t" << \
-            is_snp << "\t" << \
-            baf << "\t" << \
-            log2_ratio << "\t" << \
-            cn_state << "\t" << \
-            pfb << \
-            std::endl;
-    }
-
-    // Close the file
-    tsv_file.close();
-}
-
 void CNVCaller::saveSVCopyNumberToJSON(SNPData &before_sv, SNPData &after_sv, SNPData &snp_data, std::string chr, uint32_t start, uint32_t end, std::string sv_type, double likelihood, const std::string& filepath) const
 {
     // Append the SV information to the JSON file
diff --git a/src/fasta_query.cpp b/src/fasta_query.cpp
index e4f0e1dc..237db443 100644
--- a/src/fasta_query.cpp
+++ b/src/fasta_query.cpp
@@ -174,7 +174,7 @@ uint32_t ReferenceGenome::getChromosomeLength(std::string chr) const
     }
     catch (const std::out_of_range& e)
     {
-        printError("Chromosome " + chr + " not found in reference genome");
+        printError("Length for chromosome " + chr + " not found in reference genome");
         return 0;
     }
 }
diff --git a/src/input_data.cpp b/src/input_data.cpp
index 3b53a7d7..936f7e62 100644
--- a/src/input_data.cpp
+++ b/src/input_data.cpp
@@ -21,8 +21,6 @@ InputData::InputData()
     this->ref_filepath = "";
     this->snp_vcf_filepath = "";
     this->chr = "";
-    this->start_end = std::make_pair(0, 0);
-    this->region_set = false;
     this->output_dir = "";
     this->sample_size = 20;
     this->min_cnv_length = 2000;  // Default minimum CNV length
@@ -49,14 +47,6 @@ void InputData::printParameters() const
     DEBUG_PRINT("Minimum CNV length: " << this->min_cnv_length);
     DEBUG_PRINT("DBSCAN epsilon: " << this->dbscan_epsilon);
     DEBUG_PRINT("DBSCAN minimum points percentage: " << this->dbscan_min_pts_pct * 100.0f << "%");
-    if (this->region_set)
-    {
-        DEBUG_PRINT("Region set to: chr" + this->chr + ":" + std::to_string(this->start_end.first) + "-" + std::to_string(this->start_end.second));
-    }
-    else
-    {
-        DEBUG_PRINT("Running on whole genome");
-    }
 }

 std::string InputData::getLongReadBam() const
@@ -218,47 +208,6 @@ bool InputData::isSingleChr() const
     return this->single_chr;
 }

-void InputData::setRegion(std::string region)
-{
-    // Check if the region is valid
-    if (region != "")
-    {
-        // Split the region by colon
-        std::istringstream ss(region);
-        std::string token;
-        std::vector<std::string> region_tokens;
-
-        while (std::getline(ss, token, '-'))
-        {
-            region_tokens.push_back(token);
-        }
-
-        // Check if the region is valid
-        if (region_tokens.size() == 2)
-        {
-            // Get the start and end positions
-            int32_t start = std::stoi(region_tokens[0]);
-            int32_t end = std::stoi(region_tokens[1]);
-
-            // Set the region
-            this->start_end = std::make_pair(start, end);
-            this->region_set = true;
-
-            std::cout << "Region set to " << this->chr << ":" << start << "-" << end << std::endl;
-        }
-    }
-}
-
-std::pair<int32_t, int32_t> InputData::getRegion() const
-{
-    return this->start_end;
-}
-
-bool InputData::isRegionSet() const
-{
-    return this->region_set;
-}
-
 void InputData::setAlleleFreqFilepaths(std::string filepath)
 {
     // Check if empty string
@@ -275,19 +224,18 @@ void InputData::setAlleleFreqFilepaths(std::string filepath)
         exit(1);
     }

-    // If a region is set, load only the chromosome in the region
+    // If a chr is specified, load only the file for that chromosome
     std::string target_chr;
     if (this->chr != "")
     {
         target_chr = this->chr;

-        // Check if the region is in chr notation
+        // Check if in chr notation
         if (target_chr.find("chr") != std::string::npos)
         {
             // Remove the chr notation
             target_chr = target_chr.substr(3, target_chr.size() - 3);
         }
-
-        //std::cout << "Loading population allele frequency file for chromosome " << target_chr << std::endl;
     }

     // Load the file and create a map of chromosome -> VCF file
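The --pfb input consumed by setAlleleFreqFilepaths is a plain-text list mapping each chromosome to a population allele frequency VCF, one chromosome=filepath pair per line (the test further below writes a "3=..." line). A sketch of a compatible parser, assuming that format:

```python
def load_pfb_map(filepath):
    """Parse a population allele frequency list file into a
    {chromosome: vcf_path} dict. Format inferred from tests/test_general.py,
    which writes lines like "3=/path/to/chr3_gnomad_snps_isec.vcf.gz"."""
    chr_to_vcf = {}
    with open(filepath, encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if not line or "=" not in line:
                continue  # skip blanks and malformed lines
            chrom, vcf_path = line.split("=", 1)
            # Keys use non-"chr" notation, matching the prefix stripping above
            chr_to_vcf[chrom.removeprefix("chr")] = vcf_path
    return chr_to_vcf
```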
diff --git a/src/main.cpp b/src/main.cpp
index 874f444f..45984eb2 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -5,18 +5,19 @@
 #include
 #include
 #include
+#include
+#include
+#include  // For signal handling

 #include
 #include
-// #include

 /// @endcond

 #include "input_data.h"
-#include "version.h"
 #include "utils.h"
-
+#include "version.h"

 void printStackTrace(int sig)
 {
@@ -40,7 +41,7 @@ void printBanner()
     std::strftime(date_str, sizeof(date_str), "%Y-%m-%d", std::localtime(&now));
     std::cout << "═══════════════════════════════════════════════════════════════" << std::endl;
     std::cout << "  ContextSV - Long-read Structural Variant Caller" << std::endl;
-    std::cout << "  Version: " << VERSION << std::endl;
+    std::cout << "  Version: " << VERSION_MAJOR << "." << VERSION_MINOR << "." << VERSION_PATCH << std::endl;
     std::cout << "  Date: " << date_str << std::endl;
     std::cout << "═══════════════════════════════════════════════════════════════" << std::endl;
 }
@@ -67,9 +68,6 @@ void runContextSV(const std::unordered_map<std::string, std::string>& args)
     if (args.find("chr") != args.end()) {
         input_data.setChromosome(args.at("chr"));
     }
-    if (args.find("region") != args.end()) {
-        input_data.setRegion(args.at("region"));
-    }
     if (args.find("thread-count") != args.end()) {
         input_data.setThreadCount(std::stoi(args.at("thread-count")));
     }
@@ -125,14 +123,14 @@ void runContextSV(const std::unordered_map<std::string, std::string>& args)
 }

 void printUsage(const std::string& programName) {
-    std::cerr << "Usage: " << programName << " [options]\n"
+    std::cout << "ContextSV v" << VERSION_MAJOR << "." << VERSION_MINOR << "." << VERSION_PATCH << std::endl;
+    std::cout << "Usage: " << programName << " [options]\n"
               << "Options:\n"
               << "  -b, --bam          Long-read BAM file (required)\n"
               << "  -r, --ref          Reference genome FASTA file (required)\n"
               << "  -s, --snp          SNPs VCF file (required)\n"
               << "  -o, --outdir       Output directory (required)\n"
               << "  -c, --chr          Chromosome\n"
-              << "  -r, --region       Region (start-end)\n"
               << "  -t, --threads      Number of threads\n"
               << "  -h, --hmm          HMM file\n"
               << "  -n, --sample-size  Sample size for HMM predictions\n"
@@ -191,7 +189,7 @@ std::unordered_map<std::string, std::string> parseArguments(int argc, char* argv
         } else if (arg == "--debug") {
             args["debug"] = "true";
         } else if ((arg == "-v" || arg == "--version")) {
-            std::cout << "ContextSV version " << VERSION << std::endl;
+            std::cout << "ContextSV v" << VERSION_MAJOR << "." << VERSION_MINOR << "." << VERSION_PATCH << std::endl;
             exit(0);
         } else if (arg == "-h" || arg == "--help") {
             printUsage(argv[0]);
@@ -218,6 +216,7 @@ std::unordered_map<std::string, std::string> parseArguments(int argc, char* argv
 int main(int argc, char* argv[]) {
     auto args = parseArguments(argc, argv);
     runContextSV(args);
+    std::cout << "ContextSV finished successfully!" << std::endl;
     return 0;
 }
diff --git a/src/sv_caller.cpp b/src/sv_caller.cpp
index 29dab604..d59e6598 100644
--- a/src/sv_caller.cpp
+++ b/src/sv_caller.cpp
@@ -26,11 +26,11 @@
 #include "ThreadPool.h"
 #include "utils.h"
 #include "sv_types.h"
-#include "version.h"
 #include "fasta_query.h"
 #include "dbscan.h"
 #include "dbscan1d.h"
 #include "debug.h"
+#include "version.h"

 /// @endcond

 # define DUP_SEQSIM_THRESHOLD 0.9  // Sequence similarity threshold for duplication detection
@@ -149,7 +149,7 @@ void SVCaller::findSplitSVSignatures(std::unordered_map
                 std::pair qpos = getAlignmentReadPositions(bam1);
-                primary_map[bam1->core.tid][qname] = PrimaryAlignment{bam1->core.pos + 1, bam_endpos(bam1), qpos.first, qpos.second, !(bam1->core.flag & BAM_FREVERSE), 0};
+                primary_map[bam1->core.tid][qname] = PrimaryAlignment{static_cast<uint32_t>(bam1->core.pos + 1), static_cast<uint32_t>(bam_endpos(bam1)), static_cast<uint32_t>(qpos.first), static_cast<uint32_t>(qpos.second), !(bam1->core.flag & BAM_FREVERSE), 0};
                 alignment_tids.insert(bam1->core.tid);
                 primary_count++;
@@ -159,7 +159,7 @@ void SVCaller::findSplitSVSignatures(std::unordered_map
                 std::pair qpos = getAlignmentReadPositions(bam1);
-                supp_map[qname].push_back(SuppAlignment{bam1->core.tid, bam1->core.pos + 1, bam_endpos(bam1), qpos.first, qpos.second, !(bam1->core.flag & BAM_FREVERSE)});
+                supp_map[qname].push_back(SuppAlignment{bam1->core.tid, static_cast<uint32_t>(bam1->core.pos + 1), static_cast<uint32_t>(bam_endpos(bam1)), static_cast<uint32_t>(qpos.first), static_cast<uint32_t>(qpos.second), !(bam1->core.flag & BAM_FREVERSE)});
                 alignment_tids.insert(bam1->core.tid);
                 supp_qnames.insert(qname);
                 supplementary_count++;
@@ -415,23 +415,6 @@ void SVCaller::findSplitSVSignatures(std::unordered_map
-            // if (supp_positions.size() > 1) {
-            //     std::sort(supp_positions.begin(), supp_positions.end());
-            //     int supp_start = supp_positions.front();
-            //     int supp_end = supp_positions.back();
-            //     int sv_length = std::abs(supp_start - supp_end);
-
-            //     // Use 50bp as the minimum length for an inversion
-            //     if (sv_length >= 50 && sv_length <= max_length) {
-            //         SVEvidenceFlags aln_type;
-            //         aln_type.set(static_cast(SVDataType::SUPPINV));
-            //         SVCall sv_candidate(supp_start, supp_end, SVType::INV, getSVTypeSymbol(SVType::INV), aln_type, Genotype::UNKNOWN, 0.0, 0, 0, supp_cluster_size);
-            //         // SVCall sv_candidate(supp_start, supp_end, SVType::INV, getSVTypeSymbol(SVType::INV), SVDataType::SUPPINV, Genotype::UNKNOWN, 0.0, 0, 0, supp_cluster_size);
-            //         addSVCall(chr_sv_calls, sv_candidate);
-            //     }
-            // }
-
             // -------------------------------
             // SPLIT INSERTION CALLS
             int read_distance = 0;
@@ -755,13 +738,6 @@ void SVCaller::processChromosome(const std::string& chr, std::vector& ch
     bam_hdr_destroy(bamHdr);

     printMessage(chr + ": Merging CIGAR...");
-    // Save JSON if chr21
-    // if (chr == "chr21") {
-    //     std::string json_fp = input_data.getOutputDir() + "/" + chr + ".json";
-    //     mergeSVs(chr_sv_calls, dbscan_epsilon, dbscan_min_pts, true, json_fp);
-    // } else {
-    //     mergeSVs(chr_sv_calls, dbscan_epsilon, dbscan_min_pts, false);
-    // }
     mergeSVs(chr_sv_calls, dbscan_epsilon, dbscan_min_pts, false);

     int region_sv_count = getSVCount(chr_sv_calls);
@@ -1185,7 +1161,7 @@ void SVCaller::saveToVCF(const std::unordered_map& sv_calls, double epsilon, int min_pts, bool k
         continue;
     }
-    DEBUG_PRINT("Merging SV type: " + getSVTypeString(sv_type) + " (epsilon=" + std::to_string(epsilon) + ", min_pts=" + std::to_string(min_pts) + ", num SVs=" + std::to_string(sv_calls.size()) + ")");
     std::vector<SVCall> merged_sv_type_calls;

     // Create a vector of SV calls for the current SV type and size interval
@@ -81,12 +80,13 @@ void mergeSVs(std::vector<SVCall>& sv_calls, double epsilon, int min_pts, bool k
     std::copy_if(sv_calls.begin(), sv_calls.end(), std::back_inserter(sv_type_calls),
         [sv_type](const SVCall& sv_call) { return sv_call.sv_type == sv_type; });

+    DEBUG_PRINT("Merging SV type: " + getSVTypeString(sv_type) + " (epsilon=" + std::to_string(epsilon) + ", min_pts=" + std::to_string(min_pts) + ", num SVs=" + std::to_string(sv_type_calls.size()) + ")");
     if (sv_type_calls.size() < 2)
     {
         // Add all unclustered points to the merged list
         for (const auto& sv_call : sv_type_calls)
         {
             SVCall noise_sv_call = sv_call;
-            merged_sv_type_calls.push_back(noise_sv_call);
+            merged_sv_calls.push_back(noise_sv_call);
         }
         continue;
     }
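For intuition on the merge step above: SV calls of one type are clustered with DBSCAN over their (start, end) breakpoints, clusters are collapsed to a single representative, and noise points pass through unmerged (which is what the merged_sv_calls fix restores). A minimal sketch, assuming dict-shaped calls and median-breakpoint representatives (the real C++ representative selection may differ):

```python
import numpy as np
from sklearn.cluster import DBSCAN

def merge_sv_calls(calls, eps, min_pts):
    """Cluster same-type SV calls by (start, end) and keep one
    representative per cluster; DBSCAN noise (label -1) passes through."""
    if len(calls) < 2:
        return list(calls)  # mirrors the size-below-2 early-out above
    points = np.array([(c["start"], c["end"]) for c in calls])
    labels = DBSCAN(eps=eps, min_samples=min_pts).fit_predict(points)
    merged = [c for c, lab in zip(calls, labels) if lab == -1]
    for label in set(labels) - {-1}:
        members = [c for c, lab in zip(calls, labels) if lab == label]
        merged.append({
            **members[0],
            "start": int(np.median([c["start"] for c in members])),
            "end": int(np.median([c["end"] for c in members])),
        })
    return merged
```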
diff --git a/src/utils.cpp b/src/utils.cpp
index a27263b7..20dd479e 100644
--- a/src/utils.cpp
+++ b/src/utils.cpp
@@ -4,6 +4,7 @@
 #include  // getrusage
 #include
 #include
+#include
 #include
 #include
 #include
@@ -11,72 +12,8 @@

 /// @endcond

-// Define print mutex
 std::mutex print_mtx;

-// Print a progress bar
-void printProgress(int progress, int total)
-{
-    float percent = (float)progress / (float)total * 100.0;
-    int num_hashes = (int)(percent / 2.0);
-
-    // Print the progress bar
-    printf("\r[");
-    for (int i = 0; i < num_hashes; i++)
-    {
-        printf("#");
-    }
-    for (int i = 0; i < 50 - num_hashes; i++)
-    {
-        printf(" ");
-    }
-    printf("] %3.2f%%", percent);
-    fflush(stdout);
-
-    // Print a new line if finished
-    if (progress == total)
-    {
-        printf("\n");
-    }
-}
-
-// Run bcftools to determine the chr notation of a VCF file
-bool isChrNotation(std::string vcf_filepath)
-{
-    // Create the command to extract the chromosomes from the VCF
-    std::string cmd = "bcftools query -f '%CHROM\n' " + vcf_filepath + " 2>/dev/null";
-
-    // Open the pipe
-    FILE* pipe = popen(cmd.c_str(), "r");
-    if (pipe == NULL)
-    {
-        std::cerr << "Error: could not open pipe" << std::endl;
-        return false;
-    }
-
-    // Read the first line
-    const int line_size = 256;
-    char buffer[line_size];
-    if (!fgets(buffer, line_size, pipe))
-    {
-        std::cerr << "Error reading from pipe" << std::endl;
-        return false;
-    }
-
-    // Check if the first line contains "chr" using std::string::find
-    bool is_chr_notation = false;
-    std::string line(buffer);
-    if (line.find("chr") != std::string::npos)
-    {
-        is_chr_notation = true;
-    }
-
-    // Close the pipe
-    pclose(pipe);
-
-    return is_chr_notation;
-}
-
 // Thread-safe print message function
 void printMessage(std::string message)
 {
@@ -102,15 +39,6 @@ std::string getElapsedTime(std::chrono::high_resolution_clock::time_point start,
     return elapsed_time;
 }

-// Function to remove the 'chr' prefix from chromosome names
-std::string removeChrPrefix(std::string chr)
-{
-    if (chr.find("chr") != std::string::npos) {
-        return chr.substr(3);
-    }
-    return chr;
-}
-
 void printMemoryUsage(const std::string& functionName) {
     struct rusage usage;
     getrusage(RUSAGE_SELF, &usage);
@@ -141,3 +69,5 @@ void closeJSON(const std::string &filepath)
     json_file << "]";  // Close the JSON array
     json_file.close();
 }
+
+
diff --git a/tests/data/ref.fa b/tests/data/ref.fa
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/data/snps.vcf.gz b/tests/data/snps.vcf.gz
deleted file mode 100644
index 6f0175b0..00000000
Binary files a/tests/data/snps.vcf.gz and /dev/null differ
diff --git a/tests/data/snps.vcf.gz.csi b/tests/data/snps.vcf.gz.csi
deleted file mode 100644
index 2c809709..00000000
Binary files a/tests/data/snps.vcf.gz.csi and /dev/null differ
diff --git a/tests/data/snps.vcf.gz.tbi b/tests/data/snps.vcf.gz.tbi
deleted file mode 100644
index 5110e913..00000000
Binary files a/tests/data/snps.vcf.gz.tbi and /dev/null differ
diff --git a/tests/data/test.bam b/tests/data/test.bam
deleted file mode 100644
index 75a95e3e..00000000
Binary files a/tests/data/test.bam and /dev/null differ
diff --git a/tests/data/test.bam.bai b/tests/data/test.bam.bai
deleted file mode 100644
index 08baf8e9..00000000
Binary files a/tests/data/test.bam.bai and /dev/null differ
diff --git a/tests/test_general.py b/tests/test_general.py
index ff65faba..c1c59da7 100644
--- a/tests/test_general.py
+++ b/tests/test_general.py
@@ -3,22 +3,10 @@
 """

 import os
+import subprocess
 import sys
 import pytest

-# Import lib from the parent directory
-sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-
-from lib import contextsv
-
-# Get the path to the test data directory.
-TEST_DATA_DIR = os.path.join(os.path.dirname(__file__), 'data')
-
-# Get the path to the test data files.
-TEST_BAM_FILE = os.path.join(TEST_DATA_DIR, 'test.bam')
-TEST_REF_FILE = ""
-TEST_SNPS_FILE = os.path.join(TEST_DATA_DIR, 'snps.vcf.gz')
-TEST_PFB_FILE = ""

 # Set the output directory.
 TEST_OUTDIR = os.path.join(os.path.dirname(__file__), 'output')
@@ -26,62 +14,155 @@
 # Check if running unit tests on GitHub Actions
 local_dir = os.path.expanduser("~/github/ContextSV")
 if os.getcwd() == local_dir:
-    test_dir_path = os.path.join(local_dir, 'tests/data')
+    TEST_DATA_DIR = os.path.join(local_dir, 'tests/data')
 else:
-    test_dir_path = os.path.abspath(str("TestData"))
-
-TEST_REF_FILE = os.path.join(test_dir_path, 'GRCh37_21.fa')
-TEST_BAM_FILE = os.path.join(test_dir_path, 'ONT_Kit14_21.bam')
-TEST_SNPS_FILE = os.path.join(test_dir_path, 'snps_21.vcf.gz')
-
-
-def test_run():
-    """Test the run function of the contextsv module with a small dataset."""
-
-    # Set input parameters.
-    input_data = contextsv.InputData()
-    input_data.setLongReadBam(TEST_BAM_FILE)
-    input_data.setRefGenome(TEST_REF_FILE)
-    input_data.setSNPFilepath(TEST_SNPS_FILE)
-    input_data.setChromosome("21")
-    input_data.setRegion("14486099-14515105")
-    input_data.setThreadCount(1)
-    input_data.setAlleleFreqFilepaths("")
-    input_data.setHMMFilepath("")
-    input_data.setOutputDir(TEST_OUTDIR)
-    input_data.saveCNVData(True)
-
-    # Run the analysis.
-    contextsv.run(input_data)
-
-    # Check that the output file exists.
-    output_file = os.path.join(TEST_OUTDIR, 'output.vcf')
-    assert os.path.exists(output_file)
-
-    # Check that the VCF file is not empty.
-    assert os.path.getsize(output_file) > 0
-
-    # Check that the VCF file has the correct number of lines.
-    with open(output_file, 'r', encoding='utf-8') as f:
-        assert len(f.readlines()) == 22
-
-    # Check that the VCF file has the correct header, and the correct
-    # VCF CHROM, POS, and INFO fields in the next 2 lines.
-    header_line = 17
-    with open(output_file, 'r', encoding='utf-8') as f:
-        for i, line in enumerate(f):
-            if i == header_line:
-                assert line.strip() == "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tSAMPLE"
-            elif i == header_line + 1:
-                # Get the first, second, and eighth fields from the line
-                fields = line.strip().split('\t')
-                assert fields[0] == "21"
-                assert fields[1] == "14458394"
-                assert fields[7] == "END=14458394;SVTYPE=INS;SVLEN=1344;SUPPORT=1;SVMETHOD=CONTEXTSVv0.1;ALN=CIGARINS;CLIPSUP=0;REPTYPE=NA;HMM=0.000000"
-            elif i == header_line + 2:
-                fields = line.strip().split('\t')
-                assert fields[0] == "21"
-                assert fields[1] == "14502888"
-                assert fields[7] == "END=14502953;SVTYPE=BOUNDARY;SVLEN=65;SUPPORT=1;SVMETHOD=CONTEXTSVv0.1;ALN=BOUNDARY;CLIPSUP=0;REPTYPE=NA;HMM=-4.606171"
+    TEST_DATA_DIR = os.path.abspath(str("tests/data"))
+
+TEST_DATA_DIR = os.path.join(os.path.dirname(__file__), 'data')
+BAM_FILE = os.path.join(TEST_DATA_DIR, 'chr3_test.bam')
+REF_FILE = os.path.join(TEST_DATA_DIR, 'GRCh38_noalts_chr3.fa')
+SNPS_FILE = os.path.join(TEST_DATA_DIR, 'chr3_test.snps.vcf.gz')
+GAP_FILE = os.path.join(TEST_DATA_DIR, 'Gaps-HG38-UCSC-chr3.bed')
+HMM_FILE = os.path.join(TEST_DATA_DIR, 'wgs_test.hmm')
+
+# Create a PFB file pointing to the SNP file in the test data directory
+PFB_FILE = os.path.join(TEST_DATA_DIR, 'chr3_pfb.txt')
+# Remove any existing PFB file to avoid conflicts
+if os.path.exists(PFB_FILE):
+    os.remove(PFB_FILE)
+
+PFB_SNP_FILE = os.path.join(TEST_DATA_DIR, 'chr3_gnomad_snps_isec.vcf.gz')
+with open(PFB_FILE, 'w', encoding='utf-8') as pf:
+    pf.write(f"3={PFB_SNP_FILE}\n")
+
+def test_run_help():
+    """Ensure the binary runs with --help and exits cleanly."""
+    result = subprocess.run(
+        ["./build/contextsv", "--help"],
+        capture_output=True,
+        text=True,
+        check=True
+    )
+
+    # Print output for debugging purposes
+    print("STDOUT:", result.stdout)
+
+    # Check process exited successfully
+    assert result.returncode == 0, f"Non-zero exit: {result.returncode}\n{result.stderr}"
+    assert result.stdout, "No output from --help"
+    assert "Usage:" in result.stdout, "Help text missing 'Usage:'"
+    assert "Options:" in result.stdout, "Help text missing 'Options:'"
+
+def test_run_version():
+    """Ensure the binary runs with --version and exits cleanly."""
+    result = subprocess.run(
+        ["./build/contextsv", "--version"],
+        capture_output=True,
+        text=True,
+        check=True
+    )
+
+    # Print output for debugging purposes
+    print("STDOUT:", result.stdout)
+
+    # Check process exited successfully
+    assert result.returncode == 0, f"Non-zero exit: {result.returncode}\n{result.stderr}"
+    assert result.stdout, "No output from --version"
+
+def test_run_basic():
+    """Run ContextSV with basic required parameters."""
+    out_dir = os.path.join(TEST_OUTDIR, 'test_run_basic')
+    os.makedirs(out_dir, exist_ok=True)
+
+    vcf_file = os.path.join(out_dir, 'output.vcf')
+    if os.path.exists(vcf_file):
+        os.remove(vcf_file)
+    json_file = os.path.join(out_dir, 'CNVCalls.json')
+    if os.path.exists(json_file):
+        os.remove(json_file)
+
+    result = subprocess.run(
+        ["./build/contextsv",
+         "--bam", BAM_FILE,
+         "--ref", REF_FILE,
+         "--snp", SNPS_FILE,
+         "--outdir", out_dir,
+         "--hmm", HMM_FILE,
+         "--eth", "nfe",
+         "--pfb", PFB_FILE,
+         "--sample-size", "20",
+         "--min-cnv", "2000",
+         "--eps", "0.1",
+         "--min-pts-pct", "0.1",
+         "--assembly-gaps", GAP_FILE,
+         "--chr", "chr3",
+         "--save-cnv",
+         "--debug"
+        ],
+        capture_output=True,
+        text=True,
+        check=True
+    )
+
+    # Print output for debugging purposes
print("STDOUT:", result.stdout) + print("STDERR:", result.stderr) + + # Check process exited successfully + assert result.returncode == 0, f"Non-zero exit: {result.returncode}\n{result.stderr}" + assert "ContextSV finished successfully!" in result.stdout, "Did not complete successfully" + + # Check for expected output files + expected_files = [ + os.path.join(out_dir, 'CNVCalls.json'), + os.path.join(out_dir, 'output.vcf') + ] + for ef in expected_files: + assert os.path.isfile(ef), f"Expected output file not found: {ef}" + + # Find the large duplication in the VCF output + # chr3 61149366 . N . PASS END=61925600;SVTYPE=DUP;SVLEN=776235;SVMETHOD=ContextSVv1.0.0-1-g4bd038c;ALN=SPLIT,HMM;HMM=-2533.541937;SUPPORT=63;CLUSTER=23;ALNOFFSET=0;CN=6 GT:DP 1/1:63 + found_dup = False + with open(vcf_file, 'r') as vf: + for line in vf: + if line.startswith('#'): + continue + fields = line.strip().split('\t') + if len(fields) < 8: + continue + chrom, pos, id, ref, alt, qual, filter, info = fields[:8] + + if (chrom == 'chr3' and pos == '61149366' and alt == ''): + found_dup = True + info_dict = dict(item.split('=') for item in info.split(';') if '=' in item) + assert info_dict.get('SVTYPE') == 'DUP', "SVTYPE is not DUP" + assert int(info_dict.get('SVLEN', 0)) == 776235, f"SVLEN is not 776235, got {info_dict.get('SVLEN')}" + assert int(info_dict.get('CN', 0)) == 6, f"CN is not 6, got {info_dict.get('CN')}" + break + + assert found_dup, "Expected duplication not found in VCF output" + + # Find the large duplication in the CNVCalls.json output + # [ + # { + # "chromosome": "chr3", + # "start": 61149366, + # "end": 61925600, + # "sv_type": "DUP", + # "likelihood": -2533.54, + # "size": 776235, + + found_dup_json = False + import json + with open(json_file, 'r') as jf: + cnv_data = json.load(jf) + for entry in cnv_data: + if (entry.get('chromosome') == 'chr3' and + entry.get('start') == 61149366 and + entry.get('end') == 61925600 and + entry.get('sv_type') == 'DUP'): + found_dup_json = True + assert entry.get('size') == 776235, f"Size is not 776235, got {entry.get('size')}" break - \ No newline at end of file + + assert found_dup_json, "Expected duplication not found in CNVCalls.json output"