diff --git a/docker/Dockerfile b/docker/Dockerfile index d078868..5037912 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -1,13 +1,16 @@ FROM python:3.10 ENV vcf_validator_version=0.10.2 -ENV NXF_VER=22.10.6 +ENV NXF_VER=23.10.0 WORKDIR /opt # Install JAVA and Node RUN apt update && apt install -y default-jdk nodejs npm +# Install bcftools +RUN apt install -y bcftools + # Install VCF validator RUN curl -LJo /usr/local/bin/vcf_validator https://github.com/EBIvariation/vcf-validator/releases/download/v${vcf_validator_version}/vcf_validator_linux \ && curl -LJo /usr/local/bin/vcf_assembly_checker https://github.com/EBIvariation/vcf-validator/releases/download/v${vcf_validator_version}/vcf_assembly_checker_linux \ diff --git a/docs/installation.md b/docs/installation.md index 5525d0f..a38cdd8 100644 --- a/docs/installation.md +++ b/docs/installation.md @@ -40,10 +40,15 @@ To upgrade to the newest version, run `pip install --upgrade eva-sub-cli`. ## 3. From source natively +Advanced users who want to manage their dependencies in a more granular way can install from source natively. This installation method requires the following: * Python 3.8+ * [Nextflow](https://www.nextflow.io/docs/latest/getstarted.html) 21.10+ * [biovalidator](https://github.com/elixir-europe/biovalidator) 2.1.0+ * [vcf-validator](https://github.com/EBIvariation/vcf-validator) 0.9.7+ +* [bcftools](https://www.htslib.org/download/) 1.14+ -Install each of these and ensure they are included in your PATH. Then install the latest eva-sub-cli release as previously described. +Install each of these and ensure they are included in your PATH. Then install the latest eva-sub-cli release from [PyPI](https://pypi.org/project/eva-sub-cli/): +```bash +pip install eva-sub-cli +``` \ No newline at end of file diff --git a/eva_sub_cli/jinja_templates/html/file_validation.html b/eva_sub_cli/jinja_templates/html/file_validation.html index dfd6c6d..6591108 100644 --- a/eva_sub_cli/jinja_templates/html/file_validation.html +++ b/eva_sub_cli/jinja_templates/html/file_validation.html @@ -6,6 +6,8 @@ {{ assembly_check(result) }} {% elif check_type == "vcf_check" %} {{ vcf_check(result) }} + {% elif check_type == "norm_check" %} + {{ norm_check(result) }} {% endif %} {% endfor %} {%- endmacro %} @@ -90,4 +92,33 @@ {% endif %} {%- endmacro %} +{% macro norm_check(norm_check_result) %} + {% set error_count = norm_check_result.get("nb_error", 0) %} + {% set expand_icon = "" %} + {% if error_count > 0 %} + {% set expand_icon = "▶" %} + {% set icon = "❌" %} + {% set row_class = "report-section fail collapsible" %} + {% else %} + {% set icon = "✔" %} + {% set row_class = "report-section pass" %} + {% endif %} +
{{ expand_icon }} {{ icon }} Normalisation check: {{ error_count }} errors ({{ norm_check_result.get("nb_total", 0) }} total lines: {{ norm_check_result.get("nb_split", 0) }} split, {{ norm_check_result.get("nb_realigned", 0) }} realigned, {{ norm_check_result.get("nb_skipped", 0) }} skipped)
+ {% set error_list = norm_check_result.get("error_list") %} + {% if error_list%} +
+
First 10 errors are below. Full report: {{ norm_check_result.get('report_path', '') }}
+ + + + + {% for error in error_list[:10] %} + + + + {% endfor %} +
CategoryError
Error {{ error }}
+
+ {% endif %} +{%- endmacro %} diff --git a/eva_sub_cli/jinja_templates/html/report.html b/eva_sub_cli/jinja_templates/html/report.html index 8630012..8173668 100644 --- a/eva_sub_cli/jinja_templates/html/report.html +++ b/eva_sub_cli/jinja_templates/html/report.html @@ -70,7 +70,8 @@

Metadata validation results

VCF validation results

Checks whether each file is compliant with the VCF specification. - Also checks whether the variants' reference alleles match against the reference assembly. + Also checks whether the variants' reference alleles match against the reference assembly, + and whether the file can be normalised (if necessary) using bcftools norm.
{% for file_name in vcf_files %} {% if file_name != "pass"%} diff --git a/eva_sub_cli/jinja_templates/text/file_validation.txt b/eva_sub_cli/jinja_templates/text/file_validation.txt index e27fa75..3fc94b8 100644 --- a/eva_sub_cli/jinja_templates/text/file_validation.txt +++ b/eva_sub_cli/jinja_templates/text/file_validation.txt @@ -6,6 +6,8 @@ {{ assembly_check(result) }} {% elif check_type == "vcf_check" %} {{ vcf_check(result) }} + {% elif check_type == "norm_check" %} + {{ norm_check(result) }} {% endif %} {% endfor %} {%- endmacro %} @@ -56,3 +58,21 @@ {% endfor %} {% endif %} {%- endmacro %} + +{% macro norm_check(norm_check_result) %} + {% set error_count = norm_check_result.get("nb_error", 0) %} + {% if error_count > 0 %} + {% set icon = "\u274C" %} + {% else %} + {% set icon = "\u2714" %} + {% endif %} + {{ icon }} Normalisation check: {{ error_count }} errors ({{ norm_check_result.get("nb_total", 0) }} total lines: {{ norm_check_result.get("nb_split", 0) }} split, {{ norm_check_result.get("nb_realigned", 0) }} realigned, {{ norm_check_result.get("nb_skipped", 0) }} skipped) + + {% set error_list = norm_check_result.get("error_list") %} + {% if error_list%} + First 10 errors are below. Full report: {{ norm_check_result.get('report_path', '') }} + {% for error in error_list[:10] %} + Error: {{ error }} + {% endfor %} + {% endif %} +{%- endmacro %} diff --git a/eva_sub_cli/jinja_templates/text/report.txt b/eva_sub_cli/jinja_templates/text/report.txt index a462d92..68bfde9 100644 --- a/eva_sub_cli/jinja_templates/text/report.txt +++ b/eva_sub_cli/jinja_templates/text/report.txt @@ -29,7 +29,8 @@ For requirements, please refer to the EVA website (https://www.ebi.ac.uk/eva/?Su VCF VALIDATION RESULTS Checks whether each file is compliant with the VCF specification (http://samtools.github.io/hts-specs/VCFv4.4.pdf). -Also checks whether the variants' reference alleles match against the reference assembly. +Also checks whether the variants' reference alleles match against the reference assembly, and whether the file can be +normalised (if necessary) using bcftools norm (https://samtools.github.io/bcftools/bcftools.html#norm). {% for file_name in vcf_files %} {% if file_name != "pass"%} diff --git a/eva_sub_cli/nextflow/validation.nf b/eva_sub_cli/nextflow/validation.nf index 0fe26a8..7a409b7 100644 --- a/eva_sub_cli/nextflow/validation.nf +++ b/eva_sub_cli/nextflow/validation.nf @@ -23,7 +23,8 @@ params.metadata_xlsx = null params.executable = [ "vcf_validator": "vcf_validator", "vcf_assembly_checker": "vcf_assembly_checker", - "biovalidator": "biovalidator" + "biovalidator": "biovalidator", + "bcftools": "bcftools" ] // python scripts - installed as part of eva-sub-cli params.python_scripts = [ @@ -83,11 +84,11 @@ workflow { // VCF checks check_vcf_valid(vcf_and_ref_ch) check_vcf_reference(vcf_and_ref_ch) + check_vcf_normalised(vcf_and_ref_ch) generate_file_size_and_md5_digests(vcf_files) collect_file_size_and_md5(generate_file_size_and_md5_digests.out.file_size_and_digest_info.collect()) - // Metadata conversion if (params.metadata_xlsx && !params.metadata_json){ convert_xlsx_2_json(joinBasePath(params.metadata_xlsx)) @@ -177,6 +178,30 @@ process check_vcf_reference { """ } +/* + * Check that the VCF file can be normalised using bcftools + */ +process check_vcf_normalised { + publishDir output_dir, + overwrite: true, + mode: "copy" + + input: + tuple path(vcf), path(fasta), path(report) + + output: + // TODO should we output the normalised file? + path "norm_check/*.log", emit: normalisation_log + + script: + """ + mkdir norm_check + # Trap exit code so failures can be reported + $params.executable.bcftools norm --no-version -cw -f $fasta -O u $vcf 1> /dev/null 2> norm_check/${vcf}_bcftools_norm.log \ + || echo "exit code \$?" + """ +} + process generate_file_size_and_md5_digests { input: path(vcf_file) diff --git a/eva_sub_cli/report.py b/eva_sub_cli/report.py index 88bfd6f..1c1b55c 100644 --- a/eva_sub_cli/report.py +++ b/eva_sub_cli/report.py @@ -19,7 +19,7 @@ def generate_report(validation_results, validation_date, submission_dir, vcf_fas consent_statement_required, subdir, template_file): results_for_report = {k: v for k, v in validation_results.items() if k != 'ready_for_submission_to_eva'} vcf_files = sorted(set([file_name - for check in results_for_report if check in ["vcf_check", "assembly_check"] + for check in results_for_report if check in ["vcf_check", "assembly_check", "norm_check"] for file_name in results_for_report[check] ])) fasta_files = sorted([file_name for file_name in results_for_report['fasta_check']]) diff --git a/eva_sub_cli/validators/docker_validator.py b/eva_sub_cli/validators/docker_validator.py index 6073776..8bd9be0 100644 --- a/eva_sub_cli/validators/docker_validator.py +++ b/eva_sub_cli/validators/docker_validator.py @@ -12,7 +12,7 @@ logger = logging_config.get_logger(__name__) default_container_image = 'ebivariation/eva-sub-cli' -default_container_tag = 'v0.0.6' +default_container_tag = 'v0.0.7.dev0' container_validation_dir = '/opt/vcf_validation' container_validation_output_dir = 'vcf_validation_output' diff --git a/eva_sub_cli/validators/native_validator.py b/eva_sub_cli/validators/native_validator.py index d8ebe9b..206f538 100644 --- a/eva_sub_cli/validators/native_validator.py +++ b/eva_sub_cli/validators/native_validator.py @@ -13,7 +13,7 @@ class NativeValidator(Validator): def __init__(self, mapping_file, submission_dir, project_title, metadata_json=None, metadata_xlsx=None, shallow_validation=False, vcf_validator_path='vcf_validator', assembly_checker_path='vcf_assembly_checker', biovalidator_path='biovalidator', - submission_config=None, nextflow_config=None): + bcftools_path='bcftools', submission_config=None, nextflow_config=None): super().__init__(mapping_file, submission_dir, project_title, metadata_json=metadata_json, metadata_xlsx=metadata_xlsx, shallow_validation=shallow_validation, submission_config=submission_config) @@ -21,6 +21,7 @@ def __init__(self, mapping_file, submission_dir, project_title, metadata_json=No self.vcf_validator_path = vcf_validator_path self.assembly_checker_path = assembly_checker_path self.biovalidator_path = biovalidator_path + self.bcftools_path = bcftools_path @staticmethod def _validation_file_path_for(file_path): @@ -57,13 +58,15 @@ def get_validation_cmd(self): f" --executable.vcf_validator {self.vcf_validator_path}", f" --executable.vcf_assembly_checker {self.assembly_checker_path}", f" --executable.biovalidator {self.biovalidator_path}", + f" --executable.bcftools {self.bcftools_path}", f" -c {self.nextflow_config} " if self.nextflow_config else "" ]) def verify_executables_installed(self): for name, path in [('vcf-validator', self.vcf_validator_path), ('vcf-assembly-checker', self.assembly_checker_path), - ('biovalidator', self.biovalidator_path)]: + ('biovalidator', self.biovalidator_path), + ('bcftools', self.bcftools_path)]: try: self._run_quiet_command( f"Check {name} is installed and available on the path", diff --git a/eva_sub_cli/validators/validation_results_parsers.py b/eva_sub_cli/validators/validation_results_parsers.py index f38344e..4c5bb4a 100644 --- a/eva_sub_cli/validators/validation_results_parsers.py +++ b/eva_sub_cli/validators/validation_results_parsers.py @@ -113,6 +113,19 @@ def vcf_check_errors_is_critical(error): return True +def parse_bcftools_norm_report(norm_report): + total = split = realigned = skipped = 0 + error_list = [] + with open(norm_report) as open_file: + for line in open_file: + if line.startswith('Lines total/split/realigned/skipped:'): + # Lines total/split/realigned/skipped: 2/0/1/0 + total, split, realigned, skipped = line.strip().split()[-1].split('/') + else: + error_list.append(line.strip()) + return error_list, int(total), int(split), int(realigned), int(skipped) + + def parse_biovalidator_validation_results(metadata_check_file): """ Read the biovalidator's report and extract the list of validation errors diff --git a/eva_sub_cli/validators/validator.py b/eva_sub_cli/validators/validator.py index 236ed42..3070927 100755 --- a/eva_sub_cli/validators/validator.py +++ b/eva_sub_cli/validators/validator.py @@ -16,7 +16,7 @@ from eva_sub_cli.report import generate_html_report, generate_text_report from eva_sub_cli.validators.validation_results_parsers import parse_assembly_check_log, parse_assembly_check_report, \ parse_biovalidator_validation_results, convert_metadata_sheet, convert_metadata_row, convert_metadata_attribute, \ - parse_vcf_check_report, parse_metadata_property + parse_vcf_check_report, parse_metadata_property, parse_bcftools_norm_report VALIDATION_OUTPUT_DIR = "validation_output" VALIDATION_RESULTS_KEY = 'validation_results' @@ -153,7 +153,7 @@ def verify_ready_for_submission_to_eva(self): """ Checks if all the validation are passed """ return all(( all((value.get('pass', False) is True for key, value in self.results.items() if - key in ['vcf_check', 'assembly_check', 'fasta_check', 'sample_check', 'metadata_check', 'evidence_type_check'])), + key in ['vcf_check', 'assembly_check', 'fasta_check', 'sample_check', 'norm_check', 'metadata_check', 'evidence_type_check'])), any(( self.results['shallow_validation']['requested'] is False, self.results['shallow_validation'].get('required', True) is False @@ -167,6 +167,7 @@ def _collect_validation_workflow_results(self): self._collect_trim_down_metrics() self._collect_vcf_check_results() self._collect_assembly_check_results() + self._collect_norm_check_results() self._load_sample_check_results() self._load_evidence_check_results() self._load_fasta_check_results() @@ -189,6 +190,11 @@ def _assess_validation_results(self): for vcf_name, asm_check in self.results.get('assembly_check', {}).items())) self.results['assembly_check']['pass'] = asm_nb_mismatch_result and asm_nb_error_result + # norm_check result + norm_check_result = all((norm_check.get('nb_error', 1) == 0 + for vcf_name, norm_check in self.results.get('norm_check', {}).items())) + self.results['norm_check']['pass'] = norm_check_result + # fasta_check result fasta_check_result = all((fa_file_check.get('all_insdc', False) is True for fa_file, fa_file_check in self.results.get('fasta_check', {}).items())) @@ -249,6 +255,10 @@ def _assembly_check_text_report(self, vcf_name): os.path.join(self.output_dir, 'assembly_check', vcf_name + '*text_assembly_report*') ) + @lru_cache + def _normalisation_log(self, vcf_name): + return resolve_single_file_path(os.path.join(self.output_dir, 'norm_check', vcf_name + '_bcftools_norm.log')) + @cached_property def _sample_check_yaml(self): return resolve_single_file_path(os.path.join(self.output_dir, 'other_validations', 'sample_checker.yml')) @@ -310,6 +320,25 @@ def _collect_assembly_check_results(self): 'total': total } + def _collect_norm_check_results(self): + self.results['norm_check'] = {} + for vcf_file in self.vcf_files: + vcf_name = os.path.basename(vcf_file) + normalisation_log = self._normalisation_log(vcf_name) + if normalisation_log: + error_list, nb_total, nb_split, nb_realigned, nb_skipped = parse_bcftools_norm_report(normalisation_log) + else: + error_list, nb_total, nb_split, nb_realigned, nb_skipped = (['Process failed'], 0, 0, 0, 0) + self.results['norm_check'][vcf_name] = { + 'report_path': normalisation_log, + 'error_list': error_list, + 'nb_error': len(error_list), + 'nb_total': nb_total, + 'nb_split': nb_split, + 'nb_realigned': nb_realigned, + 'nb_skipped': nb_skipped + } + def _load_fasta_check_results(self): for fasta_file in self.fasta_files: fasta_file_name = os.path.basename(fasta_file) @@ -338,8 +367,6 @@ def _load_evidence_check_results(self): self._update_metadata_with_evidence_type() - - def _collect_metadata_results(self): self.results['metadata_check'] = {} self._load_spreadsheet_conversion_errors() diff --git a/tests/build_and_test_docker_locally.py b/tests/build_and_test_docker_locally.py index 6b2b9e4..b65fb56 100644 --- a/tests/build_and_test_docker_locally.py +++ b/tests/build_and_test_docker_locally.py @@ -233,13 +233,12 @@ def get_docker_validation_cmd(self): def assert_validation_results(self, validator, expected_sample_checker, expected_metadata_files_json, expected_metadata_val, expected_semantic_val, expected_evidence_type_val): + # Assert VCF format check vcf_format_dir = os.path.join(validator.output_dir, 'vcf_format') self.assertTrue(os.path.exists(vcf_format_dir)) - - vcf_format_log_file = os.path.join(vcf_format_dir, 'input_passed.vcf.vcf_format.log') - self.assertTrue(os.path.exists(vcf_format_log_file)) - - with open(vcf_format_log_file) as vcf_format_log_file: + vcf_format_log_path = os.path.join(vcf_format_dir, 'input_passed.vcf.vcf_format.log') + self.assertTrue(os.path.exists(vcf_format_log_path)) + with open(vcf_format_log_path) as vcf_format_log_file: vcf_format_logs = vcf_format_log_file.readlines() self.assertEqual('[info] According to the VCF specification, the input file is valid\n', vcf_format_logs[2]) @@ -249,18 +248,24 @@ def assert_validation_results(self, validator, expected_sample_checker, expected self.assertEqual('According to the VCF specification, the input file is valid\n', text_report_content[0]) - # assert assembly report + # Assert assembly report check assembly_check_dir = os.path.join(validator.output_dir, 'assembly_check') self.assertTrue(os.path.exists(assembly_check_dir)) - - assembly_check_log_file = os.path.join(assembly_check_dir, 'input_passed.vcf.assembly_check.log') - self.assertTrue(os.path.exists(assembly_check_log_file)) - - with open(assembly_check_log_file) as assembly_check_log_file: + assembly_check_log_path = os.path.join(assembly_check_dir, 'input_passed.vcf.assembly_check.log') + self.assertTrue(os.path.exists(assembly_check_log_path)) + with open(assembly_check_log_path) as assembly_check_log_file: assembly_check_logs = assembly_check_log_file.readlines() self.assertEqual('[info] Number of matches: 247/247\n', assembly_check_logs[4]) self.assertEqual('[info] Percentage of matches: 100%\n', assembly_check_logs[5]) + # Assert normalisation check + norm_check_dir = os.path.join(validator.output_dir, 'norm_check') + self.assertTrue(os.path.exists(norm_check_dir)) + norm_check_log_path = os.path.join(norm_check_dir, 'input_passed.vcf_bcftools_norm.log') + with open(norm_check_log_path) as norm_check_log_file: + norm_check_logs = norm_check_log_file.readlines() + self.assertEqual('[E::faidx_adjust_position] The sequence "1" was not found\n', norm_check_logs[0]) + # Assert Samples concordance self.assert_yaml_file(validator._sample_check_yaml, expected_sample_checker) diff --git a/tests/resources/norm_check/invalid.vcf_bcftools_norm.log b/tests/resources/norm_check/invalid.vcf_bcftools_norm.log new file mode 100644 index 0000000..e3e3bb8 --- /dev/null +++ b/tests/resources/norm_check/invalid.vcf_bcftools_norm.log @@ -0,0 +1,5 @@ +NON_ACGTN_ALT chr1 49338976 ]chr1:49277505]T +NON_ACGTN_ALT chr1 49997014 TAT[chr1:50014208[ +NON_ACGTN_ALT chr1 50014208 ]chr1:49997014]ATT +NON_ACGTN_ALT chr1 191611692 [chr8:41723769[A +Lines total/split/realigned/skipped: 152/0/0/0 diff --git a/tests/resources/validation_reports/expected_report_metadata_json.html b/tests/resources/validation_reports/expected_metadata_json_report.html similarity index 99% rename from tests/resources/validation_reports/expected_report_metadata_json.html rename to tests/resources/validation_reports/expected_metadata_json_report.html index 6945ce8..b0cde25 100644 --- a/tests/resources/validation_reports/expected_report_metadata_json.html +++ b/tests/resources/validation_reports/expected_metadata_json_report.html @@ -158,7 +158,8 @@

Metadata validation results

VCF validation results

Checks whether each file is compliant with the VCF specification. - Also checks whether the variants' reference alleles match against the reference assembly. + Also checks whether the variants' reference alleles match against the reference assembly, + and whether the file can be normalised (if necessary) using bcftools norm.

input_fail.vcf

❌ Assembly check: 26/36 (72.22%)
@@ -215,9 +216,31 @@

input_fail.vcf

+
❌ Normalisation check: 4 errors (152 total lines: 0 split, 0 realigned, 0 skipped)
+
+
First 10 errors are below. Full report: /path/to/vcf_failed/norm/log
+ + + + + + + + + + + + + + + + +
CategoryError
Error NON_ACGTN_ALT chr1 49338976 ]chr1:49277505]T
Error NON_ACGTN_ALT chr1 49997014 TAT[chr1:50014208[
Error NON_ACGTN_ALT chr1 50014208 ]chr1:49997014]ATT
Error NON_ACGTN_ALT chr1 191611692 [chr8:41723769[A
+

input_passed.vcf

✔ Assembly check: 247/247 (100.0%)
✔ VCF check: 0 critical errors, 0 non-critical errors
+
✔ Normalisation check: 0 errors (152 total lines: 0 split, 0 realigned, 0 skipped)

Sample name concordance check

diff --git a/tests/resources/validation_reports/expected_report_metadata_json.txt b/tests/resources/validation_reports/expected_metadata_json_report.txt similarity index 92% rename from tests/resources/validation_reports/expected_report_metadata_json.txt rename to tests/resources/validation_reports/expected_metadata_json_report.txt index 58e7d4e..0b1e71e 100644 --- a/tests/resources/validation_reports/expected_report_metadata_json.txt +++ b/tests/resources/validation_reports/expected_metadata_json_report.txt @@ -71,7 +71,8 @@ For requirements, please refer to the EVA website (https://www.ebi.ac.uk/eva/?Su - VCF VALIDATION RESULTS Checks whether each file is compliant with the VCF specification (http://samtools.github.io/hts-specs/VCFv4.4.pdf). -Also checks whether the variants' reference alleles match against the reference assembly. +Also checks whether the variants' reference alleles match against the reference assembly, and whether the file can be +normalised (if necessary) using bcftools norm (https://samtools.github.io/bcftools/bcftools.html#norm). input_fail.vcf ❌ Assembly check: 26/36 (72.22%) First 10 errors per category are below. Full report: /path/to/assembly_failed/report @@ -89,9 +90,16 @@ Also checks whether the variants' reference alleles match against the reference First 10 errors per category are below. Full report: /path/to/vcf_failed/report Critical error: Line 4: Error in meta-data section. Non-critical error: Sample #11, field AD does not match the meta specification Number=R (expected 2 value(s)). AD=.. + ❌ Normalisation check: 4 errors (152 total lines: 0 split, 0 realigned, 0 skipped) + First 10 errors are below. Full report: /path/to/vcf_failed/norm/log + Error: NON_ACGTN_ALT chr1 49338976 ]chr1:49277505]T + Error: NON_ACGTN_ALT chr1 49997014 TAT[chr1:50014208[ + Error: NON_ACGTN_ALT chr1 50014208 ]chr1:49997014]ATT + Error: NON_ACGTN_ALT chr1 191611692 [chr8:41723769[A input_passed.vcf ✔ Assembly check: 247/247 (100.0%) ✔ VCF check: 0 critical errors, 0 non-critical errors + ✔ Normalisation check: 0 errors (152 total lines: 0 split, 0 realigned, 0 skipped) - SAMPLE NAME CONCORDANCE CHECK Checks whether information in the metadata is concordant with that contained in the VCF files, in particular sample names. diff --git a/tests/resources/validation_reports/expected_report_metadata_xlsx.html b/tests/resources/validation_reports/expected_metadata_xlsx_report.html similarity index 99% rename from tests/resources/validation_reports/expected_report_metadata_xlsx.html rename to tests/resources/validation_reports/expected_metadata_xlsx_report.html index 078ceb1..197b069 100644 --- a/tests/resources/validation_reports/expected_report_metadata_xlsx.html +++ b/tests/resources/validation_reports/expected_metadata_xlsx_report.html @@ -170,7 +170,8 @@

Metadata validation results

VCF validation results

Checks whether each file is compliant with the VCF specification. - Also checks whether the variants' reference alleles match against the reference assembly. + Also checks whether the variants' reference alleles match against the reference assembly, + and whether the file can be normalised (if necessary) using bcftools norm.

input_fail.vcf

❌ Assembly check: 26/36 (72.22%)
@@ -227,9 +228,31 @@

input_fail.vcf

+
❌ Normalisation check: 4 errors (152 total lines: 0 split, 0 realigned, 0 skipped)
+
+
First 10 errors are below. Full report: /path/to/vcf_failed/norm/log
+ + + + + + + + + + + + + + + + +
CategoryError
Error NON_ACGTN_ALT chr1 49338976 ]chr1:49277505]T
Error NON_ACGTN_ALT chr1 49997014 TAT[chr1:50014208[
Error NON_ACGTN_ALT chr1 50014208 ]chr1:49997014]ATT
Error NON_ACGTN_ALT chr1 191611692 [chr8:41723769[A
+

input_passed.vcf

✔ Assembly check: 247/247 (100.0%)
✔ VCF check: 0 critical errors, 0 non-critical errors
+
✔ Normalisation check: 0 errors (152 total lines: 0 split, 0 realigned, 0 skipped)

Sample name concordance check

diff --git a/tests/resources/validation_reports/expected_report_metadata_xlsx.txt b/tests/resources/validation_reports/expected_metadata_xlsx_report.txt similarity index 91% rename from tests/resources/validation_reports/expected_report_metadata_xlsx.txt rename to tests/resources/validation_reports/expected_metadata_xlsx_report.txt index 8d7df7f..2c98656 100644 --- a/tests/resources/validation_reports/expected_report_metadata_xlsx.txt +++ b/tests/resources/validation_reports/expected_metadata_xlsx_report.txt @@ -65,7 +65,8 @@ For requirements, please refer to the EVA website (https://www.ebi.ac.uk/eva/?Su - VCF VALIDATION RESULTS Checks whether each file is compliant with the VCF specification (http://samtools.github.io/hts-specs/VCFv4.4.pdf). -Also checks whether the variants' reference alleles match against the reference assembly. +Also checks whether the variants' reference alleles match against the reference assembly, and whether the file can be +normalised (if necessary) using bcftools norm (https://samtools.github.io/bcftools/bcftools.html#norm). input_fail.vcf ❌ Assembly check: 26/36 (72.22%) First 10 errors per category are below. Full report: /path/to/assembly_failed/report @@ -83,9 +84,16 @@ Also checks whether the variants' reference alleles match against the reference First 10 errors per category are below. Full report: /path/to/vcf_failed/report Critical error: Line 4: Error in meta-data section. Non-critical error: Sample #11, field AD does not match the meta specification Number=R (expected 2 value(s)). AD=.. + ❌ Normalisation check: 4 errors (152 total lines: 0 split, 0 realigned, 0 skipped) + First 10 errors are below. Full report: /path/to/vcf_failed/norm/log + Error: NON_ACGTN_ALT chr1 49338976 ]chr1:49277505]T + Error: NON_ACGTN_ALT chr1 49997014 TAT[chr1:50014208[ + Error: NON_ACGTN_ALT chr1 50014208 ]chr1:49997014]ATT + Error: NON_ACGTN_ALT chr1 191611692 [chr8:41723769[A input_passed.vcf ✔ Assembly check: 247/247 (100.0%) ✔ VCF check: 0 critical errors, 0 non-critical errors + ✔ Normalisation check: 0 errors (152 total lines: 0 split, 0 realigned, 0 skipped) - SAMPLE NAME CONCORDANCE CHECK Checks whether information in the metadata is concordant with that contained in the VCF files, in particular sample names. diff --git a/tests/resources/validation_reports/expected_shallow_metadata_xlsx_report.html b/tests/resources/validation_reports/expected_shallow_metadata_xlsx_report.html index 27f141b..8b9de8a 100644 --- a/tests/resources/validation_reports/expected_shallow_metadata_xlsx_report.html +++ b/tests/resources/validation_reports/expected_shallow_metadata_xlsx_report.html @@ -194,7 +194,8 @@

Metadata validation results

VCF validation results

Checks whether each file is compliant with the VCF specification. - Also checks whether the variants' reference alleles match against the reference assembly. + Also checks whether the variants' reference alleles match against the reference assembly, + and whether the file can be normalised (if necessary) using bcftools norm.

input_fail.vcf

❌ Assembly check: 26/36 (72.22%)
@@ -251,9 +252,31 @@

input_fail.vcf

+
❌ Normalisation check: 4 errors (152 total lines: 0 split, 0 realigned, 0 skipped)
+
+
First 10 errors are below. Full report: /path/to/vcf_failed/norm/log
+ + + + + + + + + + + + + + + + +
CategoryError
Error NON_ACGTN_ALT chr1 49338976 ]chr1:49277505]T
Error NON_ACGTN_ALT chr1 49997014 TAT[chr1:50014208[
Error NON_ACGTN_ALT chr1 50014208 ]chr1:49997014]ATT
Error NON_ACGTN_ALT chr1 191611692 [chr8:41723769[A
+

input_passed.vcf

✔ Assembly check: 247/247 (100.0%)
✔ VCF check: 0 critical errors, 0 non-critical errors
+
✔ Normalisation check: 0 errors (152 total lines: 0 split, 0 realigned, 0 skipped)

Sample name concordance check

diff --git a/tests/resources/validation_reports/expected_shallow_metadata_xlsx_report.txt b/tests/resources/validation_reports/expected_shallow_metadata_xlsx_report.txt index 03a731e..7aa135f 100644 --- a/tests/resources/validation_reports/expected_shallow_metadata_xlsx_report.txt +++ b/tests/resources/validation_reports/expected_shallow_metadata_xlsx_report.txt @@ -74,7 +74,8 @@ For requirements, please refer to the EVA website (https://www.ebi.ac.uk/eva/?Su - VCF VALIDATION RESULTS Checks whether each file is compliant with the VCF specification (http://samtools.github.io/hts-specs/VCFv4.4.pdf). -Also checks whether the variants' reference alleles match against the reference assembly. +Also checks whether the variants' reference alleles match against the reference assembly, and whether the file can be +normalised (if necessary) using bcftools norm (https://samtools.github.io/bcftools/bcftools.html#norm). input_fail.vcf ❌ Assembly check: 26/36 (72.22%) First 10 errors per category are below. Full report: /path/to/assembly_failed/report @@ -92,9 +93,16 @@ Also checks whether the variants' reference alleles match against the reference First 10 errors per category are below. Full report: /path/to/vcf_failed/report Critical error: Line 4: Error in meta-data section. Non-critical error: Sample #11, field AD does not match the meta specification Number=R (expected 2 value(s)). AD=.. + ❌ Normalisation check: 4 errors (152 total lines: 0 split, 0 realigned, 0 skipped) + First 10 errors are below. Full report: /path/to/vcf_failed/norm/log + Error: NON_ACGTN_ALT chr1 49338976 ]chr1:49277505]T + Error: NON_ACGTN_ALT chr1 49997014 TAT[chr1:50014208[ + Error: NON_ACGTN_ALT chr1 50014208 ]chr1:49997014]ATT + Error: NON_ACGTN_ALT chr1 191611692 [chr8:41723769[A input_passed.vcf ✔ Assembly check: 247/247 (100.0%) ✔ VCF check: 0 critical errors, 0 non-critical errors + ✔ Normalisation check: 0 errors (152 total lines: 0 split, 0 realigned, 0 skipped) - SAMPLE NAME CONCORDANCE CHECK Checks whether information in the metadata is concordant with that contained in the VCF files, in particular sample names. diff --git a/tests/resources/validation_reports/validation_output/norm_check/input_passed.vcf_bcftools_norm.log b/tests/resources/validation_reports/validation_output/norm_check/input_passed.vcf_bcftools_norm.log new file mode 100644 index 0000000..b1774ca --- /dev/null +++ b/tests/resources/validation_reports/validation_output/norm_check/input_passed.vcf_bcftools_norm.log @@ -0,0 +1 @@ +Lines total/split/realigned/skipped: 152/0/0/0 diff --git a/tests/test_docker_validator.py b/tests/test_docker_validator.py index a000688..66a62cc 100644 --- a/tests/test_docker_validator.py +++ b/tests/test_docker_validator.py @@ -92,35 +92,39 @@ def assert_sample_checker(self, sample_checker_file, expected_checker): def assert_validation_results(self, validator, expected_sample_checker, expected_metadata_files_json, expected_metadata_val, expected_semantic_val): + # Assert VCF format check vcf_format_dir = os.path.join(validator.output_dir, 'vcf_format') self.assertTrue(os.path.exists(vcf_format_dir)) - - vcf_format_log_file = os.path.join(vcf_format_dir, 'input_passed.vcf.vcf_format.log') - self.assertTrue(os.path.exists(vcf_format_log_file)) - - with open(vcf_format_log_file) as vcf_format_log_file: + vcf_format_log_path = os.path.join(vcf_format_dir, 'input_passed.vcf.vcf_format.log') + self.assertTrue(os.path.exists(vcf_format_log_path)) + with open(vcf_format_log_path) as vcf_format_log_file: vcf_format_logs = vcf_format_log_file.readlines() self.assertEqual('[info] According to the VCF specification, the input file is valid\n', vcf_format_logs[2]) - text_report = vcf_format_logs[1].split(':')[1].strip() with open(os.path.join(validator.output_dir, text_report)) as text_report: text_report_content = text_report.readlines() self.assertEqual('According to the VCF specification, the input file is valid\n', text_report_content[0]) - # assert assembly report + # Assert assembly report check assembly_check_dir = os.path.join(validator.output_dir, 'assembly_check') self.assertTrue(os.path.exists(assembly_check_dir)) - - assembly_check_log_file = os.path.join(assembly_check_dir, 'input_passed.vcf.assembly_check.log') - self.assertTrue(os.path.exists(assembly_check_log_file)) - - with open(assembly_check_log_file) as assembly_check_log_file: + assembly_check_log_path = os.path.join(assembly_check_dir, 'input_passed.vcf.assembly_check.log') + self.assertTrue(os.path.exists(assembly_check_log_path)) + with open(assembly_check_log_path) as assembly_check_log_file: assembly_check_logs = assembly_check_log_file.readlines() self.assertEqual('[info] Number of matches: 247/247\n', assembly_check_logs[4]) self.assertEqual('[info] Percentage of matches: 100%\n', assembly_check_logs[5]) + # Assert normalisation check + norm_check_dir = os.path.join(validator.output_dir, 'norm_check') + self.assertTrue(os.path.exists(norm_check_dir)) + norm_check_log_path = os.path.join(norm_check_dir, 'input_passed.vcf_bcftools_norm.log') + with open(norm_check_log_path) as norm_check_log_file: + norm_check_logs = norm_check_log_file.readlines() + self.assertEqual('[E::faidx_adjust_position] The sequence "1" was not found\n', norm_check_logs[0]) + # Assert Samples concordance self.assert_sample_checker(validator._sample_check_yaml, expected_sample_checker) diff --git a/tests/test_report.py b/tests/test_report.py index 1c8d2f2..fc3f2f6 100644 --- a/tests/test_report.py +++ b/tests/test_report.py @@ -6,7 +6,7 @@ import eva_sub_cli from eva_sub_cli.report import generate_html_report, generate_text_report -validation_results_xlsx = { +common_validation_results = { "ready_for_submission_to_eva": False, "assembly_check": { "input_passed.vcf": { @@ -37,6 +37,7 @@ "nb_mismatch": 10, "total": 36, }, + "pass": False, }, "vcf_check": { "input_passed.vcf": { @@ -56,6 +57,31 @@ "valid": False, "warning_count": 0, }, + "pass": False, + }, + 'norm_check': { + 'input_passed.vcf': { + 'error_list': [], + 'nb_error': 0, + 'nb_realigned': 0, + 'nb_skipped': 0, + 'nb_split': 0, + 'nb_total': 152, + 'report_path': '/path/to/vcf_passed/norm/log' + }, + 'input_fail.vcf': { + 'error_list': ['NON_ACGTN_ALT\tchr1\t49338976\t]chr1:49277505]T', + 'NON_ACGTN_ALT\tchr1\t49997014\tTAT[chr1:50014208[', + 'NON_ACGTN_ALT\tchr1\t50014208\t]chr1:49997014]ATT', + 'NON_ACGTN_ALT\tchr1\t191611692\t[chr8:41723769[A'], + 'nb_error': 4, + 'nb_realigned': 0, + 'nb_skipped': 0, + 'nb_split': 0, + 'nb_total': 152, + 'report_path': '/path/to/vcf_failed/norm/log' + }, + 'pass': False, }, "sample_check": { 'report_path': '/path/to/sample/report', @@ -81,11 +107,13 @@ 'more_per_submitted_files_metadata': {}, 'more_submitted_files_metadata': ['C1Sample ', ' C2Sample', 'C3Sample', 'C4Sample'] } - } + }, + "pass": False, }, # NB. obviously this doesn't make sense for the number of analyses in this report, but demonstrates the possible # outputs for this check. "fasta_check": { + "pass": False, 'not_all_insdc.fa': { 'report_path': '/path/to/not_all_insdc_check.yml', 'all_insdc': False, @@ -145,7 +173,22 @@ 'connection_error': '500 Server Error: Internal Server Error for url: https://www.ebi.ac.uk/eva/webservices/contig-alias/v1/chromosomes/md5checksum/hjfdoijsfc47hfg0gh9qwjrve' } }, + 'evidence_type_check': { + 'pass': False, + 'Analysis A': { + 'evidence_type': None, + 'errors': 'VCF file evidence type could not be determined: vcf_files_1, vcf_files_2' + }, + 'Analysis B': { + 'evidence_type': None, + 'errors': 'Multiple evidence types found: genotype, allele_frequency' + }, + } +} + +validation_results_xlsx = { 'metadata_check': { + "pass": False, 'spreadsheet_errors': [ {'sheet': 'Files', 'row': '', 'column': '', 'description': 'Sheet "Files" is missing'}, {'sheet': 'Project', 'row': 2, 'column': 'Project Title', @@ -166,164 +209,11 @@ 'description': 'Column "Sample Accession" is not populated'} ], 'spreadsheet_report_path': '/path/to/metadata/metadata_spreadsheet_validation.txt', - }, - - 'evidence_type_check': { - 'pass': False, - 'Analysis A': { - 'evidence_type': None, - 'errors': 'VCF file evidence type could not be determined: vcf_files_1, vcf_files_2' - }, - 'Analysis B': { - 'evidence_type': None, - 'errors': 'Multiple evidence types found: genotype, allele_frequency' - }, } } +validation_results_xlsx.update(common_validation_results) validation_results_json = { - "ready_for_submission_to_eva": False, - "assembly_check": { - "input_passed.vcf": { - "report_path": "/path/to/assembly_passed/report", - "error_list": [], - "match": 247, - "mismatch_list": [], - "nb_error": 0, - "nb_mismatch": 0, - "total": 247, - }, - "input_fail.vcf": { - "report_path": "/path/to/assembly_failed/report", - "error_list": ["The assembly checking could not be completed: Contig 'chr23' not found in assembly report"], - "match": 26, - "mismatch_list": [ - "Chromosome 1, position 35549, reference allele 'G' does not match the reference sequence, expected 'c'", - "Chromosome 1, position 35595, reference allele 'G' does not match the reference sequence, expected 'a'", - "Chromosome 1, position 35618, reference allele 'G' does not match the reference sequence, expected 'c'", - "Chromosome 1, position 35626, reference allele 'A' does not match the reference sequence, expected 'g'", - "Chromosome 1, position 35639, reference allele 'T' does not match the reference sequence, expected 'c'", - "Chromosome 1, position 35643, reference allele 'T' does not match the reference sequence, expected 'g'", - "Chromosome 1, position 35717, reference allele 'T' does not match the reference sequence, expected 'g'", - "Chromosome 1, position 35819, reference allele 'T' does not match the reference sequence, expected 'a'", - "Chromosome 1, position 35822, reference allele 'T' does not match the reference sequence, expected 'c'", - ], - "nb_error": 1, - "nb_mismatch": 10, - "total": 36, - }, - "pass": False, - }, - "vcf_check": { - "input_passed.vcf": { - 'report_path': '/path/to/vcf_passed/report', - "error_count": 0, - "error_list": [], - "valid": True, - "warning_count": 0, - }, - "input_fail.vcf": { - 'report_path': '/path/to/vcf_failed/report', - "critical_count": 1, - "critical_list": ["Line 4: Error in meta-data section."], - "error_count": 1, - "error_list": [ - "Sample #11, field AD does not match the meta specification Number=R (expected 2 value(s)). AD=.."], - "valid": False, - "warning_count": 0, - }, - "pass": False, - }, - "sample_check": { - 'report_path': '/path/to/sample/report', - 'overall_differences': True, - 'results_per_analysis': { - 'Analysis A': { - 'difference': True, - 'more_metadata_submitted_files': [' SampleA1', 'SampleA2 ', 'SampleA3', 'SampleA4', 'SampleA5', - 'SampleA6', 'SampleA7', 'SampleA8', 'SampleA9', 'SampleA10'], - 'more_per_submitted_files_metadata': {}, - 'more_submitted_files_metadata': ['A1Sample ', ' A2Sample', 'A3Sample', 'A4Sample', 'A5Sample', - 'A6Sample', 'A7Sample', 'A8Sample', 'A9Sample', 'A10Sample'] - }, - 'Analysis B': { - 'difference': False, - 'more_metadata_submitted_files': [], - 'more_per_submitted_files_metadata': {}, - 'more_submitted_files_metadata': [] - }, - 'Analysis C': { - 'difference': True, - 'more_metadata_submitted_files': ['SampleC1 ', ' SampleC2', 'SampleC3', 'SampleC4'], - 'more_per_submitted_files_metadata': {}, - 'more_submitted_files_metadata': ['C1Sample ', ' C2Sample', 'C3Sample', 'C4Sample'] - } - }, - "pass": False, - }, - # NB. obviously this doesn't make sense for the number of analyses in this report, but demonstrates the possible - # outputs for this check. - "fasta_check": { - "pass": False, - 'not_all_insdc.fa': { - 'report_path': '/path/to/not_all_insdc_check.yml', - 'all_insdc': False, - 'sequences': [ - {'sequence_name': '1', 'sequence_md5': 'hsjvchdhdo3ate83jdfd76rp2', 'insdc': True}, - {'sequence_name': '2', 'sequence_md5': 'hjfdoijsfc47hfg0gh9qwjrve', 'insdc': False} - ], - 'metadata_assembly_compatible': True, - 'possible_assemblies': {'GCA_1'}, - 'assembly_in_metadata': 'GCA_1', - 'associated_analyses': ['Analysis A'] - }, - 'metadata_asm_not_found.fa': { - 'report_path': '/path/to/metadata_asm_not_found.yml', - 'all_insdc': True, - 'sequences': [ - {'sequence_name': '1', 'sequence_md5': 'hsjvchdhdo3ate83jdfd76rp2', 'insdc': True}, - {'sequence_name': '2', 'sequence_md5': 'hjfdoijsfc47hfg0gh9qwjrve', 'insdc': True} - ], - 'possible_assemblies': {'GCA_1'} - }, - 'metadata_asm_not_match.fa': { - 'report_path': '/path/to/metadata_asm_not_match.yml', - 'all_insdc': True, - 'sequences': [ - {'sequence_name': '1', 'sequence_md5': 'hsjvchdhdo3ate83jdfd76rp2', 'insdc': True}, - {'sequence_name': '2', 'sequence_md5': 'hjfdoijsfc47hfg0gh9qwjrve', 'insdc': True} - ], - 'metadata_assembly_compatible': False, - 'possible_assemblies': {'GCA_1'}, - 'assembly_in_metadata': 'GCA_2', - 'associated_analyses': ['Analysis B'] - }, - 'metadata_asm_match.fa': { - 'report_path': '/path/to/metadata_asm_match.yml', - 'all_insdc': True, - 'sequences': [ - {'sequence_name': '1', 'sequence_md5': 'hsjvchdhdo3ate83jdfd76rp2', 'insdc': True}, - {'sequence_name': '2', 'sequence_md5': 'hjfdoijsfc47hfg0gh9qwjrve', 'insdc': True} - ], - 'metadata_assembly_compatible': True, - 'possible_assemblies': {'GCA_1'}, - 'assembly_in_metadata': 'GCA_1', - 'associated_analyses': ['Analysis A'] - }, - 'metadata_error.fa': { - 'report_path': '/path/to/metadata_error.yml', - 'all_insdc': True, - 'sequences': [ - {'sequence_name': '1', 'sequence_md5': 'hsjvchdhdo3ate83jdfd76rp2', 'insdc': True}, - {'sequence_name': '2', 'sequence_md5': 'hjfdoijsfc47hfg0gh9qwjrve', 'insdc': True} - ], - 'metadata_assembly_compatible': True, - 'possible_assemblies': {'GCA_1'}, - 'assembly_in_metadata': 'GCA_1', - 'associated_analyses': ['Analysis C'], - 'connection_error': '500 Server Error: Internal Server Error for url: https://www.ebi.ac.uk/eva/webservices/contig-alias/v1/chromosomes/md5checksum/hjfdoijsfc47hfg0gh9qwjrve' - } - }, 'metadata_check': { "pass": False, 'json_errors': [ @@ -343,34 +233,23 @@ {'property': '/sample/0', 'description': 'should match exactly one schema in oneOf'} ], 'json_report_path': '/path/to/json/metadata/report' - }, - - 'evidence_type_check': { - 'pass': False, - 'Analysis A': { - 'evidence_type': None, - 'errors': 'VCF file evidence type could not be determined: vcf_files_1, vcf_files_2' - }, - 'Analysis B': { - 'evidence_type': None, - 'errors': 'Multiple evidence types found: genotype, allele_frequency' - }, } } +validation_results_json.update(common_validation_results) class TestReport(TestCase): resource_dir = os.path.join(os.path.dirname(__file__), 'resources') expected_report_metadata_xlsx = os.path.join(resource_dir, 'validation_reports', - 'expected_report_metadata_xlsx.html') + 'expected_metadata_xlsx_report.html') expected_report_metadata_json = os.path.join(resource_dir, 'validation_reports', - 'expected_report_metadata_json.html') + 'expected_metadata_json_report.html') expected_report_metadata_xlsx_shallow = os.path.join(resource_dir, 'validation_reports', 'expected_shallow_metadata_xlsx_report.html') expected_text_report_metadata_xlsx = os.path.join(resource_dir, 'validation_reports', - 'expected_report_metadata_xlsx.txt') + 'expected_metadata_xlsx_report.txt') expected_text_report_metadata_json = os.path.join(resource_dir, 'validation_reports', - 'expected_report_metadata_json.txt') + 'expected_metadata_json_report.txt') expected_text_report_metadata_xlsx_shallow = os.path.join(resource_dir, 'validation_reports', 'expected_shallow_metadata_xlsx_report.txt') test_project_name = "My cool project" diff --git a/tests/test_validaton_results_parsers.py b/tests/test_validaton_results_parsers.py index 29fec21..8f8bdf2 100644 --- a/tests/test_validaton_results_parsers.py +++ b/tests/test_validaton_results_parsers.py @@ -2,7 +2,7 @@ from unittest import TestCase from eva_sub_cli.validators.validation_results_parsers import vcf_check_errors_is_critical, parse_assembly_check_log, \ - parse_assembly_check_report + parse_assembly_check_report, parse_bcftools_norm_report class TestValidationParsers(TestCase): @@ -42,3 +42,15 @@ def test_parse_assembly_check_report(self): assert nb_mismatch == 12 assert error_list == ['Chromosome scaffold_chr1 is not present in FASTA file'] assert nb_error == 1 + + def test_parse_bcftools_norm_report(self): + normalisation_report = os.path.join(self.resource_dir, 'norm_check', 'invalid.vcf_bcftools_norm.log') + error_list, nb_total, nb_split, nb_realigned, nb_skipped = parse_bcftools_norm_report(normalisation_report) + assert error_list == [ + "NON_ACGTN_ALT chr1 49338976 ]chr1:49277505]T", + "NON_ACGTN_ALT chr1 49997014 TAT[chr1:50014208[", + "NON_ACGTN_ALT chr1 50014208 ]chr1:49997014]ATT", + "NON_ACGTN_ALT chr1 191611692 [chr8:41723769[A" + ] + assert nb_total == 152 + assert nb_split == nb_realigned == nb_skipped == 0 diff --git a/tests/test_validator.py b/tests/test_validator.py index 89cb1ae..c839d99 100644 --- a/tests/test_validator.py +++ b/tests/test_validator.py @@ -16,6 +16,10 @@ 'input_passed.vcf': {'error_list': [], 'mismatch_list': [], 'nb_mismatch': 0, 'nb_error': 0, 'match': 247, 'total': 247} }, + 'norm_check': { + 'input_passed.vcf': {'error_list': [], 'nb_error': 0, 'nb_realigned': 0, 'nb_skipped': 0, 'nb_split': 0, + 'nb_total': 152, + 'report_path': '{resource_dir}/validation_reports/validation_output/norm_check/input_passed.vcf_bcftools_norm.log'}}, 'sample_check': { 'overall_differences': False, 'results_per_analysis': {