Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions eva_sub_cli/executables/check_fasta_insdc.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,11 +132,11 @@ def get_analyses_and_reference_genome_from_metadata(vcf_files_for_fasta, json_fi
else:
all_analyses.add(analysis_aliases[0])
# Get (single) assembly associated with these analyses
assemblies = [metadata.get_reference_assembly_for_analysis(analysis) for analysis in all_analyses]
assemblies = {metadata.get_reference_assembly_for_analysis(analysis) for analysis in all_analyses}
if len(assemblies) != 1:
logger.error(f'Could not determine assembly accession to check against fasta file, out of: {assemblies}')
return all_analyses, None
return all_analyses, assemblies[0]
return all_analyses, assemblies.pop()


def check_assembly_in_metadata(assembly_in_metadata):
Expand Down
16 changes: 16 additions & 0 deletions tests/test_check_fasta_insdc.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,22 @@ def test_get_analysis_and_reference_genome_from_metadata(self):
assert analyses == {'VD1'}
assert reference == 'GCA_000001405.27'

def test_get_analysis_and_reference_genome_from_metadata_multiple_analyses(self):
    """Several analyses sharing one referenceGenome must resolve to that single assembly.

    Passes three VCF files, each belonging to a different analysis
    (VD1, VD2, VD3), and expects all three analysis aliases back together
    with the one assembly accession they have in common.
    """
    sample_dir = os.path.join(self.resource_dir, 'sample_checker')
    metadata_path = os.path.join(sample_dir, 'metadata.json')
    # One VCF per analysis, in order: VD1, VD2, VD3.
    vcf_paths = [
        os.path.join(sample_dir, vcf_name)
        for vcf_name in ('example1.vcf.gz', 'example2.vcf', 'example3.vcf')
    ]
    # NOTE(review): presumably needed so file paths recorded in the metadata
    # resolve relative to the fixture directory -- confirm against the fixture.
    os.chdir(sample_dir)

    # Different analyses, but every one of them uses the same referenceGenome.
    result = get_analyses_and_reference_genome_from_metadata(vcf_paths, metadata_path)
    found_analyses, found_reference = result
    assert found_analyses == {'VD1', 'VD2', 'VD3'}
    assert found_reference == 'GCA_000001405.27'

def test_get_analysis_and_reference_genome_from_metadata_absolute_paths(self):
working_dir = os.path.join(self.resource_dir, 'sample_checker')
metadata_json = os.path.join(working_dir, 'metadata.json')
Expand Down