diff --git a/Makefile b/Makefile
index c3baa81..8c1c446 100644
--- a/Makefile
+++ b/Makefile
@@ -1,4 +1,6 @@
 .PHONY: test
 
 test:
+	docker-compose down
 	docker-compose run spec sh /app/test/run_tests.sh
+	docker-compose down
\ No newline at end of file
diff --git a/data_sources/djornl.yaml b/data_sources/djornl.yaml
new file mode 100644
index 0000000..495aa8a
--- /dev/null
+++ b/data_sources/djornl.yaml
@@ -0,0 +1,5 @@
+name: djornl
+category: network
+title: Jacobson Lab Exascale Networking data
+home_url: https://github.com/kbase/exascale_data
+data_url: https://github.com/kbase/exascale_data/releases/latest
diff --git a/importers/djornl/manifest.schema.json b/importers/djornl/manifest.schema.json
new file mode 100644
index 0000000..e29ab28
--- /dev/null
+++ b/importers/djornl/manifest.schema.json
@@ -0,0 +1,52 @@
+{
+ "$schema": "http://json-schema.org/draft-07/schema#",
+  "title": "Exascale parser file manifest",
+  "type": "array",
+  "items": {
+    "type": "object",
+    "required": ["data_type", "path"],
+    "oneOf": [
+      {
+        "properties": {
+          "data_type": { "enum": ["cluster"] }
+        },
+        "required": [ "prefix" ]
+      },
+      {
+        "properties": {
+          "data_type": { "enum": [ "node", "edge" ] }
+        }
+      }
+    ],
+    "properties": {
+      "data_type": {
+        "title": "Data type",
+        "type": "string",
+        "enum": ["node", "edge", "cluster"]
+      },
+      "creation_date": {
+        "title": "File creation date",
+        "description": "date of file creation in the format YYYY-MM-DD",
+        "type": "string",
+        "format": "date"
+      },
+      "description": {
+        "title": "Description of the cluster set",
+        "type": "string"
+      },
+      "path": {
+        "title": "File path",
+        "type": "string"
+      },
+      "prefix": {
+        "title": "Prefix",
+        "type": "string",
+        "description": "The prefix to be used for clusters, e.g. markov_i2, giving cluster IDs such as markov_i2:4. Required for cluster data, not used for node or edge data"
+      },
+      "title": {
+        "title": "Name of the cluster set",
+        "type": "string"
+      }
+    }
+  }
+}
diff --git a/importers/djornl/parser.py b/importers/djornl/parser.py
index 9eb5c87..5295fdb 100644
--- a/importers/djornl/parser.py
+++ b/importers/djornl/parser.py
@@ -8,6 +8,8 @@
 import requests
 import os
 import csv
+import yaml
+from jsonschema.validators import Draft7Validator
 
 import importers.utils.config as config
 
@@ -28,34 +30,49 @@ def _configure(self):
         configuration['_NODE_NAME'] = 'djornl_node'
         configuration['_EDGE_NAME'] = 'djornl_edge'
 
-        # Path config
-        configuration['_NODE_PATH'] = os.path.join(
-            configuration['ROOT_DATA_PATH'],
-            'aranet2-aragwas-MERGED-AMW-v2_091319_nodeTable.csv'
-        )
-        configuration['_NODE_FILE_COL_COUNT'] = 20
+        # read the manifest file, which contains path and file type info
+        manifest_file = os.path.join(configuration['ROOT_DATA_PATH'], 'manifest.yaml')
+
+        try:
+            with open(manifest_file) as fd:
+                manifest = yaml.safe_load(fd)
+        except FileNotFoundError:
+            raise RuntimeError(
+                f"No manifest file found at {manifest_file}.\n"
+                + "Please ensure that you have created a manifest that lists the files "
+                + "in the release"
+            )
+
+        # load the schema for the manifest and ensure that it is valid
+        schema_file = os.path.join(os.path.dirname(__file__), 'manifest.schema.json')
+        with open(schema_file) as fd:
+            manifest_schema = json.load(fd)
+
+        validator = Draft7Validator(manifest_schema)
+        if not validator.is_valid(manifest):
+            raise RuntimeError(
+                "The manifest file failed validation with the following errors:\n"
+                + "\n".join(e.message for e in sorted(validator.iter_errors(manifest), key=str))
+                + "\nPlease recheck the file and try again."
+            )
+
+        # make sure all the files listed actually exist
+        for type in ['node', 'edge', 'cluster']:
+            configuration[type + '_files'] = []
+
+        for file in manifest:
+            file_path = os.path.join(configuration['ROOT_DATA_PATH'], file['path'])
+
+            if not os.path.exists(file_path):
+                raise RuntimeError(f"{file_path}: file does not exist")
+
+            if not os.path.isfile(file_path):
+                raise RuntimeError(f"{file_path}: not a file")
+
+            # add the file to the appropriate list
+            file['file_path'] = file_path
+            configuration[file['data_type'] + '_files'].append(file)
 
-        configuration['_EDGE_PATH'] = os.path.join(
-            configuration['ROOT_DATA_PATH'],
-            'merged_edges-AMW-060820_AF.tsv'
-        )
-        configuration['_EDGE_FILE_COL_COUNT'] = 5
-
-        _CLUSTER_BASE = os.path.join(configuration['ROOT_DATA_PATH'], 'cluster_data')
-        configuration['_CLUSTER_PATHS'] = {
-            'cluster_I2': os.path.join(
-                _CLUSTER_BASE,
-                'out.aranetv2_subnet_AT-CX_top10percent_anno_AF_082919.abc.I2_named.tsv'
-            ),
-            'cluster_I4': os.path.join(
-                _CLUSTER_BASE,
-                'out.aranetv2_subnet_AT-CX_top10percent_anno_AF_082919.abc.I4_named.tsv'
-            ),
-            'cluster_I6': os.path.join(
-                _CLUSTER_BASE,
-                'out.aranetv2_subnet_AT-CX_top10percent_anno_AF_082919.abc.I6_named.tsv'
-            ),
-        }
         self._config = configuration
         return self._config
 
@@ -76,33 +93,44 @@ def load_edges(self):
         node_ix = {}
         edges = []
         node_name = self.config()['_NODE_NAME']
-        expected_col_count = self.config()['_EDGE_FILE_COL_COUNT']
-
-        with open(self.config()['_EDGE_PATH']) as fd:
-            csv_reader = csv.reader(fd, delimiter='\t')
-            next(csv_reader, None)  # skip headers
-            line_no = 1
-            for row in csv_reader:
-                line_no += 1
-
-                cols = [c.strip() for c in row]
-                if len(cols) != expected_col_count:
-                    n_cols = len(cols)
-                    raise RuntimeError(f"line {line_no}: expected {expected_col_count} cols, found {n_cols}")
-
-                node_ix[cols[0]] = 1
-                node_ix[cols[1]] = 1
-                edge_type = cols[4]
-                if edge_type not in edge_remap:
-                    raise RuntimeError(f"line {line_no}: invalid edge type: {edge_type}")
-
-                edges.append({
-                    '_key': f'{cols[0]}__{cols[1]}__{edge_remap[edge_type]}__{cols[2]}',
-                    '_from': f'{node_name}/{cols[0]}',
-                    '_to': f'{node_name}/{cols[1]}',
-                    'score': float(cols[2]),
-                    'edge_type': edge_remap[edge_type],
-                })
+        expected_col_count = 0
+        headers = []
+
+        for file in self.config()['edge_files']:
+            with open(file['file_path']) as fd:
+                csv_reader = csv.reader(fd, delimiter='\t')
+                line_no = 0
+                for row in csv_reader:
+                    line_no += 1
+                    if len(row) <= 1 or row[0][0] == '#':
+                        # comment / metadata
+                        continue
+
+                    cols = [c.strip() for c in row]
+
+                    if len(cols) != expected_col_count:
+                        n_cols = len(cols)
+
+                        if len(headers) == 0:
+                            expected_col_count = len(cols)
+                            headers = cols
+                            continue
+
+                        raise RuntimeError(f"{file['path']} line {line_no}: expected {expected_col_count} cols, found {n_cols}")
+
+                    node_ix[cols[0]] = 1
+                    node_ix[cols[1]] = 1
+                    edge_type = cols[4]
+                    if edge_type not in edge_remap:
+                        raise RuntimeError(f"{file['path']} line {line_no}: invalid edge type: {edge_type}")
+
+                    edges.append({
+                        '_key': f'{cols[0]}__{cols[1]}__{edge_remap[edge_type]}__{cols[2]}',
+                        '_from': f'{node_name}/{cols[0]}',
+                        '_to': f'{node_name}/{cols[1]}',
+                        'score': float(cols[2]),
+                        'edge_type': edge_remap[edge_type],
+                    })
 
         return {
             'nodes': [{'_key': n} for n in node_ix.keys()],
@@ -114,73 +142,108 @@ def load_node_metadata(self):
         """Load node metadata"""
 
         nodes = []
-        expected_col_count = self.config()['_NODE_FILE_COL_COUNT']
-        with open(self.config()['_NODE_PATH']) as fd:
-            csv_reader = csv.reader(fd, delimiter=',')
-            next(csv_reader, None)  # skip headers
-            line_no = 1
-            for row in csv_reader:
-                line_no += 1
-
-                cols = [c.strip() for c in row]
-                if len(cols) != expected_col_count:
-                    n_cols = len(cols)
-                    raise RuntimeError(f"line {line_no}: expected {expected_col_count} cols, found {n_cols}")
-
-                _key = cols[0]
-                node_type = cols[1]
-                if node_type != 'gene' and node_type != 'pheno':
-                    raise RuntimeError(f"line {line_no}: invalid node type: {node_type}")
-
-                go_terms = [c.strip() for c in cols[10].split(',')] if len(cols[10]) else []
-
-                doc = {
-                    '_key': _key,
-                    'node_type': node_type,
-                    'transcript': cols[2],
-                    'gene_symbol': cols[3],
-                    'gene_full_name': cols[4],
-                    'gene_model_type': cols[5],
-                    'tair_computational_desc': cols[6],
-                    'tair_curator_summary': cols[7],
-                    'tair_short_desc': cols[8],
-                    'go_descr': cols[9],
-                    'go_terms': go_terms,
-                    'mapman_bin': cols[11],
-                    'mapman_name': cols[12],
-                    'mapman_desc': cols[13],
-                    'pheno_aragwas_id': cols[14],
-                    'pheno_desc1': cols[15],
-                    'pheno_desc2': cols[16],
-                    'pheno_desc3': cols[17],
-                    'pheno_ref': cols[18],
-                    'user_notes': cols[19],
-                }
-                nodes.append(doc)
+        headers = []
+        expected_col_count = 0
+        valid_node_types = ['gene', 'pheno']
+        for file in self.config()['node_files']:
+            with open(file['file_path']) as fd:
+                csv_reader = csv.reader(fd, delimiter=',')
+                line_no = 0
+                for row in csv_reader:
+                    line_no += 1
+                    if len(row) <= 1 or row[0][0] == '#':
+                        # comment / metadata
+                        continue
+
+                    cols = [c.strip() for c in row]
+                    if len(cols) != expected_col_count:
+
+                        if len(headers) == 0:
+                            # this is the header row; set up the expected column count
+                            expected_col_count = len(cols)
+                            headers = cols
+                            continue
+
+                        # otherwise, this row does not have the correct number of columns
+                        n_cols = len(cols)
+                        raise RuntimeError(f"{file['path']} line {line_no}: expected {expected_col_count} cols, found {n_cols}")
+
+                    _key = cols[0]
+                    node_type = cols[1]
+                    if node_type not in valid_node_types:
+                        raise RuntimeError(f"{file['path']} line {line_no}: invalid node type: {node_type}")
+
+                    go_terms = [c.strip() for c in cols[10].split(',')] if len(cols[10]) else []
+
+                    doc = {
+                        '_key': _key,
+                        'node_type': node_type,
+                        'transcript': cols[2],
+                        'gene_symbol': cols[3],
+                        'gene_full_name': cols[4],
+                        'gene_model_type': cols[5],
+                        'tair_computational_desc': cols[6],
+                        'tair_curator_summary': cols[7],
+                        'tair_short_desc': cols[8],
+                        'go_descr': cols[9],
+                        'go_terms': go_terms,
+                        'mapman_bin': cols[11],
+                        'mapman_name': cols[12],
+                        'mapman_desc': cols[13],
+                        'pheno_aragwas_id': cols[14],
+                        'pheno_desc1': cols[15],
+                        'pheno_desc2': cols[16],
+                        'pheno_desc3': cols[17],
+                        'pheno_ref': cols[18],
+                        'user_notes': cols[19],
+                    }
+                    nodes.append(doc)
 
         return {'nodes': nodes}
 
 
     def load_cluster_data(self):
         """Annotate genes with cluster ID fields."""
-        nodes = []
-        cluster_paths = self.config()['_CLUSTER_PATHS']
-        for (cluster_label, path) in cluster_paths.items():
-            with open(path) as fd:
+
+        # node_ix maps each node key to the list of cluster IDs assigned to it
+        node_ix = {}
+        for file in self.config()['cluster_files']:
+            cluster_label = file['prefix']
+            with open(file['file_path']) as fd:
                 csv_reader = csv.reader(fd, delimiter='\t')
+                line_no = 0
                 for row in csv_reader:
-                    if len(row) > 1:
-                        # remove the 'Cluster' text
-                        cluster_id = row[0].replace('Cluster','')
-                        gene_keys = row[1:]
-                        nodes += [
-                            {'_key': key, cluster_label: int(cluster_id)}
-                            for key in gene_keys
-                        ]
+                    line_no += 1
+                    if len(row) <= 1 or row[0][0] == '#':
+                        # skip rows with fewer than two cells and '#' comment / metadata lines
+                        continue
+                    # NOTE(review): the row[0][0] test above raises IndexError if a multi-cell row has an empty first cell
+                    self._parse_cluster_row(row, cluster_label, node_ix)
+
+        # build one document per node key, each carrying that node's list of cluster IDs
+        nodes = [{
+            '_key': key,
+            'clusters': cluster_data
+        } for (key, cluster_data) in node_ix.items()]
 
         return {'nodes': nodes}
 
 
+    def _parse_cluster_row(self, row, cluster_label, node_ix):
+        """Record the cluster named in row[0] against every node key in row[1:].
+
+        The 'Cluster' text is removed from row[0] and the remainder is appended to
+        cluster_label as '<cluster_label>:<remainder>'. node_ix maps node keys to
+        lists of such IDs; it is updated in place and duplicates are not added.
+        """
+        # the ID is the same for every node in the row, so build it once
+        cluster_id = cluster_label + ':' + row[0].replace('Cluster', '')
+        for key in row[1:]:
+            node_clusters = node_ix.setdefault(key, [])
+            if cluster_id not in node_clusters:
+                node_clusters.append(cluster_id)
+
+
     def save_dataset(self, dataset):
 
         if 'nodes' in dataset and len(dataset['nodes']) > 0:
@@ -212,3 +275,32 @@ def load_data(self):
         self.save_dataset(self.load_node_metadata())
         self.save_dataset(self.load_cluster_data())
 
+
+    def check_data_delta(self):
+        """Load the edge, node metadata and cluster datasets and pass them to check_deltas."""
+        self.check_deltas(
+            edge_data=self.load_edges(),
+            node_metadata=self.load_node_metadata(),
+            cluster_data=self.load_cluster_data())
+
+    def check_deltas(self, edge_data=None, node_metadata=None, cluster_data=None):
+        """Print nodes found in the edge or cluster data but not in the node metadata.
+
+        Each argument is a dataset dict with a 'nodes' list of {'_key': ...} dicts;
+        edge_data also has an 'edges' list. A dataset that is omitted or lacks a
+        list is treated as empty. Also prints the total edge and node counts.
+        """
+        def node_keys(dataset):
+            return {n['_key'] for n in (dataset or {}).get('nodes', [])}
+
+        edge_nodes = node_keys(edge_data)
+        node_metadata_nodes = node_keys(node_metadata)
+        cluster_nodes = node_keys(cluster_data)
+        all_nodes = edge_nodes | node_metadata_nodes | cluster_nodes
+
+        for label, keys in (('clusters', cluster_nodes), ('edges', edge_nodes)):
+            missing = keys - node_metadata_nodes
+            if missing:
+                print({f'{label} with no node metadata': missing})
+        print("Dataset contains " + str(len((edge_data or {}).get('edges', []))) + " edges")
+        print("Dataset contains " + str(len(all_nodes)) + " nodes")
diff --git a/schemas/deltaloader/delta_load_registry.yaml b/schemas/deltaloader/delta_load_registry.yaml
index dc9c7f8..419b2a7 100644
--- a/schemas/deltaloader/delta_load_registry.yaml
+++ b/schemas/deltaloader/delta_load_registry.yaml
@@ -5,7 +5,7 @@ schema:
   "$schema": http://json-schema.org/draft-07/schema#
   title: delta_load_registry
   type: object
-  description: Don't touch this. It's for the exlusive use of delta loaders.
+  description: Don't touch this. It's for the exclusive use of delta loaders.
   properties:
     _key:
       type: string
diff --git a/schemas/djornl/djornl_node.yaml b/schemas/djornl/djornl_node.yaml
index 9248f1c..a7b44a8 100644
--- a/schemas/djornl/djornl_node.yaml
+++ b/schemas/djornl/djornl_node.yaml
@@ -2,6 +2,10 @@ name: djornl_node
 type: vertex
 delta: false
 
+indexes:
+ - type: hash
+   fields: ["clusters[*]"]
+
 schema:
   "$schema": http://json-schema.org/draft-07/schema#
   title: Gene and Phenotype Vertices
@@ -13,21 +17,14 @@ schema:
       type: string
       title: Key
       examples: ["AT1G01010"]
-    cluster_I2:
-      type: integer
-      title: Cluster 2 ID
-      description: Iterative random forest cluster group ID
-      examples: [1]
-    cluster_I4:
-      type: integer
-      title: Cluster 4 ID
-      description: Iterative random forest cluster group ID
-      examples: [13]
-    cluster_I6:
-      type: integer
-      title: Cluster 6 ID
-      description: Iterative random forest cluster group ID
-      examples: [27]
+    clusters:
+      type: array
+      title: Clusters
+      description: Clusters to which the node has been assigned
+      items:
+        type: string
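+        # TODO: enforce the "prefix:id" form with pattern: '^\w+:\d+$' (single-quoted; \w and \d are invalid escapes in a double-quoted YAML string)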
+      examples: [["markov_i2:1", "markov_i4:5"], ["markov_i6:3"]]
     node_type:
       type: string
       title: Node type
diff --git a/stored_queries/djornl/djornl_fetch_clusters.yaml b/stored_queries/djornl/djornl_fetch_clusters.yaml
index 4c6b8c5..1fadca3 100644
--- a/stored_queries/djornl/djornl_fetch_clusters.yaml
+++ b/stored_queries/djornl/djornl_fetch_clusters.yaml
@@ -2,25 +2,13 @@ name: djornl_fetch_clusters
 description: Fetch all nodes that are members of the specified cluster(s), and the edges and nodes within the specified distance (number of hops) of those nodes.
 params:
   type: object
+  required: [cluster_ids]
   properties:
-    cluster_i2_ids:
-      title: Cluster I2 IDs
-      description: Cluster I2 IDs to locate
-      items: {type: integer}
-      default: []
-      examples: [[1], [3, 5]]
-    cluster_i4_ids:
-      title: Cluster I4 IDs
-      description: Cluster I4 IDs to locate
-      items: {type: integer}
-      examples: [[2], [4, 6]]
-      default: []
-    cluster_i6_ids:
-      title: Cluster I6 IDs
-      description: Cluster I6 IDs to locate
-      items: {type: integer}
-      examples: [[666], [999, 333]]
-      default: []
+    cluster_ids:
+      title: Cluster IDs
+      description: Cluster IDs, in the form "clustering_system_name:cluster_id"
+      items: {type: string}
+      examples: [['markov_i2:5', 'markov_i6:2'],['markov_i6:1']]
     distance:
       type: integer
       title: Traversal Distance
@@ -31,7 +19,7 @@ params:
 query: |
   LET node_ids = (
     FOR n IN djornl_node
-      FILTER n.cluster_I2 IN @cluster_i2_ids OR n.cluster_I4 IN @cluster_i4_ids OR n.cluster_I6 IN @cluster_i6_ids
+      FILTER n.clusters ANY IN @cluster_ids
       FOR node IN 0..@distance ANY n djornl_edge
         OPTIONS {bfs: true, uniqueVertices: "global"}
         RETURN DISTINCT node._id
diff --git a/test/djornl/col_count_errors/manifest.yaml b/test/djornl/col_count_errors/manifest.yaml
new file mode 100644
index 0000000..88ab96d
--- /dev/null
+++ b/test/djornl/col_count_errors/manifest.yaml
@@ -0,0 +1,5 @@
+- data_type: edge
+  path: merged_edges-AMW-060820_AF.tsv
+
+- data_type: node
+  path: aranet2-aragwas-MERGED-AMW-v2_091319_nodeTable.csv
diff --git a/test/djornl/empty_files/manifest.yaml b/test/djornl/empty_files/manifest.yaml
new file mode 100644
index 0000000..7d42ff6
--- /dev/null
+++ b/test/djornl/empty_files/manifest.yaml
@@ -0,0 +1,17 @@
+- data_type: edge
+  path: merged_edges-AMW-060820_AF.tsv
+
+- data_type: node
+  path: aranet2-aragwas-MERGED-AMW-v2_091319_nodeTable.csv
+
+- data_type: cluster
+  prefix: markov_i2
+  path: cluster_data/out.aranetv2_subnet_AT-CX_top10percent_anno_AF_082919.abc.I2_named.tsv
+
+- data_type: cluster
+  prefix: markov_i4
+  path: cluster_data/out.aranetv2_subnet_AT-CX_top10percent_anno_AF_082919.abc.I4_named.tsv
+
+- data_type: cluster
+  prefix: markov_i6
+  path: cluster_data/out.aranetv2_subnet_AT-CX_top10percent_anno_AF_082919.abc.I6_named.tsv
diff --git a/test/djornl/invalid_file/edges.tsv/touch b/test/djornl/invalid_file/edges.tsv/touch
new file mode 100644
index 0000000..e69de29
diff --git a/test/djornl/invalid_file/manifest.yaml b/test/djornl/invalid_file/manifest.yaml
new file mode 100644
index 0000000..3a12de5
--- /dev/null
+++ b/test/djornl/invalid_file/manifest.yaml
@@ -0,0 +1,9 @@
+- data_type: edge
+  path: edges.tsv
+
+- data_type: node
+  path: nodes.csv
+
+- data_type: cluster
+  prefix: markov_i2
+  path: clusters.tsv
diff --git a/test/djornl/invalid_manifest/manifest.yaml b/test/djornl/invalid_manifest/manifest.yaml
new file mode 100644
index 0000000..e7fa88e
--- /dev/null
+++ b/test/djornl/invalid_manifest/manifest.yaml
@@ -0,0 +1,10 @@
+- data_type: edge
+  path: edges.tsv
+
+- data_type: node
+
+- data_type: cluster
+  path: clusters.tsv
+
+- data_type: ping-pong balls
+  path: where?
\ No newline at end of file
diff --git a/test/djornl/invalid_types/aranet2-aragwas-MERGED-AMW-v2_091319_nodeTable.csv b/test/djornl/invalid_types/aranet2-aragwas-MERGED-AMW-v2_091319_nodeTable.csv
index af5fa6c..543dd99 100644
--- a/test/djornl/invalid_types/aranet2-aragwas-MERGED-AMW-v2_091319_nodeTable.csv
+++ b/test/djornl/invalid_types/aranet2-aragwas-MERGED-AMW-v2_091319_nodeTable.csv
@@ -1,4 +1,5 @@
 node_id,node_type,transcript,gene_symbol,gene_full_name,gene_model_type,TAIR_Computational_description,TAIR_Curator_summary,TAIR_short_description,GO_descr,GO_terms,MapMan_bin,MapMan_name,MapMan_descr,pheno_AraGWAS_ID,pheno_descrip1,pheno_descrip2,pheno_descrip3,pheno_ref,UserNotes
+# data_type: node
 As2,pheno,,,,,,,,,,,,,10.21958/phenotype:103,,bacterial disease resistance,The resistance exhibited by a plant or a group of plants (population) in response to the disease caused by a bacterial pathogen infection as compared to the susceptible and/or the reference plants of the same species. [GR:pj],"Atwell et. al, Nature 2010",
 As75,pheno,,,,,,,,,,,,,10.21958/phenotype:67,"Arsenic concentrations in leaves, grown in soil. Elemental analysis was performed with an ICP-MS (PerkinElmer). Sample normalized to calculated weights as described in Baxter et al., 2008",arsenic concentration,A mineral and ion content related trait (TO:0000465) which is the concentration of arsenic (CHEBI:22632) in some plant structure (PO:0009011). [GR:Karthik],"Atwell et. al, Nature 2010",
 AT1G01010,Monkey,AT1G01010.1,NTL10,NAC domain containing protein 1,protein_coding,NAC domain containing protein 1;(source:Araport11),,NAC domain containing protein 1,"DNA-binding transcription factor activity, DNA binding","GO:0003700, GO:0003677",15.5.17,.RNA biosynthesis.transcriptional regulation.transcription factor (NAC),transcription factor (NAC) (original description: pep chromosome:TAIR10:1:3631:5899:1 gene:AT1G01010 transcript:AT1G01010.1 gene_biotype:protein_coding transcript_biotype:protein_coding gene_symbol:NAC001 description:NAC domain-containing protein 1 [Source:UniProtKB/Swiss-Prot;Acc:Q0WV96]),,,,,,
diff --git a/test/djornl/invalid_types/manifest.yaml b/test/djornl/invalid_types/manifest.yaml
new file mode 100644
index 0000000..88ab96d
--- /dev/null
+++ b/test/djornl/invalid_types/manifest.yaml
@@ -0,0 +1,5 @@
+- data_type: edge
+  path: merged_edges-AMW-060820_AF.tsv
+
+- data_type: node
+  path: aranet2-aragwas-MERGED-AMW-v2_091319_nodeTable.csv
diff --git a/test/djornl/invalid_types/merged_edges-AMW-060820_AF.tsv b/test/djornl/invalid_types/merged_edges-AMW-060820_AF.tsv
index f9857bd..a98f49f 100644
--- a/test/djornl/invalid_types/merged_edges-AMW-060820_AF.tsv
+++ b/test/djornl/invalid_types/merged_edges-AMW-060820_AF.tsv
@@ -1,3 +1,4 @@
+# data_type: edge
 node1	node2	edge	edge_descrip	layer_descrip
 As2	AT1G01020	8.422046084731258	AraGWAS-Association_score	AraGWAS-Some-Old-Rubbish-I-Made-Up
 As2	AT1G01040	5.422046084731258	AraGWAS-Association_score	AraGWAS-Phenotype_Associations
diff --git a/test/djornl/missing_files/manifest.yaml b/test/djornl/missing_files/manifest.yaml
new file mode 100644
index 0000000..3a12de5
--- /dev/null
+++ b/test/djornl/missing_files/manifest.yaml
@@ -0,0 +1,9 @@
+- data_type: edge
+  path: edges.tsv
+
+- data_type: node
+  path: nodes.csv
+
+- data_type: cluster
+  prefix: markov_i2
+  path: clusters.tsv
diff --git a/test/djornl/results.json b/test/djornl/results.json
index a844c2c..7fd3a4d 100644
--- a/test/djornl/results.json
+++ b/test/djornl/results.json
@@ -27,21 +27,15 @@
   },
   "load_cluster_data": {
     "nodes": [
-      {"_key": "AT1G01010", "cluster_I2": 1},
-      {"_key": "AT1G01030", "cluster_I2": 1},
-      {"_key": "AT1G01040", "cluster_I2": 1},
-      {"_key": "AT1G01050", "cluster_I2": 2},
-      {"_key": "AT1G01060", "cluster_I2": 2},
-      {"_key": "AT1G01070", "cluster_I2": 2},
-      {"_key": "AT1G01080", "cluster_I2": 3},
-      {"_key": "AT1G01090", "cluster_I2": 3},
-      {"_key": "AT1G01020", "cluster_I2": 5},
-      {"_key": "AT1G01040", "cluster_I6": 1},
-      {"_key": "AT1G01090", "cluster_I6": 1},
-      {"_key": "AT1G01070", "cluster_I6": 2},
-      {"_key": "AT1G01010", "cluster_I6": 3},
-      {"_key": "AT1G01020", "cluster_I6": 3},
-      {"_key": "AT1G01030", "cluster_I6": 3}
+      {"_key": "AT1G01010", "clusters": ["markov_i2:1", "markov_i6:3"]},
+      {"_key": "AT1G01030", "clusters": ["markov_i2:1", "markov_i6:3"]},
+      {"_key": "AT1G01040", "clusters": ["markov_i2:1", "markov_i6:1"]},
+      {"_key": "AT1G01050", "clusters": ["markov_i2:2"]},
+      {"_key": "AT1G01060", "clusters": ["markov_i2:2"]},
+      {"_key": "AT1G01070", "clusters": ["markov_i2:2", "markov_i6:2"]},
+      {"_key": "AT1G01080", "clusters": ["markov_i2:3"]},
+      {"_key": "AT1G01090", "clusters": ["markov_i2:3", "markov_i6:1"]},
+      {"_key": "AT1G01020", "clusters": ["markov_i2:5", "markov_i6:3"]}
     ]
   },
   "load_node_metadata": {
@@ -93,220 +87,265 @@
     ]
   },
   "fetch_genes": {
-    "AT1G01010": {
-      "0": {
-        "nodes": ["AT1G01010"],
-        "edges": []
+    "keys": {
+      "Mary Poppins": {
+        "distance": {
+          "0": {"nodes": [], "edges": []},
+          "1": {"nodes": [], "edges": []},
+          "5": {"nodes": [], "edges": []}
+        }
       },
-      "1": {
-        "nodes": [
-          "AT1G01010",
-          "AT1G01020",
-          "AT1G01030",
-          "AT1G01040"
-        ],
-        "edges": [
-          "AT1G01010__AT1G01020__ppi_hithru__2.3",
-          "AT1G01010__AT1G01030__ppi_hithru__2.4",
-          "AT1G01010__AT1G01040__domain_co_occur__2.5",
-          "AT1G01010__AT1G01040__ppi_liter__170.5"
-        ]
+      "AT1G01010": {
+        "distance": {
+          "0": {
+            "nodes": ["AT1G01010"],
+            "edges": []
+          },
+          "1": {
+            "nodes": [
+              "AT1G01010",
+              "AT1G01020",
+              "AT1G01030",
+              "AT1G01040"
+            ],
+            "edges": [
+              "AT1G01010__AT1G01020__ppi_hithru__2.3",
+              "AT1G01010__AT1G01030__ppi_hithru__2.4",
+              "AT1G01010__AT1G01040__domain_co_occur__2.5",
+              "AT1G01010__AT1G01040__ppi_liter__170.5"
+            ]
+          },
+          "5": {
+            "nodes": ["As2", "As75", "AT1G01010", "AT1G01020", "AT1G01030", "AT1G01040", "AT1G01050", "AT1G01060"],
+            "edges": [
+              "As2__AT1G01020__pheno_assn__8.4",
+              "As2__AT1G01040__pheno_assn__5.4",
+              "As75__AT1G01020__pheno_assn__39.9",
+              "AT1G01010__AT1G01020__ppi_hithru__2.3",
+              "AT1G01010__AT1G01030__ppi_hithru__2.4",
+              "AT1G01010__AT1G01040__domain_co_occur__2.5",
+              "AT1G01010__AT1G01040__ppi_liter__170.5",
+              "AT1G01030__AT1G01050__gene_coexpr__2.6",
+              "AT1G01050__AT1G01060__ppi_liter__2.7"
+            ]
+          }
+        }
       },
-      "5": {
-        "nodes": ["As2", "As75", "AT1G01010", "AT1G01020", "AT1G01030", "AT1G01040", "AT1G01050", "AT1G01060"],
-        "edges": [
-          "As2__AT1G01020__pheno_assn__8.4",
-          "As2__AT1G01040__pheno_assn__5.4",
-          "As75__AT1G01020__pheno_assn__39.9",
-          "AT1G01010__AT1G01020__ppi_hithru__2.3",
-          "AT1G01010__AT1G01030__ppi_hithru__2.4",
-          "AT1G01010__AT1G01040__domain_co_occur__2.5",
-          "AT1G01010__AT1G01040__ppi_liter__170.5",
-          "AT1G01030__AT1G01050__gene_coexpr__2.6",
-          "AT1G01050__AT1G01060__ppi_liter__2.7"
-        ]
-      }
-    },
-    "AT1G01020__AT1G01070": {
-      "0": {
-        "nodes": ["AT1G01020", "AT1G01070"],
-        "edges": []
-      },
-      "1": {
-        "nodes": ["As2", "As75", "AT1G01010", "AT1G01020", "AT1G01070"],
-        "edges": [
-          "As2__AT1G01020__pheno_assn__8.4",
-          "As75__AT1G01020__pheno_assn__39.9",
-          "AT1G01010__AT1G01020__ppi_hithru__2.3"
-        ]
-      },
-      "5": {
-        "nodes": ["As2", "As75", "AT1G01010", "AT1G01020", "AT1G01030", "AT1G01040", "AT1G01050", "AT1G01060", "AT1G01070"],
-        "edges": [
-          "As2__AT1G01020__pheno_assn__8.4",
-          "As2__AT1G01040__pheno_assn__5.4",
-          "As75__AT1G01020__pheno_assn__39.9",
-          "AT1G01010__AT1G01020__ppi_hithru__2.3",
-          "AT1G01010__AT1G01030__ppi_hithru__2.4",
-          "AT1G01010__AT1G01040__domain_co_occur__2.5",
-          "AT1G01010__AT1G01040__ppi_liter__170.5",
-          "AT1G01030__AT1G01050__gene_coexpr__2.6",
-          "AT1G01050__AT1G01060__ppi_liter__2.7"
-        ]
+      "AT1G01020__AT1G01070": {
+        "distance": {
+          "0": {
+            "nodes": ["AT1G01020", "AT1G01070"],
+            "edges": []
+          },
+          "1": {
+            "nodes": ["As2", "As75", "AT1G01010", "AT1G01020", "AT1G01070"],
+            "edges": [
+              "As2__AT1G01020__pheno_assn__8.4",
+              "As75__AT1G01020__pheno_assn__39.9",
+              "AT1G01010__AT1G01020__ppi_hithru__2.3"
+            ]
+          },
+          "5": {
+            "nodes": ["As2", "As75", "AT1G01010", "AT1G01020", "AT1G01030", "AT1G01040", "AT1G01050", "AT1G01060", "AT1G01070"],
+            "edges": [
+              "As2__AT1G01020__pheno_assn__8.4",
+              "As2__AT1G01040__pheno_assn__5.4",
+              "As75__AT1G01020__pheno_assn__39.9",
+              "AT1G01010__AT1G01020__ppi_hithru__2.3",
+              "AT1G01010__AT1G01030__ppi_hithru__2.4",
+              "AT1G01010__AT1G01040__domain_co_occur__2.5",
+              "AT1G01010__AT1G01040__ppi_liter__170.5",
+              "AT1G01030__AT1G01050__gene_coexpr__2.6",
+              "AT1G01050__AT1G01060__ppi_liter__2.7"
+            ]
+          }
+        }
       }
     }
   },
   "fetch_phenotypes": {
-    "As2": {
-      "0": {
-        "nodes": ["As2"],
-        "edges": []
-      },
-      "1": {
-        "nodes": ["As2", "AT1G01020", "AT1G01040"],
-        "edges": [
-          "As2__AT1G01020__pheno_assn__8.4",
-          "As2__AT1G01040__pheno_assn__5.4"
-        ]
-      },
-      "5": {
-        "nodes": ["As2", "As75", "AT1G01010", "AT1G01020", "AT1G01030", "AT1G01040", "AT1G01050", "AT1G01060"],
-        "edges": [
-          "As2__AT1G01020__pheno_assn__8.4",
-          "As2__AT1G01040__pheno_assn__5.4",
-          "As75__AT1G01020__pheno_assn__39.9",
-          "AT1G01010__AT1G01020__ppi_hithru__2.3",
-          "AT1G01010__AT1G01030__ppi_hithru__2.4",
-          "AT1G01010__AT1G01040__domain_co_occur__2.5",
-          "AT1G01010__AT1G01040__ppi_liter__170.5",
-          "AT1G01030__AT1G01050__gene_coexpr__2.6",
-          "AT1G01050__AT1G01060__ppi_liter__2.7"
-        ]
-      }
-    },
-    "As2__Na23": {
-      "0": {
-        "nodes": ["As2", "Na23"],
-        "edges": []
+    "keys": {
+      "Mary Poppins": {
+        "distance": {
+          "0": {"nodes": [], "edges": []},
+          "1": {"nodes": [], "edges": []},
+          "5": {"nodes": [], "edges": []}
+        }
       },
-      "1": {
-        "nodes": ["As2", "Na23", "AT1G01020", "AT1G01040"],
-        "edges": [
-          "As2__AT1G01020__pheno_assn__8.4",
-          "As2__AT1G01040__pheno_assn__5.4"
-        ]
+      "As2": {
+        "distance": {
+          "0": {
+            "nodes": ["As2"],
+            "edges": []
+          },
+          "1": {
+            "nodes": ["As2", "AT1G01020", "AT1G01040"],
+            "edges": [
+              "As2__AT1G01020__pheno_assn__8.4",
+              "As2__AT1G01040__pheno_assn__5.4"
+            ]
+          },
+          "5": {
+            "nodes": ["As2", "As75", "AT1G01010", "AT1G01020", "AT1G01030", "AT1G01040", "AT1G01050", "AT1G01060"],
+            "edges": [
+              "As2__AT1G01020__pheno_assn__8.4",
+              "As2__AT1G01040__pheno_assn__5.4",
+              "As75__AT1G01020__pheno_assn__39.9",
+              "AT1G01010__AT1G01020__ppi_hithru__2.3",
+              "AT1G01010__AT1G01030__ppi_hithru__2.4",
+              "AT1G01010__AT1G01040__domain_co_occur__2.5",
+              "AT1G01010__AT1G01040__ppi_liter__170.5",
+              "AT1G01030__AT1G01050__gene_coexpr__2.6",
+              "AT1G01050__AT1G01060__ppi_liter__2.7"
+            ]
+          }
+        }
       },
-      "5": {
-        "nodes": ["As2", "As75", "AT1G01010", "AT1G01020", "AT1G01030", "AT1G01040", "AT1G01050", "AT1G01060", "Na23"],
-        "edges": [
-          "As2__AT1G01020__pheno_assn__8.4",
-          "As2__AT1G01040__pheno_assn__5.4",
-          "As75__AT1G01020__pheno_assn__39.9",
-          "AT1G01010__AT1G01020__ppi_hithru__2.3",
-          "AT1G01010__AT1G01030__ppi_hithru__2.4",
-          "AT1G01010__AT1G01040__domain_co_occur__2.5",
-          "AT1G01010__AT1G01040__ppi_liter__170.5",
-          "AT1G01030__AT1G01050__gene_coexpr__2.6",
-          "AT1G01050__AT1G01060__ppi_liter__2.7"
-        ]
+      "As2__Na23": {
+        "distance": {
+          "0": {
+            "nodes": ["As2", "Na23"],
+            "edges": []
+          },
+          "1": {
+            "nodes": ["As2", "Na23", "AT1G01020", "AT1G01040"],
+            "edges": [
+              "As2__AT1G01020__pheno_assn__8.4",
+              "As2__AT1G01040__pheno_assn__5.4"
+            ]
+          },
+          "5": {
+            "nodes": ["As2", "As75", "AT1G01010", "AT1G01020", "AT1G01030", "AT1G01040", "AT1G01050", "AT1G01060", "Na23"],
+            "edges": [
+              "As2__AT1G01020__pheno_assn__8.4",
+              "As2__AT1G01040__pheno_assn__5.4",
+              "As75__AT1G01020__pheno_assn__39.9",
+              "AT1G01010__AT1G01020__ppi_hithru__2.3",
+              "AT1G01010__AT1G01030__ppi_hithru__2.4",
+              "AT1G01010__AT1G01040__domain_co_occur__2.5",
+              "AT1G01010__AT1G01040__ppi_liter__170.5",
+              "AT1G01030__AT1G01050__gene_coexpr__2.6",
+              "AT1G01050__AT1G01060__ppi_liter__2.7"
+            ]
+          }
+        }
       }
     }
   },
   "search_nodes": {
-    "Mary Poppins": {
-      "0": {"nodes": [], "edges": []},
-      "1": {"nodes": [], "edges": []},
-      "5": {"nodes": [], "edges": []}
-    },
-    "GO:0005515": {
-      "0": {
-        "nodes": ["AT1G01040", "AT1G01090"],
-        "edges": []
-      },
-      "1": {
-        "nodes": ["As2", "AT1G01010", "AT1G01040", "AT1G01080", "AT1G01090"],
-        "edges": [
-          "As2__AT1G01040__pheno_assn__5.4",
-          "AT1G01010__AT1G01040__domain_co_occur__2.5",
-          "AT1G01010__AT1G01040__ppi_liter__170.5",
-          "AT1G01080__AT1G01090__ppi_liter__2.8"
-        ]
+    "search_text": {
+      "Mary Poppins": {
+        "distance": {
+          "0": {"nodes": [], "edges": []},
+          "1": {"nodes": [], "edges": []},
+          "5": {"nodes": [], "edges": []}
+        }
       },
-      "5": {
-        "nodes": ["As2", "As75", "AT1G01010", "AT1G01020", "AT1G01030", "AT1G01040", "AT1G01050", "AT1G01060", "AT1G01080",  "AT1G01090"],
-        "edges": [
-          "As2__AT1G01020__pheno_assn__8.4",
-          "As2__AT1G01040__pheno_assn__5.4",
-          "As75__AT1G01020__pheno_assn__39.9",
-          "AT1G01010__AT1G01020__ppi_hithru__2.3",
-          "AT1G01010__AT1G01030__ppi_hithru__2.4",
-          "AT1G01010__AT1G01040__domain_co_occur__2.5",
-          "AT1G01010__AT1G01040__ppi_liter__170.5",
-          "AT1G01030__AT1G01050__gene_coexpr__2.6",
-          "AT1G01050__AT1G01060__ppi_liter__2.7",
-          "AT1G01080__AT1G01090__ppi_liter__2.8"
-        ]
+      "GO:0005515": {
+        "distance": {
+          "0": {
+            "nodes": ["AT1G01040", "AT1G01090"],
+            "edges": []
+          },
+          "1": {
+            "nodes": ["As2", "AT1G01010", "AT1G01040", "AT1G01080", "AT1G01090"],
+            "edges": [
+              "As2__AT1G01040__pheno_assn__5.4",
+              "AT1G01010__AT1G01040__domain_co_occur__2.5",
+              "AT1G01010__AT1G01040__ppi_liter__170.5",
+              "AT1G01080__AT1G01090__ppi_liter__2.8"
+            ]
+          },
+          "5": {
+            "nodes": ["As2", "As75", "AT1G01010", "AT1G01020", "AT1G01030", "AT1G01040", "AT1G01050", "AT1G01060", "AT1G01080",  "AT1G01090"],
+            "edges": [
+              "As2__AT1G01020__pheno_assn__8.4",
+              "As2__AT1G01040__pheno_assn__5.4",
+              "As75__AT1G01020__pheno_assn__39.9",
+              "AT1G01010__AT1G01020__ppi_hithru__2.3",
+              "AT1G01010__AT1G01030__ppi_hithru__2.4",
+              "AT1G01010__AT1G01040__domain_co_occur__2.5",
+              "AT1G01010__AT1G01040__ppi_liter__170.5",
+              "AT1G01030__AT1G01050__gene_coexpr__2.6",
+              "AT1G01050__AT1G01060__ppi_liter__2.7",
+              "AT1G01080__AT1G01090__ppi_liter__2.8"
+            ]
+          }
+        }
       }
     }
   },
 
   "fetch_clusters": {
-    "i6-1": {
-      "0": {
-        "nodes": ["AT1G01040", "AT1G01090"],
-        "edges": []
-      },
-      "1": {
-        "nodes": ["As2", "AT1G01010", "AT1G01040", "AT1G01080", "AT1G01090"],
-        "edges": [
-          "As2__AT1G01040__pheno_assn__5.4",
-          "AT1G01010__AT1G01040__domain_co_occur__2.5",
-          "AT1G01010__AT1G01040__ppi_liter__170.5",
-          "AT1G01080__AT1G01090__ppi_liter__2.8"
-        ]
-      },
-      "5": {
-        "nodes": ["As2", "As75", "AT1G01010", "AT1G01020", "AT1G01030", "AT1G01040", "AT1G01050", "AT1G01060", "AT1G01080",  "AT1G01090"],
-        "edges": [
-          "As2__AT1G01020__pheno_assn__8.4",
-          "As2__AT1G01040__pheno_assn__5.4",
-          "As75__AT1G01020__pheno_assn__39.9",
-          "AT1G01010__AT1G01020__ppi_hithru__2.3",
-          "AT1G01010__AT1G01030__ppi_hithru__2.4",
-          "AT1G01010__AT1G01040__domain_co_occur__2.5",
-          "AT1G01010__AT1G01040__ppi_liter__170.5",
-          "AT1G01030__AT1G01050__gene_coexpr__2.6",
-          "AT1G01050__AT1G01060__ppi_liter__2.7",
-          "AT1G01080__AT1G01090__ppi_liter__2.8"
-        ]
-      }
-    },
-    "i2-5__i6-2": {
-      "0": {
-        "nodes": ["AT1G01020", "AT1G01070"],
-        "edges": []
+    "cluster_ids": {
+      "Mary Poppins": {
+        "distance": {
+          "0": {"nodes": [], "edges": []},
+          "1": {"nodes": [], "edges": []},
+          "5": {"nodes": [], "edges": []}
+        }
       },
-      "1": {
-        "nodes": ["As2", "As75", "AT1G01010", "AT1G01020", "AT1G01070"],
-        "edges": [
-          "As2__AT1G01020__pheno_assn__8.4",
-          "As75__AT1G01020__pheno_assn__39.9",
-          "AT1G01010__AT1G01020__ppi_hithru__2.3"
-        ]
+      "markov_i6:1": {
+        "distance": {
+          "0": {
+            "nodes": ["AT1G01040", "AT1G01090"],
+            "edges": []
+          },
+          "1": {
+            "nodes": ["As2", "AT1G01010", "AT1G01040", "AT1G01080", "AT1G01090"],
+            "edges": [
+              "As2__AT1G01040__pheno_assn__5.4",
+              "AT1G01010__AT1G01040__domain_co_occur__2.5",
+              "AT1G01010__AT1G01040__ppi_liter__170.5",
+              "AT1G01080__AT1G01090__ppi_liter__2.8"
+            ]
+          },
+          "5": {
+            "nodes": ["As2", "As75", "AT1G01010", "AT1G01020", "AT1G01030", "AT1G01040", "AT1G01050", "AT1G01060", "AT1G01080",  "AT1G01090"],
+            "edges": [
+              "As2__AT1G01020__pheno_assn__8.4",
+              "As2__AT1G01040__pheno_assn__5.4",
+              "As75__AT1G01020__pheno_assn__39.9",
+              "AT1G01010__AT1G01020__ppi_hithru__2.3",
+              "AT1G01010__AT1G01030__ppi_hithru__2.4",
+              "AT1G01010__AT1G01040__domain_co_occur__2.5",
+              "AT1G01010__AT1G01040__ppi_liter__170.5",
+              "AT1G01030__AT1G01050__gene_coexpr__2.6",
+              "AT1G01050__AT1G01060__ppi_liter__2.7",
+              "AT1G01080__AT1G01090__ppi_liter__2.8"
+            ]
+          }
+        }
       },
-      "5": {
-        "nodes": ["As2", "As75", "AT1G01010", "AT1G01020", "AT1G01030", "AT1G01040", "AT1G01050", "AT1G01060", "AT1G01070"],
-        "edges": [
-          "As2__AT1G01020__pheno_assn__8.4",
-          "As2__AT1G01040__pheno_assn__5.4",
-          "As75__AT1G01020__pheno_assn__39.9",
-          "AT1G01010__AT1G01020__ppi_hithru__2.3",
-          "AT1G01010__AT1G01030__ppi_hithru__2.4",
-          "AT1G01010__AT1G01040__domain_co_occur__2.5",
-          "AT1G01010__AT1G01040__ppi_liter__170.5",
-          "AT1G01030__AT1G01050__gene_coexpr__2.6",
-          "AT1G01050__AT1G01060__ppi_liter__2.7"
-        ]
+      "markov_i2:5__markov_i6:2": {
+        "distance": {
+          "0": {
+            "nodes": ["AT1G01020", "AT1G01070"],
+            "edges": []
+          },
+          "1": {
+            "nodes": ["As2", "As75", "AT1G01010", "AT1G01020", "AT1G01070"],
+            "edges": [
+              "As2__AT1G01020__pheno_assn__8.4",
+              "As75__AT1G01020__pheno_assn__39.9",
+              "AT1G01010__AT1G01020__ppi_hithru__2.3"
+            ]
+          },
+          "5": {
+            "nodes": ["As2", "As75", "AT1G01010", "AT1G01020", "AT1G01030", "AT1G01040", "AT1G01050", "AT1G01060", "AT1G01070"],
+            "edges": [
+              "As2__AT1G01020__pheno_assn__8.4",
+              "As2__AT1G01040__pheno_assn__5.4",
+              "As75__AT1G01020__pheno_assn__39.9",
+              "AT1G01010__AT1G01020__ppi_hithru__2.3",
+              "AT1G01010__AT1G01030__ppi_hithru__2.4",
+              "AT1G01010__AT1G01040__domain_co_occur__2.5",
+              "AT1G01010__AT1G01040__ppi_liter__170.5",
+              "AT1G01030__AT1G01050__gene_coexpr__2.6",
+              "AT1G01050__AT1G01060__ppi_liter__2.7"
+            ]
+          }
+        }
       }
     }
   }
diff --git a/test/djornl/test_data/cluster_data/out.aranetv2_subnet_AT-CX_top10percent_anno_AF_082919.abc.I2_named.tsv b/test/djornl/test_data/I2_named.tsv
similarity index 61%
rename from test/djornl/test_data/cluster_data/out.aranetv2_subnet_AT-CX_top10percent_anno_AF_082919.abc.I2_named.tsv
rename to test/djornl/test_data/I2_named.tsv
index 086a920..46f4498 100644
--- a/test/djornl/test_data/cluster_data/out.aranetv2_subnet_AT-CX_top10percent_anno_AF_082919.abc.I2_named.tsv
+++ b/test/djornl/test_data/I2_named.tsv
@@ -1,3 +1,6 @@
+# data_type: cluster
+# prefix: markov_i2
+# title: Markov clustering, inflation = 2
 Cluster1	AT1G01010	AT1G01030	AT1G01040
 Cluster2	AT1G01050	AT1G01060	AT1G01070
 Cluster3	AT1G01080	AT1G01090
diff --git a/test/djornl/test_data/I4_named.tsv b/test/djornl/test_data/I4_named.tsv
new file mode 100644
index 0000000..147831e
--- /dev/null
+++ b/test/djornl/test_data/I4_named.tsv
@@ -0,0 +1,3 @@
+# prefix: markov_i4
+# title: Markov clustering, inflation = 4
+# data_type: cluster
diff --git a/test/djornl/test_data/cluster_data/out.aranetv2_subnet_AT-CX_top10percent_anno_AF_082919.abc.I6_named.tsv b/test/djornl/test_data/I6_named.tsv
similarity index 53%
rename from test/djornl/test_data/cluster_data/out.aranetv2_subnet_AT-CX_top10percent_anno_AF_082919.abc.I6_named.tsv
rename to test/djornl/test_data/I6_named.tsv
index 389cae2..b4680eb 100644
--- a/test/djornl/test_data/cluster_data/out.aranetv2_subnet_AT-CX_top10percent_anno_AF_082919.abc.I6_named.tsv
+++ b/test/djornl/test_data/I6_named.tsv
@@ -1,3 +1,6 @@
+# data_type: cluster
+# prefix: markov_i6
+# title: Markov clustering, inflation = 6
 Cluster1	AT1G01040	AT1G01090
 Cluster2	AT1G01070
 Cluster3	AT1G01010	AT1G01020	AT1G01030
diff --git a/test/djornl/test_data/cluster_data/out.aranetv2_subnet_AT-CX_top10percent_anno_AF_082919.abc.I4_named.tsv b/test/djornl/test_data/cluster_data/out.aranetv2_subnet_AT-CX_top10percent_anno_AF_082919.abc.I4_named.tsv
deleted file mode 100644
index 8b13789..0000000
--- a/test/djornl/test_data/cluster_data/out.aranetv2_subnet_AT-CX_top10percent_anno_AF_082919.abc.I4_named.tsv
+++ /dev/null
@@ -1 +0,0 @@
-
diff --git a/test/djornl/test_data/merged_edges-AMW-060820_AF.tsv b/test/djornl/test_data/edges.tsv
similarity index 100%
rename from test/djornl/test_data/merged_edges-AMW-060820_AF.tsv
rename to test/djornl/test_data/edges.tsv
diff --git a/test/djornl/test_data/manifest.yaml b/test/djornl/test_data/manifest.yaml
new file mode 100644
index 0000000..2eb28e3
--- /dev/null
+++ b/test/djornl/test_data/manifest.yaml
@@ -0,0 +1,19 @@
+- data_type: edge
+  path: edges.tsv
+  date_created: 2020-12-25
+
+- data_type: node
+  path: nodes.csv
+  date_created: 2019-01-01
+
+- data_type: cluster
+  prefix: markov_i2
+  path: I2_named.tsv
+
+- data_type: cluster
+  prefix: markov_i4
+  path: I4_named.tsv
+
+- data_type: cluster
+  prefix: markov_i6
+  path: I6_named.tsv
diff --git a/test/djornl/test_data/aranet2-aragwas-MERGED-AMW-v2_091319_nodeTable.csv b/test/djornl/test_data/nodes.csv
similarity index 99%
rename from test/djornl/test_data/aranet2-aragwas-MERGED-AMW-v2_091319_nodeTable.csv
rename to test/djornl/test_data/nodes.csv
index 5bc0e1d..a032142 100644
--- a/test/djornl/test_data/aranet2-aragwas-MERGED-AMW-v2_091319_nodeTable.csv
+++ b/test/djornl/test_data/nodes.csv
@@ -1,3 +1,4 @@
+# data_type: node
 node_id,node_type,transcript,gene_symbol,gene_full_name,gene_model_type,TAIR_Computational_description,TAIR_Curator_summary,TAIR_short_description,GO_descr,GO_terms,MapMan_bin,MapMan_name,MapMan_descr,pheno_AraGWAS_ID,pheno_descrip1,pheno_descrip2,pheno_descrip3,pheno_ref,UserNotes
 As2,pheno,,,,,,,,,,,,,10.21958/phenotype:103,,bacterial disease resistance,The resistance exhibited by a plant or a group of plants (population) in response to the disease caused by a bacterial pathogen infection as compared to the susceptible and/or the reference plants of the same species. [GR:pj],"Atwell et. al, Nature 2010",
 As75,pheno,,,,,,,,,,,,,10.21958/phenotype:67,"Arsenic concentrations in leaves, grown in soil. Elemental analysis was performed with an ICP-MS (PerkinElmer). Sample normalized to calculated weights as described in Baxter et al., 2008",arsenic concentration,A mineral and ion content related trait (TO:0000465) which is the concentration of arsenic (CHEBI:22632) in some plant structure (PO:0009011). [GR:Karthik],"Atwell et. al, Nature 2010",
diff --git a/test/stored_queries/test_djornl.py b/test/stored_queries/test_djornl.py
index df2a7e5..7c502a3 100644
--- a/test/stored_queries/test_djornl.py
+++ b/test/stored_queries/test_djornl.py
@@ -6,10 +6,8 @@
 import unittest
 import requests
 import os
-import glob
-import yaml
 
-from test.helpers import get_config, assert_subset, modified_environ
+from test.helpers import get_config, modified_environ
 from test.stored_queries.helpers import create_test_docs
 from importers.djornl.parser import DJORNL_Parser
 
@@ -79,6 +77,7 @@ def check_expected_results(self, description, response, expected):
 
         if _VERBOSE:
             print("Running test " + description)
+
         results = response['results'][0]
         self.assertEqual(
             set([n["_key"] for n in results['nodes']]),
@@ -93,44 +92,48 @@ def check_expected_results(self, description, response, expected):
 
     def test_fetch_all(self):
 
-        # expect all the nodes from load_node_metadata and all the edges from load_edges
-        expected = {
-            "nodes": [n["_key"] for n in self.json_data['load_node_metadata']['nodes']],
-            "edges": [ {
-              "_to":        e["_to"],
-              "_from":      e["_from"],
-              "score":      e["score"],
-              "edge_type":  e["edge_type"] } for e in self.json_data['load_edges']['edges']
-            ]
-        }
-
+        response = self.submit_query('djornl_fetch_all')
         self.check_expected_results(
             "djornl_fetch_all",
-            self.submit_query('djornl_fetch_all'),
+            response,
             self.json_data['fetch_all']
         )
 
+        # ensure that all the cluster data is returned OK
+        node_data = response['results'][0]['nodes']
+        nodes_with_clusters = [json.dumps({
+            '_key':     n['_key'],
+            'clusters': n['clusters']
+        }) for n in node_data if 'clusters' in n]
+        self.assertEqual(
+            set(nodes_with_clusters),
+            set([json.dumps(this) for this in self.json_data['load_cluster_data']['nodes']])
+        )
 
     # indexing schema in results.json
-    # self.json_data[query][primary_param][distance_param]
-    # if primary_param is an array, join the array entities with "__"
+    # self.json_data[query_name][param_name][param_value]["distance"][distance_param]
+    # e.g. for fetch_clusters data:
+    # "fetch_clusters": {
+    #   "cluster_ids": {
+    #     "markov_i2:6__markov_i4:3": {
+    #       "distance": {
+    #         "1": {
+    #           "nodes": [ node IDs ],
+    #           "edges": [ edge data ],
+    #         }
+    #       }
+    #     }
+    #   }
+    # }
+    # if param_value is an array, join the array entities with "__"
     # results are in the form {"nodes": [...], "edges": [...]}
     # nodes are represented as a list of node[_key]
     # edges are objects with keys _to, _from, edge_type and score
 
-    def test_fetch_phenotypes_no_results(self):
-
-        resp = self.submit_query('djornl_fetch_phenotypes', {
-            # gene node
-            "keys": ["AT1G01010"],
-        })
-        self.assertEqual(resp['results'][0], self.no_results)
-
-
     def test_fetch_phenotypes(self):
 
-        for fetch_args in self.json_data['fetch_phenotypes'].keys():
-            for distance in self.json_data['fetch_phenotypes'][fetch_args].keys():
+        for (fetch_args, key_data) in self.json_data['fetch_phenotypes']['keys'].items():
+            for (distance, distance_data) in key_data['distance'].items():
                 resp = self.submit_query('djornl_fetch_phenotypes', {
                     "keys": fetch_args.split('__'),
                     "distance": int(distance),
@@ -138,22 +141,14 @@ def test_fetch_phenotypes(self):
                 self.check_expected_results(
                     "fetch phenotypes with args " + fetch_args + " and distance " + distance,
                     resp,
-                    self.json_data['fetch_phenotypes'][fetch_args][distance]
+                    distance_data
                 )
 
 
-    def test_fetch_genes_no_results(self):
-        resp = self.submit_query('djornl_fetch_genes', {
-            # phenotype node
-            "keys": ["As2"],
-        })
-        self.assertEqual(resp['results'][0], self.no_results)
-
-
     def test_fetch_genes(self):
 
-        for fetch_args in self.json_data['fetch_genes'].keys():
-            for distance in self.json_data['fetch_genes'][fetch_args].keys():
+        for (fetch_args, key_data) in self.json_data['fetch_genes']['keys'].items():
+            for (distance, distance_data) in key_data['distance'].items():
                 resp = self.submit_query('djornl_fetch_genes', {
                     "keys": fetch_args.split('__'),
                     "distance": int(distance),
@@ -161,54 +156,29 @@ def test_fetch_genes(self):
                 self.check_expected_results(
                     "fetch genes with args " + fetch_args + " and distance " + distance,
                     resp,
-                    self.json_data['fetch_genes'][fetch_args][distance]
+                    distance_data
                 )
 
 
-    def test_fetch_clusters_no_results(self):
-
-        resp = self.submit_query('djornl_fetch_clusters', {
-            'cluster_i2_ids': [666],
-            'cluster_i4_ids': [666],
-            'cluster_i6_ids': [666],
-        })
-        self.assertEqual(resp['results'][0], self.no_results)
-
-
     def test_fetch_clusters(self):
 
-        for fetch_args in self.json_data['fetch_clusters'].keys():
-            cluster_args = {}
-            for arg in fetch_args.split('__'):
-                [c_name, c_id] = arg.split('-', maxsplit=1)
-                if "cluster_" + c_name + "_ids" in cluster_args:
-                    cluster_args["cluster_" + c_name + "_ids"] += int(c_id)
-                else:
-                    cluster_args["cluster_" + c_name + "_ids"] = [int(c_id)]
-
-            for distance in self.json_data['fetch_clusters'][fetch_args].keys():
-                cluster_args['distance'] = int(distance)
-                resp = self.submit_query('djornl_fetch_clusters', cluster_args)
+        for (fetch_args, cluster_data) in self.json_data['fetch_clusters']['cluster_ids'].items():
+            for (distance, distance_data) in cluster_data['distance'].items():
+                resp = self.submit_query('djornl_fetch_clusters', {
+                    "cluster_ids": fetch_args.split('__'),
+                    "distance": int(distance),
+                })
                 self.check_expected_results(
                     "fetch clusters with args " + fetch_args + " and distance " + distance,
                     resp,
-                    self.json_data['fetch_clusters'][fetch_args][distance]
+                    distance_data
                 )
 
-    @unittest.skip('This test is disabled until automated view loading is possible')
-    def test_search_nodes_no_results(self):
-
-        resp = self.submit_query('djornl_search_nodes', {
-            "search_text": "Mary Poppins",
-        })
-        self.assertEqual(resp['results'][0], self.no_results)
-
 
-    @unittest.skip('This test is disabled until automated view loading is possible')
     def test_search_nodes(self):
 
-        for search_text in self.json_data['search_nodes'].keys():
-            for distance in self.json_data['search_nodes'][search_text].keys():
+        for (search_text, search_data) in self.json_data['search_nodes']['search_text'].items():
+            for (distance, distance_data) in search_data['distance'].items():
                 resp = self.submit_query('djornl_search_nodes', {
                     "search_text": search_text,
                     "distance": int(distance),
@@ -216,5 +186,5 @@ def test_search_nodes(self):
                 self.check_expected_results(
                     "search nodes with args " + search_text + " and distance " + distance,
                     resp,
-                    self.json_data['search_nodes'][search_text][distance]
+                    distance_data
                 )
diff --git a/test/stored_queries/test_djornl_parser.py b/test/stored_queries/test_djornl_parser.py
index b2043b9..91dfdb5 100644
--- a/test/stored_queries/test_djornl_parser.py
+++ b/test/stored_queries/test_djornl_parser.py
@@ -10,7 +10,6 @@
 import requests
 import os
 import contextlib
-
 from importers.djornl.parser import DJORNL_Parser
 
 from test.helpers import get_config, assert_subset, modified_environ
@@ -40,69 +39,86 @@ def init_parser_with_path(self, root_path):
             return parser
 
 
-    def test_load_empty_files(self):
-        """ test loading files containing no data """
+    def test_load_no_manifest(self):
+        """ test loading when the manifest does not exist """
+        RES_ROOT_DATA_PATH = os.path.join(_TEST_DIR, 'djornl', 'no_manifest')
+        err_str = 'No manifest file found at ' + os.path.join(RES_ROOT_DATA_PATH, 'manifest.yaml')
+        with self.assertRaisesRegex(RuntimeError, err_str):
+            self.init_parser_with_path(RES_ROOT_DATA_PATH)
 
-        # path: test/djornl/empty_files
-        RES_ROOT_DATA_PATH = os.path.join(_TEST_DIR, 'djornl', 'empty_files')
-        parser = self.init_parser_with_path(RES_ROOT_DATA_PATH)
 
-        self.assertEqual(parser.load_edges(), {"nodes": [], "edges": []})
-        self.assertEqual(parser.load_node_metadata(), {"nodes": []})
-        self.assertEqual(parser.load_cluster_data(), {"nodes": []})
+    def test_load_invalid_manifest(self):
+        """ test an invalid manifest file """
+        RES_ROOT_DATA_PATH = os.path.join(_TEST_DIR, 'djornl', 'invalid_manifest')
+        err_str = "The manifest file failed validation with the following errors:"
+        with self.assertRaisesRegex(RuntimeError, err_str):
+            self.init_parser_with_path(RES_ROOT_DATA_PATH)
+
+
+    def test_load_invalid_file(self):
+        """ test loading when a file specified in the manifest is a directory """
+
+        RES_ROOT_DATA_PATH = os.path.join(_TEST_DIR, 'djornl', 'invalid_file')
+
+        # edges: directory, not a file
+        err_str = os.path.join(RES_ROOT_DATA_PATH, "edges.tsv") + ": not a file"
+        with self.assertRaisesRegex(RuntimeError, err_str):
+            self.init_parser_with_path(RES_ROOT_DATA_PATH)
 
 
     def test_load_missing_files(self):
         """ test loading when files cannot be found """
 
-        # this dir does not contain the correct file structure
-        # path: test/djornl/empty_files/cluster_data
-        RES_ROOT_DATA_PATH = os.path.join(_TEST_DIR, 'djornl', 'empty_files', 'cluster_data')
-        parser = self.init_parser_with_path(RES_ROOT_DATA_PATH)
+        RES_ROOT_DATA_PATH = os.path.join(_TEST_DIR, 'djornl', 'missing_files')
+        # the parser should report that edges.tsv does not exist
+        err_str = os.path.join(RES_ROOT_DATA_PATH, "edges.tsv") + ': file does not exist'
+        with self.assertRaisesRegex(RuntimeError, err_str):
+            self.init_parser_with_path(RES_ROOT_DATA_PATH)
 
-        err_str = "No such file or directory: '" + RES_ROOT_DATA_PATH
-        with self.assertRaisesRegex(FileNotFoundError, err_str):
-            parser.load_edges()
 
-        with self.assertRaisesRegex(FileNotFoundError, err_str):
-            parser.load_node_metadata()
+    def test_load_empty_files(self):
+        """ test loading files containing no data """
 
-        with self.assertRaisesRegex(FileNotFoundError, err_str):
-            parser.load_cluster_data()
+        # path: test/djornl/empty_files
+        RES_ROOT_DATA_PATH = os.path.join(_TEST_DIR, 'djornl', 'empty_files')
+        parser = self.init_parser_with_path(RES_ROOT_DATA_PATH)
+        self.assertEqual(parser.load_edges(), {"nodes": [], "edges": []})
+        self.assertEqual(parser.load_node_metadata(), {"nodes": []})
+        self.assertEqual(parser.load_cluster_data(), {"nodes": []})
 
 
-    def test_load_invalid_types(self):
-        """ test file format errors """
+    def test_load_col_count_errors(self):
+        """ test files with invalid numbers of columns """
 
-        # path: test/djornl/invalid_types
-        RES_ROOT_DATA_PATH = os.path.join(_TEST_DIR, 'djornl', 'invalid_types')
+        # path: test/djornl/col_count_errors
+        RES_ROOT_DATA_PATH = os.path.join(_TEST_DIR, 'djornl', 'col_count_errors')
         parser = self.init_parser_with_path(RES_ROOT_DATA_PATH)
 
         # invalid edge type
-        edge_err_msg = 'line 2: invalid edge type: AraGWAS-Some-Old-Rubbish-I-Made-Up'
+        edge_err_msg = 'line 6: expected 5 cols, found 3'
         with self.assertRaisesRegex(RuntimeError, edge_err_msg):
             parser.load_edges()
 
         # invalid node type
-        node_err_msg = 'line 4: invalid node type: Monkey'
+        node_err_msg = 'line 3: expected 20 cols, found 22'
         with self.assertRaisesRegex(RuntimeError, node_err_msg):
             parser.load_node_metadata()
 
 
-    def test_load_col_count_errors(self):
-        """ test files with invalid numbers of columns """
+    def test_load_invalid_types(self):
+    """ test files containing invalid edge or node types """
 
-        # path: test/djornl/col_count_errors
-        RES_ROOT_DATA_PATH = os.path.join(_TEST_DIR, 'djornl', 'col_count_errors')
+        # path: test/djornl/invalid_types
+        RES_ROOT_DATA_PATH = os.path.join(_TEST_DIR, 'djornl', 'invalid_types')
         parser = self.init_parser_with_path(RES_ROOT_DATA_PATH)
 
         # invalid edge type
-        edge_err_msg = 'line 6: expected 5 cols, found 3'
+        edge_err_msg = 'merged_edges-AMW-060820_AF.tsv line 3: invalid edge type: AraGWAS-Some-Old-Rubbish-I-Made-Up'
         with self.assertRaisesRegex(RuntimeError, edge_err_msg):
             parser.load_edges()
 
         # invalid node type
-        node_err_msg = 'line 3: expected 20 cols, found 22'
+        node_err_msg = 'aranet2-aragwas-MERGED-AMW-v2_091319_nodeTable.csv line 5: invalid node type: Monkey'
         with self.assertRaisesRegex(RuntimeError, node_err_msg):
             parser.load_node_metadata()
 
@@ -112,8 +128,6 @@ def test_load_valid_edge_data(self):
         RES_ROOT_DATA_PATH = os.path.join(_TEST_DIR, 'djornl', 'test_data')
         parser = self.init_parser_with_path(RES_ROOT_DATA_PATH)
 
-        self.maxDiff = None
-
         edge_data = parser.load_edges()
         self.assertEqual(
             edge_data,
@@ -143,3 +157,5 @@ def test_load_valid_cluster_data(self):
             self.json_data["load_cluster_data"]
         )
 
+        parser.check_data_delta()
+