From e4843f3c14e700ae12bc3f330585d15ef0cd0c79 Mon Sep 17 00:00:00 2001
From: Gaurav Vaidya <gaurav@renci.org>
Date: Mon, 15 Dec 2025 14:26:15 -0500
Subject: [PATCH 01/11] First stab at adding clique leaders.

---
 node_normalizer/model/input.py |  5 +++++
 node_normalizer/normalizer.py  | 30 +++++++++++++++++++++++++-----
 node_normalizer/server.py      |  4 +++-
 node_normalizer/set_id.py      |  2 +-
 4 files changed, 34 insertions(+), 7 deletions(-)

diff --git a/node_normalizer/model/input.py b/node_normalizer/model/input.py
index ea7820e..b6bf757 100644
--- a/node_normalizer/model/input.py
+++ b/node_normalizer/model/input.py
@@ -41,6 +41,11 @@ class CurieList(BaseModel):
         title="Whether to return taxa for equivalent identifiers"
     )
 
+    include_clique_leaders: bool = Field(
+        default=False,
+        title="Whether to return clique leaders for conflated identifiers"
+    )
+
     class Config:
         schema_extra = {
             "example": {
diff --git a/node_normalizer/normalizer.py b/node_normalizer/normalizer.py
index 32d9126..713c877 100644
--- a/node_normalizer/normalizer.py
+++ b/node_normalizer/normalizer.py
@@ -532,6 +532,7 @@ async def get_normalized_nodes(
         include_descriptions: bool = False,
         include_individual_types: bool = True,
         include_taxa: bool = True,
+        include_clique_leaders: bool = False,
 ) -> Dict[str, Optional[str]]:
     """
     Get value(s) for key(s) using redis MGET
@@ -555,6 +556,7 @@ async def get_normalized_nodes(
     canonical_ids = await app.state.eq_id_to_id_db.mget(*upper_curies, encoding='utf-8')
     canonical_nonan = [canonical_id for canonical_id in canonical_ids if canonical_id is not None]
     info_contents = {}
+    clique_leaders = {}
 
     # did we get some canonical ids
     if canonical_nonan:
@@ -569,14 +571,18 @@ async def get_normalized_nodes(
             other_ids = []
 
             if conflate_gene_protein:
-                other_ids.extend(await app.state.gene_protein_db.mget(*canonical_nonan, encoding='utf8'))
+                gene_protein_clique_leaders = await app.state.gene_protein_db.mget(*canonical_nonan, encoding='utf8')
+                other_ids.extend(gene_protein_clique_leaders)
+                clique_leaders.update(zip(canonical_nonan, gene_protein_clique_leaders))
 
             # logger.error(f"After conflate_gene_protein: {other_ids}")
 
             if conflate_chemical_drug:
-                other_ids.extend(await app.state.chemical_drug_db.mget(*canonical_nonan, encoding='utf8'))
+                drug_chemical_clique_leaders = await app.state.chemical_drug_db.mget(*canonical_nonan, encoding='utf8')
+                other_ids.extend(drug_chemical_clique_leaders)
+                clique_leaders.update(zip(canonical_nonan, drug_chemical_clique_leaders))
 
-            # logger.error(f"After conflate_chemical_drug: {other_ids}")
+        # logger.error(f"After conflate_chemical_drug: {other_ids}")
 
             # if there are other ids, then we want to rebuild eqids and types.  That's because even though we have them,
             # they're not necessarily first.  For instance if what came in and got canonicalized was a protein id
@@ -635,9 +641,13 @@ async def get_normalized_nodes(
         dereference_ids = dict()
         dereference_types = dict()
 
+    # Don't write out clique leaders unless its requested.
+    if not include_clique_leaders:
+        clique_leaders = None
+
     # output the final result
     normal_nodes = {
-        input_curie: await create_node(app, canonical_id, dereference_ids, dereference_types, info_contents,
+        input_curie: await create_node(app, canonical_id, dereference_ids, dereference_types, info_contents, clique_leaders,
                                        include_descriptions=include_descriptions,
                                        include_individual_types=include_individual_types,
                                        include_taxa=include_taxa,
@@ -680,7 +690,7 @@ async def get_info_content_attribute(app, canonical_nonan) -> dict:
     return new_attrib
 
 
-async def create_node(app, canonical_id, equivalent_ids, types, info_contents, include_descriptions=True,
+async def create_node(app, canonical_id, equivalent_ids, types, info_contents, clique_leaders, include_descriptions=True,
                       include_individual_types=False, include_taxa=False, conflations=None):
     """Construct the output format given the compressed redis data"""
     # It's possible that we didn't find a canonical_id
@@ -828,6 +838,16 @@ async def create_node(app, canonical_id, equivalent_ids, types, info_contents, i
     if include_taxa and node_taxa:
         node["taxa"] = sorted(node_taxa, key=get_numerical_curie_suffix)
 
+    # Add clique leaders if available.
+    if clique_leaders:
+        clique_leaders_for_node = clique_leaders.get(canonical_id, [])
+        clique_leaders_with_labels_and_types = [{
+            'identifier': cl,
+            'labels': [eid['l'] for eid in eids if eid['i'] == cl],
+            'types': [eid['t'] for eid in eids if eid['i'] == cl],
+        } for cl in clique_leaders_for_node]
+        node["clique_leaders"] = clique_leaders_with_labels_and_types
+
     # We need to remove `biolink:Entity` from the types returned.
     # (See explanation at https://github.com/TranslatorSRI/NodeNormalization/issues/173)
     if 'biolink:Entity' in types[canonical_id]:
diff --git a/node_normalizer/server.py b/node_normalizer/server.py
index 18ca7ca..2fc1430 100644
--- a/node_normalizer/server.py
+++ b/node_normalizer/server.py
@@ -265,6 +265,7 @@ async def get_normalized_node_handler(
     description: bool = fastapi.Query(False, description="Whether to return curie descriptions when possible"),
     individual_types: bool = fastapi.Query(False, description="Whether to return individual types for equivalent identifiers"),
     include_taxa: bool = fastapi.Query(True, description="Whether to return taxa for equivalent identifiers"),
+    include_clique_leaders: bool = fastapi.Query(False, description="Whether to return clique leaders for conflated identifiers"),
 ):
     """
     Get value(s) for key(s) using redis MGET
@@ -274,6 +275,7 @@ async def get_normalized_node_handler(
                                                   include_descriptions=description,
                                                   include_individual_types=individual_types,
                                                   include_taxa=include_taxa,
+                                                  include_clique_leaders=include_clique_leaders,
                                                   )
 
     # If curie contains at least one entry, then the only way normalized_nodes could be blank
@@ -295,7 +297,7 @@ async def get_normalized_node_handler_post(curies: CurieList):
     """
     normalized_nodes = await get_normalized_nodes(app, curies.curies, curies.conflate, curies.drug_chemical_conflate,
                                                   curies.description, include_individual_types=curies.individual_types,
-                                                  include_taxa=curies.include_taxa,
+                                                  include_taxa=curies.include_taxa, include_clique_leaders=curies.include_clique_leaders,
                                                   )
 
     # If curies.curies contains at least one entry, then the only way normalized_nodes could be blank
diff --git a/node_normalizer/set_id.py b/node_normalizer/set_id.py
index 3c3dc30..37a2745 100644
--- a/node_normalizer/set_id.py
+++ b/node_normalizer/set_id.py
@@ -41,7 +41,7 @@ async def generate_setid(app, curies, conflations) -> SetIDResponse:
 
     # We use get_normalized_nodes() to normalize all the CURIEs for us.
     normalization_results = await get_normalized_nodes(
-        app, curies, gene_protein_conflation, drug_chemical_conflation, include_descriptions=False, include_individual_types=False, include_taxa=False
+        app, curies, gene_protein_conflation, drug_chemical_conflation, include_descriptions=False, include_individual_types=False, include_taxa=False, include_clique_leaders=False
     )
 
     # We prepare a set of sorted, deduplicated curies.

From db9f2a32b932994bd32bc5176a53b64c91bcddfb Mon Sep 17 00:00:00 2001
From: Gaurav Vaidya <gaurav@renci.org>
Date: Mon, 15 Dec 2025 14:26:41 -0500
Subject: [PATCH 02/11] Added on:push trigger for testing.

---
 .github/workflows/release.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index 800d57b..e9d8f8f 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -1,6 +1,7 @@
 name: 'Publish to GitHub Packages'
 
 on:
+    push:
     release:
         types: [published]
 

From 5351dc67f6c1eb85377dcedc808e8f1e73da4f1c Mon Sep 17 00:00:00 2001
From: Gaurav Vaidya <gaurav@renci.org>
Date: Mon, 15 Dec 2025 14:36:15 -0500
Subject: [PATCH 03/11] Attempt at fix: unpack canonical_nonan in zip(); log include_clique_leaders.

---
 node_normalizer/normalizer.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/node_normalizer/normalizer.py b/node_normalizer/normalizer.py
index 713c877..430a083 100644
--- a/node_normalizer/normalizer.py
+++ b/node_normalizer/normalizer.py
@@ -573,14 +573,14 @@ async def get_normalized_nodes(
             if conflate_gene_protein:
                 gene_protein_clique_leaders = await app.state.gene_protein_db.mget(*canonical_nonan, encoding='utf8')
                 other_ids.extend(gene_protein_clique_leaders)
-                clique_leaders.update(zip(canonical_nonan, gene_protein_clique_leaders))
+                clique_leaders.update(zip(*canonical_nonan, gene_protein_clique_leaders))
 
             # logger.error(f"After conflate_gene_protein: {other_ids}")
 
             if conflate_chemical_drug:
                 drug_chemical_clique_leaders = await app.state.chemical_drug_db.mget(*canonical_nonan, encoding='utf8')
                 other_ids.extend(drug_chemical_clique_leaders)
-                clique_leaders.update(zip(canonical_nonan, drug_chemical_clique_leaders))
+                clique_leaders.update(zip(*canonical_nonan, drug_chemical_clique_leaders))
 
         # logger.error(f"After conflate_chemical_drug: {other_ids}")
 
@@ -661,7 +661,7 @@ async def get_normalized_nodes(
     end_time = time.time_ns()
     logger.info(f"Normalized {len(curies)} nodes in {(end_time - start_time)/1_000_000:.2f} ms with arguments " +
                 f"(curies={curies}, conflate_gene_protein={conflate_gene_protein}, conflate_chemical_drug={conflate_chemical_drug}, " +
-                f"include_descriptions={include_descriptions}, include_individual_types={include_individual_types})")
+                f"include_descriptions={include_descriptions}, include_individual_types={include_individual_types}, include_clique_leaders={include_clique_leaders})")
 
     return normal_nodes
 

From 55753fb85834ee4331b779328de37cff082dcc57 Mon Sep 17 00:00:00 2001
From: Gaurav Vaidya <gaurav@renci.org>
Date: Mon, 15 Dec 2025 14:46:18 -0500
Subject: [PATCH 04/11] Added logging for debugging.

---
 node_normalizer/normalizer.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/node_normalizer/normalizer.py b/node_normalizer/normalizer.py
index 430a083..b876f46 100644
--- a/node_normalizer/normalizer.py
+++ b/node_normalizer/normalizer.py
@@ -840,6 +840,7 @@ async def create_node(app, canonical_id, equivalent_ids, types, info_contents, c
 
     # Add clique leaders if available.
     if clique_leaders:
+        logger.info(f"Getting clique_leaders from {clique_leaders} for canonical ID {canonical_id}")
         clique_leaders_for_node = clique_leaders.get(canonical_id, [])
         clique_leaders_with_labels_and_types = [{
             'identifier': cl,

From dda3646b789e7f1f79c755b6865333545c21943a Mon Sep 17 00:00:00 2001
From: Gaurav Vaidya <gaurav@renci.org>
Date: Mon, 15 Dec 2025 14:47:15 -0500
Subject: [PATCH 05/11] Replaced logging with logger.

---
 node_normalizer/normalizer.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/node_normalizer/normalizer.py b/node_normalizer/normalizer.py
index b876f46..a921bb2 100644
--- a/node_normalizer/normalizer.py
+++ b/node_normalizer/normalizer.py
@@ -511,7 +511,7 @@ async def get_eqids_and_types(
     types_with_ancestors = []
     for index, typ in enumerate(types):
         if not typ:
-            logging.error(f"No type information found for '{canonical_nonan[index]}' with eqids: {eqids[index]}, "
+            logger.error(f"No type information found for '{canonical_nonan[index]}' with eqids: {eqids[index]}, "
                           f"replacing with {BIOLINK_NAMED_THING}")
             types_with_ancestors.append([BIOLINK_NAMED_THING])
         else:
@@ -625,7 +625,7 @@ async def get_normalized_nodes(
                     t = []
 
                 for other in dereference_others[canonical_id]:
-                    # logging.debug(f"e = {e}, other = {other}, deref_others_eqs = {deref_others_eqs}")
+                    # logger.debug(f"e = {e}, other = {other}, deref_others_eqs = {deref_others_eqs}")
                     e += deref_others_eqs[other]
                     t += deref_others_typ[other]
 
@@ -703,16 +703,16 @@ async def create_node(app, canonical_id, equivalent_ids, types, info_contents, c
 
     # If we have 'None' in the equivalent IDs, skip it so we don't confuse things further down the line.
     if None in equivalent_ids[canonical_id]:
-        logging.warning(f"Skipping None in canonical ID {canonical_id} among eqids: {equivalent_ids}")
+        logger.warning(f"Skipping None in canonical ID {canonical_id} among eqids: {equivalent_ids}")
         equivalent_ids[canonical_id] = [x for x in equivalent_ids[canonical_id] if x is not None]
         if not equivalent_ids[canonical_id]:
-            logging.warning(f"No non-None values found for ID {canonical_id} among filtered eqids: {equivalent_ids}")
+            logger.warning(f"No non-None values found for ID {canonical_id} among filtered eqids: {equivalent_ids}")
             return None
 
     # If we have 'None' in the canonical types, something went horribly wrong (specifically: we couldn't
     # find the type information for all the eqids for this clique). Return None.
     if None in types[canonical_id]:
-        logging.error(f"No types found for canonical ID {canonical_id} among types: {types}")
+        logger.error(f"No types found for canonical ID {canonical_id} among types: {types}")
         return None
 
     # OK, now we should have id's in the format [ {"i": "MONDO:12312", "l": "Scrofula"}, {},...]

From de096d2599a6cbbd549e529d3a233cd9764ece71 Mon Sep 17 00:00:00 2001
From: Gaurav Vaidya <gaurav@renci.org>
Date: Mon, 15 Dec 2025 14:59:12 -0500
Subject: [PATCH 06/11] Attempt to fix clique leader querying.

---
 node_normalizer/normalizer.py | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/node_normalizer/normalizer.py b/node_normalizer/normalizer.py
index a921bb2..4aaad64 100644
--- a/node_normalizer/normalizer.py
+++ b/node_normalizer/normalizer.py
@@ -573,14 +573,16 @@ async def get_normalized_nodes(
             if conflate_gene_protein:
                 gene_protein_clique_leaders = await app.state.gene_protein_db.mget(*canonical_nonan, encoding='utf8')
                 other_ids.extend(gene_protein_clique_leaders)
-                clique_leaders.update(zip(*canonical_nonan, gene_protein_clique_leaders))
+                if include_clique_leaders:
+                    clique_leaders.update(zip(canonical_nonan, json.loads(gene_protein_clique_leaders)))
 
             # logger.error(f"After conflate_gene_protein: {other_ids}")
 
             if conflate_chemical_drug:
                 drug_chemical_clique_leaders = await app.state.chemical_drug_db.mget(*canonical_nonan, encoding='utf8')
                 other_ids.extend(drug_chemical_clique_leaders)
-                clique_leaders.update(zip(*canonical_nonan, drug_chemical_clique_leaders))
+                if include_clique_leaders:
+                    clique_leaders.update(zip(canonical_nonan, json.loads(drug_chemical_clique_leaders)))
 
         # logger.error(f"After conflate_chemical_drug: {other_ids}")
 
@@ -641,10 +643,6 @@ async def get_normalized_nodes(
         dereference_ids = dict()
         dereference_types = dict()
 
-    # Don't write out clique leaders unless its requested.
-    if not include_clique_leaders:
-        clique_leaders = None
-
     # output the final result
     normal_nodes = {
         input_curie: await create_node(app, canonical_id, dereference_ids, dereference_types, info_contents, clique_leaders,

From 53a82cb673383c5f439dc696538d154d83c209ea Mon Sep 17 00:00:00 2001
From: Gaurav Vaidya <gaurav@renci.org>
Date: Mon, 15 Dec 2025 15:54:49 -0500
Subject: [PATCH 07/11] Get the clique leaders translated again.

---
 node_normalizer/normalizer.py | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/node_normalizer/normalizer.py b/node_normalizer/normalizer.py
index 4aaad64..aa1dcae 100644
--- a/node_normalizer/normalizer.py
+++ b/node_normalizer/normalizer.py
@@ -571,25 +571,27 @@ async def get_normalized_nodes(
             other_ids = []
 
             if conflate_gene_protein:
-                gene_protein_clique_leaders = await app.state.gene_protein_db.mget(*canonical_nonan, encoding='utf8')
+                gene_protein_clique_leaders_strings = await app.state.gene_protein_db.mget(*canonical_nonan, encoding='utf8')
+                gene_protein_clique_leaders = [json.loads(oids) if oids else [] for oids in gene_protein_clique_leaders_strings]
                 other_ids.extend(gene_protein_clique_leaders)
                 if include_clique_leaders:
-                    clique_leaders.update(zip(canonical_nonan, json.loads(gene_protein_clique_leaders)))
+                    clique_leaders.update(zip(canonical_nonan, gene_protein_clique_leaders))
 
             # logger.error(f"After conflate_gene_protein: {other_ids}")
 
             if conflate_chemical_drug:
-                drug_chemical_clique_leaders = await app.state.chemical_drug_db.mget(*canonical_nonan, encoding='utf8')
+                drug_chemical_clique_leaders_strings = await app.state.chemical_drug_db.mget(*canonical_nonan, encoding='utf8')
+                drug_chemical_clique_leaders = [json.loads(oids) if oids else [] for oids in drug_chemical_clique_leaders_strings]
                 other_ids.extend(drug_chemical_clique_leaders)
                 if include_clique_leaders:
-                    clique_leaders.update(zip(canonical_nonan, json.loads(drug_chemical_clique_leaders)))
+                    clique_leaders.update(zip(canonical_nonan, drug_chemical_clique_leaders))
 
-        # logger.error(f"After conflate_chemical_drug: {other_ids}")
+            # logger.error(f"After conflate_chemical_drug: {other_ids}")
 
             # if there are other ids, then we want to rebuild eqids and types.  That's because even though we have them,
             # they're not necessarily first.  For instance if what came in and got canonicalized was a protein id
             # and we want gene first, then we're relying on the order of the other_ids to put it back in the right place.
-            other_ids = [json.loads(oids) if oids else [] for oids in other_ids]
+            # other_ids = [json.loads(oids) if oids else [] for oids in other_ids]
 
             # Until we added conflate_chemical_drug, canonical_nonan and other_ids would always have the same
             # length, so we could figure out mappings from one to the other just by doing:

From b304088549209b30e9c0d0fb919acab61001a4e3 Mon Sep 17 00:00:00 2001
From: Gaurav Vaidya <gaurav@renci.org>
Date: Mon, 15 Dec 2025 16:38:22 -0500
Subject: [PATCH 08/11] Fixed up output.

---
 node_normalizer/normalizer.py | 24 ++++++++++++++----------
 1 file changed, 14 insertions(+), 10 deletions(-)

diff --git a/node_normalizer/normalizer.py b/node_normalizer/normalizer.py
index aa1dcae..ec6c1ee 100644
--- a/node_normalizer/normalizer.py
+++ b/node_normalizer/normalizer.py
@@ -731,8 +731,7 @@ async def create_node(app, canonical_id, equivalent_ids, types, info_contents, c
         identifiers_with_labels = eids
     else:
         # We have a conflation going on! To replicate Babel's behavior, we need to run the algorithem
-        # on the list of labels corresponding to the first
-        # So we need to run the algorithm on the first set of identifiers that have any
+        # on the list of labels corresponding to the first set of identifiers that have any
         # label whatsoever.
         identifiers_with_labels = []
         curies_already_checked = set()
@@ -810,12 +809,19 @@ async def create_node(app, canonical_id, equivalent_ids, types, info_contents, c
 
     # now need to reformat the identifier keys.  It could be cleaner but we have to worry about if there is a label
     descriptions = []
+    clique_leaders_output = {}
     node_taxa = set()
     node["equivalent_identifiers"] = []
     for eqid in eids:
         eq_item = {"identifier": eqid["i"]}
         if "l" in eqid and eqid["l"]:
             eq_item["label"] = eqid["l"]
+            if clique_leaders and eqid["i"] in clique_leaders:
+                clique_leaders_output[eqid["i"]] = {
+                    "identifier": eqid["i"],
+                    "label": eqid["l"],
+                    "biolink_type": types.get(eqid["i"], ["UNKNOWN"])[0],
+                }
         # if descriptions is enabled, add it to descriptions.
         if include_descriptions and "d" in eqid and len(eqid["d"]) > 0:
             desc = eqid["d"][0]
@@ -840,14 +846,12 @@ async def create_node(app, canonical_id, equivalent_ids, types, info_contents, c
 
     # Add clique leaders if available.
     if clique_leaders:
-        logger.info(f"Getting clique_leaders from {clique_leaders} for canonical ID {canonical_id}")
-        clique_leaders_for_node = clique_leaders.get(canonical_id, [])
-        clique_leaders_with_labels_and_types = [{
-            'identifier': cl,
-            'labels': [eid['l'] for eid in eids if eid['i'] == cl],
-            'types': [eid['t'] for eid in eids if eid['i'] == cl],
-        } for cl in clique_leaders_for_node]
-        node["clique_leaders"] = clique_leaders_with_labels_and_types
+        # If there are any clique leader IDs we haven't included in clique_leaders_output,
+        # insert it anyway at this point. This shouldn't happen, but let's be careful.
+        missing_clique_leaders = (clique_leaders_output.keys() - clique_leaders)
+        for cl_id in missing_clique_leaders:
+            clique_leaders_output[cl_id] = {"identifier": cl_id, "biolink_type": types.get(cl_id, ["UNKNOWN"])[0]}
+        node["clique_leaders"] = clique_leaders_output
 
     # We need to remove `biolink:Entity` from the types returned.
     # (See explanation at https://github.com/TranslatorSRI/NodeNormalization/issues/173)

From 6abcd84d103aac2c1aa3017075d220d460e1ac97 Mon Sep 17 00:00:00 2001
From: Gaurav Vaidya <gaurav@renci.org>
Date: Mon, 15 Dec 2025 16:52:31 -0500
Subject: [PATCH 09/11] Bugfixes: record unlabeled clique leaders; fix set difference order.

---
 node_normalizer/normalizer.py | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/node_normalizer/normalizer.py b/node_normalizer/normalizer.py
index ec6c1ee..e6a7486 100644
--- a/node_normalizer/normalizer.py
+++ b/node_normalizer/normalizer.py
@@ -816,12 +816,6 @@ async def create_node(app, canonical_id, equivalent_ids, types, info_contents, c
         eq_item = {"identifier": eqid["i"]}
         if "l" in eqid and eqid["l"]:
             eq_item["label"] = eqid["l"]
-            if clique_leaders and eqid["i"] in clique_leaders:
-                clique_leaders_output[eqid["i"]] = {
-                    "identifier": eqid["i"],
-                    "label": eqid["l"],
-                    "biolink_type": types.get(eqid["i"], ["UNKNOWN"])[0],
-                }
         # if descriptions is enabled, add it to descriptions.
         if include_descriptions and "d" in eqid and len(eqid["d"]) > 0:
             desc = eqid["d"][0]
@@ -837,6 +831,15 @@ async def create_node(app, canonical_id, equivalent_ids, types, info_contents, c
             eq_item["type"] = eqid['types'][-1]
         node["equivalent_identifiers"].append(eq_item)
 
+        if clique_leaders and eqid["i"] in clique_leaders:
+            clique_leaders_output[eqid["i"]] = {
+                "identifier": eqid["i"],
+                "label": eq_item.get("label", ""),
+                "description": eq_item.get("description", ""),
+                "taxa": eq_item.get("taxa", []),
+                "type": eq_item.get("type", "UNKNOWN")
+            }
+
     if include_descriptions and descriptions:
         node["descriptions"] = descriptions
         node["id"]["description"] = descriptions[0]
@@ -848,7 +851,7 @@ async def create_node(app, canonical_id, equivalent_ids, types, info_contents, c
     if clique_leaders:
         # If there are any clique leader IDs we haven't included in clique_leaders_output,
         # insert it anyway at this point. This shouldn't happen, but let's be careful.
-        missing_clique_leaders = (clique_leaders_output.keys() - clique_leaders)
+        missing_clique_leaders = (clique_leaders - clique_leaders_output.keys())
         for cl_id in missing_clique_leaders:
             clique_leaders_output[cl_id] = {"identifier": cl_id, "biolink_type": types.get(cl_id, ["UNKNOWN"])[0]}
         node["clique_leaders"] = clique_leaders_output

From 321fb85233ad4126145417934840857dfce6c57e Mon Sep 17 00:00:00 2001
From: Gaurav Vaidya <gaurav@renci.org>
Date: Mon, 15 Dec 2025 17:15:07 -0500
Subject: [PATCH 10/11] Fix clique leader output.

---
 node_normalizer/normalizer.py | 32 ++++++++++++++++++--------------
 1 file changed, 18 insertions(+), 14 deletions(-)

diff --git a/node_normalizer/normalizer.py b/node_normalizer/normalizer.py
index e6a7486..b3361e6 100644
--- a/node_normalizer/normalizer.py
+++ b/node_normalizer/normalizer.py
@@ -831,14 +831,16 @@ async def create_node(app, canonical_id, equivalent_ids, types, info_contents, c
             eq_item["type"] = eqid['types'][-1]
         node["equivalent_identifiers"].append(eq_item)
 
-        if clique_leaders and eqid["i"] in clique_leaders:
-            clique_leaders_output[eqid["i"]] = {
-                "identifier": eqid["i"],
-                "label": eq_item.get("label", ""),
-                "description": eq_item.get("description", ""),
-                "taxa": eq_item.get("taxa", []),
-                "type": eq_item.get("type", "UNKNOWN")
-            }
+        if clique_leaders and canonical_id in clique_leaders and eqid["i"] in clique_leaders[canonical_id]:
+            clique_leaders_output[eqid["i"]] = { "identifier": eqid["i"] }
+            if "label" in eq_item:
+                clique_leaders_output[eqid["i"]]["label"] = eq_item["label"]
+            if "description" in eq_item:
+                clique_leaders_output[eqid["i"]]["description"] = eq_item["description"]
+            if "taxa" in eq_item:
+                clique_leaders_output[eqid["i"]]["taxa"] = eqid["taxa"]
+            if "type" in eq_item:
+                clique_leaders_output[eqid["i"]]["type"] = eqid["type"]
 
     if include_descriptions and descriptions:
         node["descriptions"] = descriptions
@@ -849,12 +851,14 @@ async def create_node(app, canonical_id, equivalent_ids, types, info_contents, c
 
     # Add clique leaders if available.
     if clique_leaders:
-        # If there are any clique leader IDs we haven't included in clique_leaders_output,
-        # insert it anyway at this point. This shouldn't happen, but let's be careful.
-        missing_clique_leaders = (clique_leaders - clique_leaders_output.keys())
-        for cl_id in missing_clique_leaders:
-            clique_leaders_output[cl_id] = {"identifier": cl_id, "biolink_type": types.get(cl_id, ["UNKNOWN"])[0]}
-        node["clique_leaders"] = clique_leaders_output
+        node["clique_leaders"] = []
+        for cl_id in clique_leaders:
+            if cl_id in clique_leaders_output:
+                node["clique_leaders"].append(clique_leaders_output[cl_id])
+            else:
+                node["clique_leaders"].append({
+                    "identifier": cl_id,
+                })
 
     # We need to remove `biolink:Entity` from the types returned.
     # (See explanation at https://github.com/TranslatorSRI/NodeNormalization/issues/173)

From 1ad6572b498cc98a2fa05a418e4ff6903c320717 Mon Sep 17 00:00:00 2001
From: Gaurav Vaidya <gaurav@renci.org>
Date: Mon, 15 Dec 2025 17:22:54 -0500
Subject: [PATCH 11/11] Bugfix: copy clique leader taxa and type from eq_item, not eqid.

---
 node_normalizer/normalizer.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/node_normalizer/normalizer.py b/node_normalizer/normalizer.py
index b3361e6..4b9c007 100644
--- a/node_normalizer/normalizer.py
+++ b/node_normalizer/normalizer.py
@@ -816,6 +816,7 @@ async def create_node(app, canonical_id, equivalent_ids, types, info_contents, c
         eq_item = {"identifier": eqid["i"]}
         if "l" in eqid and eqid["l"]:
             eq_item["label"] = eqid["l"]
+
         # if descriptions is enabled, add it to descriptions.
         if include_descriptions and "d" in eqid and len(eqid["d"]) > 0:
             desc = eqid["d"][0]
@@ -838,9 +839,9 @@ async def create_node(app, canonical_id, equivalent_ids, types, info_contents, c
             if "description" in eq_item:
                 clique_leaders_output[eqid["i"]]["description"] = eq_item["description"]
             if "taxa" in eq_item:
-                clique_leaders_output[eqid["i"]]["taxa"] = eqid["taxa"]
+                clique_leaders_output[eqid["i"]]["taxa"] = eq_item["taxa"]
             if "type" in eq_item:
-                clique_leaders_output[eqid["i"]]["type"] = eqid["type"]
+                clique_leaders_output[eqid["i"]]["type"] = eq_item["type"]
 
     if include_descriptions and descriptions:
         node["descriptions"] = descriptions