diff --git a/grma/donorsgraph/build_donors_graph.py b/grma/donorsgraph/build_donors_graph.py
index 66b06c4..5c5faaf 100644
--- a/grma/donorsgraph/build_donors_graph.py
+++ b/grma/donorsgraph/build_donors_graph.py
@@ -10,7 +10,8 @@
 from grma.donorsgraph.create_lol import LolBuilder
 from grma.match.graph_wrapper import Graph
 from grma.utilities.geno_representation import HashableArray
-from grma.utilities.utils import gl_string_to_integers, tuple_geno_to_int, print_time
+from grma.utilities.utils import print_time, geno_to_int, gl_string_to_hash
+from bidict import bidict
 
 
 class BuildMatchingGraph:
@@ -19,7 +20,7 @@ class BuildMatchingGraph:
     It gets a path to directory with the donors' file,
     builds the graph and saved it as LOL graph using Cython.
     """
-    __slots__ = "_verbose", "_graph", "_edges"
+    __slots__ = "_verbose", "_graph", "_edges", "bidirectional_dict"
 
     def __init__(self, path_to_donors_directory: str, verbose: bool = False):
         """
@@ -31,19 +32,20 @@ def __init__(self, path_to_donors_directory: str, verbose: bool = False):
         """
         self._verbose = verbose
         self._graph = None  # LOL dict-representation
         self._edges: List[Edge] = []  # edge-list
+        self.bidirectional_dict = bidict()
 
         self._save_graph_as_edges(path_to_donors_directory)
 
     def _create_classes_edges(self, geno, class_, layers):
-        int_class = tuple_geno_to_int(class_)
+        hash_class = gl_string_to_hash(str(class_)) % 1000000000 + 1000000000
 
-        self._edges.append(Edge(int_class, geno, 0))
+        self._edges.append(Edge(hash_class, geno, 0))
 
         # check if the class node was created
-        if int_class not in layers["CLASS"]:
-            layers["CLASS"].add(int_class)
-            self._create_subclass_edges(class_, int_class, layers)
+        if hash_class not in layers["CLASS"]:
+            layers["CLASS"].add(hash_class)
+            self._create_subclass_edges(class_, hash_class, layers)
 
-    def _create_subclass_edges(self, class_, int_class, layers):
+    def _create_subclass_edges(self, class_, hash_class, layers):
         """ subclasses edges are created by dropping an allele from a class.
         each allele we drop, will be replaced with zero,
@@ -56,19 +58,18 @@ def _create_subclass_edges(self, class_, int_class, layers):
         # set the missing allele to always be the second allele in the locus
         for i in range(num_of_alleles):
             if i % 2 == 0:
-                subclass_alleles.add(
-                    tuple_geno_to_int(tuple(class_[0:i] + (0,) + class_[i + 1 :]))
-                )
+                subclass = tuple(class_[0:i] + (0,) + class_[i + 1 :])
             else:
-                subclass_alleles.add(
-                    tuple_geno_to_int(
-                        tuple(class_[0 : i - 1] + (0, class_[i - 1]) + class_[i + 1 :])
-                    )
+                subclass = tuple(
+                    class_[0 : i - 1] + (0, class_[i - 1]) + class_[i + 1 :]
                 )
+            hash_subclass = gl_string_to_hash(str(subclass)) % 1000000000 + 2000000000
+            subclass_alleles.add(hash_subclass)
+
         # add subclass->class edges
         for sub in subclass_alleles:
-            self._edges.append(Edge(sub, int_class, 0))
+            self._edges.append(Edge(sub, hash_class, 0))
 
             if sub not in layers["SUBCLASS"]:
                 layers["SUBCLASS"].add(sub)
@@ -101,16 +102,27 @@ def _save_graph_as_edges(self, path_to_donors_directory: str | os.PathLike):
         ):
             # retrieve all line's parameters
             donor_id, geno, probability, index = line.strip().split(",")
-            donor_id = int(donor_id)
+            donor_id = -1 * int(donor_id)
             index = int(index)
             probability = float(probability)
 
             # convert geno to list of integers
-            geno = gl_string_to_integers(geno)
-
-            # sort alleles for each HLA-X
-            for x in range(0, len(geno), 2):
-                geno[x : x + 2] = sorted(geno[x : x + 2])
+            alleles = [
+                allele
+                for locus in geno.split("^")
+                for allele in locus.split("+")
+            ]
+            geno = []
+            for allele in alleles:
+                if "N" in allele:
+                    geno.append(0)
+                elif allele in self.bidirectional_dict:
+                    geno.append(self.bidirectional_dict[allele])
+                else:
+                    self.bidirectional_dict[allele] = (
+                        len(self.bidirectional_dict) + 3
+                    )
+                    geno.append(self.bidirectional_dict[allele])
 
             geno = HashableArray(geno)
 
             # handle new donor appearance in file
@@ -135,7 +147,7 @@ def _save_graph_as_edges(self, path_to_donors_directory: str | os.PathLike):
                 # continue creation of classes and subclasses
                 if geno not in layers["GENOTYPE"]:
                     layers["GENOTYPE"].add(geno)
-                    CLASS_I_END = -2 * int(-len(geno)/4 - 0.5)
+                    CLASS_I_END = -2 * int(-len(geno) / 4 - 0.5)
                     geno_class1 = tuple(geno[:CLASS_I_END])
                     geno_class2 = tuple(geno[CLASS_I_END:])
                     self._create_classes_edges(geno, geno_class1, layers)
@@ -150,6 +162,8 @@ def _save_graph_as_edges(self, path_to_donors_directory: str | os.PathLike):
 
         # add the last donor to edgelist
         for HLA, probability in probability_dict.items():
+            if last_id == 0:
+                print("last_id = 0")
             self._edges.append(Edge(HLA, last_id, probability / total_probability))
             self._edges.append(Edge(last_id, HLA, probability / total_probability))
 
@@ -176,4 +190,5 @@ def to_pickle(self, path: Union[str, os.PathLike]):
 
         :param path: A path to save the pickled object
         """
-        pickle.dump(self._graph, open(path, "wb"))
+        graph_bdict = [self._graph, self.bidirectional_dict]
+        pickle.dump(graph_bdict, open(path, "wb"))
diff --git a/grma/donorsgraph/create_lol.py b/grma/donorsgraph/create_lol.py
index 8365227..0789a4d 100644
--- a/grma/donorsgraph/create_lol.py
+++ b/grma/donorsgraph/create_lol.py
@@ -6,7 +6,7 @@
 from collections import OrderedDict
 
 from grma.donorsgraph import Edge
-from grma.utilities.utils import print_time, tuple_geno_to_int
+from grma.utilities.utils import print_time
 
 
 class LolBuilder:
@@ -76,10 +76,10 @@ def _convert(self, layers: Dict[str, Set]):
         arrays_start = free
         # map lol-ids to arrays
         # given an lol_id, the mapping will be map_number_to_arr_node[lol_id - arrays_start, :]
-        geno = layers['GENOTYPE'].pop()
-        layers['GENOTYPE'].add(geno)
+        geno = layers["GENOTYPE"].pop()
+        layers["GENOTYPE"].add(geno)
         map_number_to_arr_node = np.zeros(
-            (len(layers["GENOTYPE"]), len(geno)), dtype=np.uint16
+            (len(layers["GENOTYPE"]), len(geno)), dtype=np.uint32
         )
         for i, geno in tqdm(
             enumerate(layers["GENOTYPE"]),
@@ -87,7 +87,7 @@ def _convert(self, layers: Dict[str, Set]):
             disable=not self._verbose,
         ):
             map_node_to_number[geno] = free
-            map_number_to_arr_node[i, :] = geno.np()
+            map_number_to_arr_node[i] = geno
             free += 1
 
         # map classes to lol-id.
@@ -112,7 +112,7 @@ def _convert(self, layers: Dict[str, Set]):
                 )
                 if y < subclasses_start
             ],
-            dtype=np.uint32,
+            dtype=np.int32,
         )
 
         print_time("(3/6) Create the index list")
@@ -186,12 +186,6 @@ def _convert(self, layers: Dict[str, Set]):
             weights_list=weights_list,
         )
 
-        # replace geno hashable array to more efficient representation.
-        for array_geno in layers["GENOTYPE"]:
-            int_geno = tuple_geno_to_int(array_geno)
-            map_node_to_number[int_geno] = map_node_to_number[array_geno]
-            del map_node_to_number[array_geno]
-
         del self._graph
         del layers
         gc.collect()
diff --git a/grma/match/donors_matching.py b/grma/match/donors_matching.py
index cbd0dc2..bf19534 100644
--- a/grma/match/donors_matching.py
+++ b/grma/match/donors_matching.py
@@ -1,7 +1,11 @@
 from collections.abc import Iterator
 from typing import List, Tuple, Set, Iterable, Dict
 from typing import Sequence
 
+from grma.match.hla_match import batch_locuses_match_between_genos
+from ..utilities.utils import geno_to_int, gl_string_to_hash
+import ast
+from bidict import bidict
 import networkx as nx
 import numpy as np
 import pandas as pd
@@ -13,8 +17,6 @@
     donor_mismatch_format,
     drop_less_than_7_matches,
     check_similarity,
-    gl_string_to_integers,
-    tuple_geno_to_int,
     print_time,
 )
 
@@ -37,15 +39,24 @@ def _init_results_df(donors_info):
     fields_in_results = {
         "Patient_ID": [],
         "Donor_ID": [],
-        "GvH_Mismatches": [],
-        "HvG_Mismatches": [],
-        "Number_Of_Mismatches": [],
         "Matching_Probability": [],
         "Match_Probability": [],
         "Permissive/Non-Permissive": [],
-        "Match_Between_Most_Commons": [],
+        # "Match_Between_Most_Commons": [],
     }
 
+    # Define the mismatch types and their order
+    mismatch_types = ["", "HvG", "GvH"]
+    for t in mismatch_types:
+        for i in range(4):  # 0 to 3 mismatches
+            name = (
+                f"chance_for_{i}_{t}_Mismatches" if t else f"chance_for_{i}_Mismatches"
+            )
+            fields_in_results[name] = []
+        # Add "more than 3" for each mismatch type
+        name = f"chance_for_3+_{t}_Mismatches" if t else f"chance_for_3+_Mismatches"
+        fields_in_results[name] = []
+
     donors_db_fields = DONORS_DB.columns.values.tolist()
     for di in donors_info:
         if di in donors_db_fields:
@@ -53,26 +64,28 @@
     return pd.DataFrame(fields_in_results)
 
 
-def locuses_match_between_genos(geno_pat, geno_don):
-    matches = []
-    total_gvh = 0
-    total_hvg = 0
-
-    for i in range(0, len(geno_pat), 2):
-        a1, b1 = geno_pat[i], geno_pat[i + 1]
-        a2, b2 = geno_don[i], geno_don[i + 1]
-
-        s1 = int(a1 == a2) + int(b1 == b2)
-        s2 = int(a1 == b2) + int(b1 == a2)
-        matches.append(max(s1, s2))
-
-        p_set = {x for x in (a1, b1) if x not in (None, 0)}
-        d_set = {x for x in (a2, b2) if x not in (None, 0)}
-
-        total_gvh += len(p_set - d_set)  # patient has, donor lacks
-        total_hvg += len(d_set - p_set)  # donor has, patient lacks
-
-    return matches, total_gvh, total_hvg
+# def locuses_match_between_genos(geno_pat, geno_don):
+#     matches = []
+#     total_gvh = 0
+#     total_hvg = 0
+#
+#     for i in range(0, len(geno_pat), 2):
+#         a1, b1 = geno_pat[i], geno_pat[i + 1]
+#         a2, b2 = geno_don[i], geno_don[i + 1]
+#
+#         P = {x for x in (a1, b1) if x not in (None, 0)}
+#         D = {x for x in (a2, b2) if x not in (None, 0)}
+#
+#         gvh_i = len(P - D)  # patient has, donor lacks
+#         hvg_i = len(D - P)  # donor has, patient lacks
+#
+#         total_gvh += gvh_i
+#         total_hvg += hvg_i
+#
+#         mismatch_i = max(gvh_i, hvg_i)  # table's #Max
+#         matches.append(mismatch_i)
+#
+#     return matches, total_gvh, total_hvg
 
 
 class DonorsMatching(object):
@@ -84,16 +97,20 @@ class DonorsMatching(object):
         "_genotype_candidates",
         "patients",
         "verbose",
+        "bidirectional_dict",
+        "_donor_neighbors_cache",
     )
 
-    def __init__(self, graph: Graph, verbose: bool = False):
+    def __init__(self, graph: Graph, verbose: bool = False, bdict: bidict = None):
         self._graph: Graph = graph
         self._patients_graph: nx.DiGraph = nx.DiGraph()
         self._genotype_candidates: Dict[
             int, Dict[int, List[Tuple[float, int]]]
         ] = {}  # AMIT ADD
-        self.patients: Dict[int, Sequence[int]] = {}
+        self.patients: Dict[int, Dict[int, HashableArray]] = {}
         self.verbose = verbose
+        self.bidirectional_dict = bdict
+        self._donor_neighbors_cache = {}
 
     def get_most_common_genotype(self, donor_id):
         """Takes a donor ID and return his/her most common genotype."""
@@ -144,7 +161,9 @@ def __find_genotype_candidates_from_class(
     ) -> Tuple[np.ndarray, np.ndarray]:
         """Takes an integer subclass.
         Returns the genotypes (ids and values) which are connected to it in the graph"""
-        return self._graph.class_neighbors(clss, Len = len(list(self.patients.values())[0]))
+        return self._graph.class_neighbors(
+            clss, Len=len(next(iter(next(iter(self.patients.values())).values())))
+        )
 
     def __find_donor_from_geno(self, geno_id: int) -> Sequence[int]:
         """Gets the LOL ID of a genotype.
@@ -210,14 +229,21 @@ def __add_matched_genos_to_graph(
                                weight={geno_num: [probability, similarity]})
         """
 
-    def __classes_and_subclasses_from_genotype(self, genotype: HashableArray):
+    def __classes_and_subclasses_from_genotype(self, genotype):
         subclasses = []
-        ALLELES_IN_CLASS_I = -2*int(-len(genotype)/4-0.5)
+        ALLELES_IN_CLASS_I = -2 * int(-len(genotype) / 4 - 0.5)
         ALLELES_IN_CLASS_II = len(genotype) - ALLELES_IN_CLASS_I
-        classes = [(genotype[:ALLELES_IN_CLASS_I], 0), (genotype[ALLELES_IN_CLASS_I:], 1)]
+        classes = [
+            (tuple(genotype[:ALLELES_IN_CLASS_I]), 0),
+            (tuple(genotype[ALLELES_IN_CLASS_I:]), 1),
+        ]
         num_of_alleles_in_class = [ALLELES_IN_CLASS_I, ALLELES_IN_CLASS_II]
 
-        int_classes = [(tuple_geno_to_int(tuple(clss[0])), clss[1]) for clss in classes]
+        int_classes = []
+        for clss in classes:
+            hash_clss = gl_string_to_hash(str(clss[0])) % 1000000000 + 1000000000
+            int_classes.append((hash_clss, clss[1]))
+
         for clss in int_classes:
             self._patients_graph.add_edge(clss[0], genotype)
@@ -228,22 +254,29 @@ def __classes_and_subclasses_from_genotype(self, genotype: HashableArray):
             for k in range(0, num_of_alleles_in_class[class_num]):
                 # set the missing allele to always be the second allele in the locus
                 if k % 2 == 0:
-                    sub = tuple_geno_to_int(
-                        classes[class_num][0][0:k] + ZEROS + classes[class_num][0][k + 1 :]
+                    subclass = tuple(
+                        classes[class_num][0][0:k]
+                        + (0,)
+                        + classes[class_num][0][k + 1 :]
                     )
                 else:
-                    sub = tuple_geno_to_int(
+                    subclass = tuple(
                         classes[class_num][0][0 : k - 1]
-                        + ZEROS
+                        + (0,)
                         + classes[class_num][0][k - 1 : k]
                         + classes[class_num][0][k + 1 :]
                     )
+                hash_subclass = (
+                    gl_string_to_hash(str(subclass)) % 1000000000 + 2000000000
+                )
 
                 # missing allele number is the index of the first allele of the locus the missing allele belongs to.
                 # Could be [0, 2, 4, 6, 8]
                 missing_allele_num = ALLELES_IN_CLASS_I * class_num + 2 * (k // 2)
                 subclass = ClassMinusOne(
-                    subclass=sub, class_num=class_num, allele_num=missing_allele_num
+                    subclass=hash_subclass,
+                    class_num=class_num,
+                    allele_num=missing_allele_num,
                 )
 
                 # add subclass -> genotype edge to patients graph
@@ -253,8 +286,6 @@ def __classes_and_subclasses_from_genotype(self, genotype: HashableArray):
 
         return int_classes, subclasses
 
-
-
     def create_patients_graph(self, f_patients: str):
         """
         create patients graph. \n
@@ -273,13 +304,52 @@ def create_patients_graph(self, f_patients: str):
             # retrieve all line's parameters
             line_values = line.strip().split(",")
             patient_id, geno, prob, index = line_values
-
-            geno = gl_string_to_integers(geno)
-            patient_id = int(patient_id)
+            patient_id = -1 * int(patient_id)
             index = int(index)
             prob = float(prob)
 
-            # handle new patient appearance in file
+            encoded = []
+            for locus in geno.split("^"):
+                parts = locus.split("+")
+                aL = parts[0].strip() if len(parts) > 0 else "N"
+                aR = parts[1].strip() if len(parts) > 1 else "N"
+
+                # Classify left allele
+                if "N" in aL:
+                    tL, vL = "missing", 0
+                elif aL in self.bidirectional_dict:
+                    tL, vL = "known", self.bidirectional_dict[aL]
+                else:
+                    tL, vL = "unknown", None
+
+                # Classify right allele
+                if "N" in aR:
+                    tR, vR = "missing", 0
+                elif aR in self.bidirectional_dict:
+                    tR, vR = "known", self.bidirectional_dict[aR]
+                else:
+                    tR, vR = "unknown", None
+
+                # Decide per-locus encoding (two outputs per locus, in order)
+                if tL == "unknown" and tR == "unknown":
+                    if aL == aR:
+                        encoded.extend(
+                            [1, 1]
+                        )  # same unknown twice (homozygous unknown)
+                    else:
+                        encoded.extend([1, 2])  # two different unknowns
+                elif tL == "unknown" and tR != "unknown":
+                    encoded.extend([1, vR])  # left unknown, right known/missing
+                elif tL != "unknown" and tR == "unknown":
+                    encoded.extend([vL, 1])  # left known/missing, right unknown
+                else:
+                    encoded.extend([vL, vR])  # both known/missing
+
+            geno = HashableArray(encoded)
+            if patient_id in self.patients.keys():
+                self.patients[patient_id][index] = geno
+            else:
+                self.patients[patient_id] = {index: geno}
+
             if index == 0:
                 # set normalized probabilities
                 for HLA, probability in prob_dict.items():
@@ -290,7 +360,6 @@ def create_patients_graph(self, f_patients: str):
                 # initialize parameters
                 prob_dict = {}
                 total_prob = 0
-                self.patients[patient_id] = geno
                 self._genotype_candidates[
                     patient_id
                 ] = {}  # AMIT ADD - initialize _genotype_candidates
@@ -299,13 +368,6 @@ def create_patients_graph(self, f_patients: str):
                 subclasses_by_patient[patient_id] = set()
                 classes_by_patient[patient_id] = set()
 
-            # sort alleles for each HLA-X
-            l = len(geno)
-            for x in range(0, l, 2):
-                geno[x : x + 2] = sorted(geno[x : x + 2])
-
-            geno = HashableArray(geno)
-
             # add probabilities to probability dict
             total_prob += prob
             if geno not in prob_dict:
@@ -352,14 +414,22 @@ def find_geno_candidates_by_subclasses(self, subclasses):
                 genotypes_value,
             ) = self.__find_genotype_candidates_from_subclass(subclass.subclass)
 
-            geno = genotypes_value[0]
-            ALLELES_IN_CLASS_I = -2*int(-len(geno)/4-0.5)
-            ALLELES_IN_CLASS_II = len(geno) - ALLELES_IN_CLASS_I
+            len_geno = len(genotypes_value[0])
+            ALLELES_IN_CLASS_I = -2 * int(-len_geno / 4 - 0.5)
+            ALLELES_IN_CLASS_II = len_geno - ALLELES_IN_CLASS_I
 
             # Checks only the locuses that are not certain to match
             if subclass.class_num == 0:
-                allele_range_to_check = np.array([x for x in range(len(geno)//2 + (len(geno)//2 & 1), len(geno), 2)] + [subclass.allele_num], dtype=np.uint8)
+                allele_range_to_check = np.array(
+                    [x for x in range(ALLELES_IN_CLASS_I, len_geno, 2)]
+                    + [subclass.allele_num],
+                    dtype=np.uint8,
+                )
             else:
-                allele_range_to_check = np.array([x for x in range(0, len(geno)//2 + (len(geno)//2 & 1), 2)] + [subclass.allele_num], dtype=np.uint8)
+                allele_range_to_check = np.array(
+                    [x for x in range(0, ALLELES_IN_CLASS_I, 2)]
+                    + [subclass.allele_num],
+                    dtype=np.uint8,
+                )
 
             # number of alleles that already match due to match in subclass
             matched_alleles: int = (
@@ -395,14 +465,34 @@ def find_geno_candidates_by_classes(self, classes):
             # Checks only the locuses that are not certain to match (the locuses of the other class)
             # Class I appearances: 3 locuses = 6 alleles = 23/24 digits
             # Class II appearances: 2 locuses = 4 alleles = 15/16 digits
-            geno = list(self.patients.values())[0]
+            num_alleles = len(
+                next(iter(next(iter(self.patients.values())).values()))
+            )
             if clss[1] == 0:
-                allele_range_to_check = np.array([x for x in range(len(geno)//2 + (len(geno)//2 & 1), len(geno), 2)], dtype=np.uint8)
-                matched_alleles: int = len(geno)//2 + (len(geno)//2 & 1)
+                allele_range_to_check = np.array(
+                    [
+                        x
+                        for x in range(
+                            num_alleles // 2 + (num_alleles // 2 & 1),
+                            num_alleles,
+                            2,
+                        )
+                    ],
+                    dtype=np.uint8,
+                )
+                matched_alleles: int = num_alleles // 2 + (num_alleles // 2 & 1)
             else:
-                allele_range_to_check = np.array([x for x in range(0, len(geno)//2 + (len(geno)//2 & 1), 2)], dtype=np.uint8)
-                matched_alleles: int = len(geno)//2 - (len(geno)//2 & 1)
+                allele_range_to_check = np.array(
+                    [
+                        x
+                        for x in range(
+                            0, num_alleles // 2 + (num_alleles // 2 & 1), 2
+                        )
+                    ],
+                    dtype=np.uint8,
+                )
+                matched_alleles: int = num_alleles // 2 - (num_alleles // 2 & 1)
 
             # Compares the candidate to the patient's genotypes, and adds the match geno candidates to the graph.
             self.__add_matched_genos_to_graph(
@@ -434,8 +524,7 @@ def find_geno_candidates_by_genotypes(self, patient_id: int):
                 "probability"
             ]  # patient's geno probability
 
-            int_geno = tuple_geno_to_int(geno)
-            geno_id = self._graph.get_node_id(int_geno)
+            geno_id = self._graph.get_node_id(geno)
             if not geno_id:
                 continue
 
@@ -485,7 +574,7 @@ def score_matches(
         matched: Set[int],
     ) -> Tuple[Set[int], int, pd.DataFrame]:
         """
-        Given a number of mismatches and a patient, this function will return a dictionary
+        Given a number of mismatches and a patient, this function will return a tuple
        of all matching donors found in the data
        with the specific number of mismatches,
        sorted by their probability for a match.
@@ -496,10 +585,11 @@ def score_matches(
         :param threshold: Minimal score value for a valid match. default is 0.1.
         :param cutoff: Maximum number of matches to return. default is 50.
         :param matched: A set of donors ID that have already matched for this patient.
-        :return: a dictionary of all matching donors and their matching properties.
+        :return: a tuple of (matched set, count of matches, updated results DataFrame).
         """
         if len(matched) >= cutoff:
             return matched, 0, results_df
+        num_alleles = len(next(iter(self.patients[patient].values())))
 
         # a loop that set the scores for all the matching candidates.
         patient_scores = {}
@@ -509,7 +599,7 @@ def score_matches(
         ].items():  # AMIT ADD
             for prob, matches in genotype_matches.values():  # AMIT CHANGE
                 # match_info = (probability of patient's genotype, number of matches to patient's genotype)
-                if matches != len(list(self.patients.values())[0]) - mismatch:
+                if matches != num_alleles - mismatch:
                     continue
 
                 # add the probabilities multiplication of the patient and all the donors that has this genotype
@@ -522,7 +612,6 @@ def score_matches(
                         patient_scores[donor][0] += prob * donor_prob
                         if donor_prob > patient_scores[donor][2]:
                             patient_scores[donor][1:] = [hla_id, donor_prob]
-
                     else:
                         patient_scores[donor] = [prob * donor_prob, hla_id, donor_prob]
 
@@ -547,10 +636,80 @@ def score_matches(
                 if len(matched) >= cutoff:
                     break
                 matched.add(donor)
-                self.__append_matching_donor(
-                    add_donors, donors_info, patient, donor, score * 100, mismatch
+                # Compute chances for gvh, hvg, and mis
+                pats = self.patients[patient]
+                chances = {
+                    "gvh": [0, 0, 0, 0, 0],
+                    "hvg": [0, 0, 0, 0, 0],
+                    "mis": [0, 0, 0, 0, 0],
+                }
+
+                for index, pat in pats.items():
+                    pat_prob = self._patients_graph[pat][patient]["probability"]
+
+                    # --- cache donor genotype neighbors once per donor ---
+                    if not hasattr(self, "_donor_neighbors_cache"):
+                        self._donor_neighbors_cache = {}
+                    donor_neighbors = self._donor_neighbors_cache.setdefault(
+                        donor, list(self._graph.neighbors(donor))
+                    )
+
+                    if not donor_neighbors:
+                        continue
+
+                    # Build 2D array of donor genotypes (rows) and 1D array of their probs
+                    # Assumes each geno[0] is a fixed-length tuple/list of ints (same len as pat.np()).
+                    don_genos = np.array(
+                        [geno[0] for geno in donor_neighbors], dtype=np.uint32
+                    )
+                    don_probs = np.array([geno[1] for geno in donor_neighbors], dtype=float)
+
+                    # Ensure C-contiguous (should already be, but just to be safe)
+                    if not don_genos.flags.c_contiguous:
+                        don_genos = np.ascontiguousarray(don_genos, dtype=np.uint32)
+
+                    pat_np = np.asarray(
+                        pat.np(), dtype=np.uint32
+                    )  # no copy if already uint32 + contiguous
+
+                    # One Cython call for all donor genos
+                    # out[:,0]=mis, out[:,1]=gvh, out[:,2]=hvg
+                    out = batch_locuses_match_between_genos(pat_np, don_genos)
+
+                    # Accumulate chances
+                    # (same logic as before; clamp at 4 for "3+")
+                    for j in range(out.shape[0]):
+                        mis = int(out[j, 0])
+                        gvh = int(out[j, 1])
+                        hvg = int(out[j, 2])
+                        w = pat_prob * don_probs[j]
+                        chances["gvh"][min(gvh, 4)] += w
+                        chances["hvg"][min(hvg, 4)] += w
+                        chances["mis"][min(mis, 4)] += w
+
+                # Compute allele probability
+                allele_prob = self.probability_to_allele(
+                    don_id=donor, pat_geno=self.patients[patient][0].np()
                 )
 
+                # Append donor data to add_donors
+                add_donors["Patient_ID"].append(-1 * patient)
+                add_donors["Donor_ID"].append(-1 * donor)
+                add_donors["Match_Probability"].append(allele_prob)
+                # add_donors["Match_Between_Most_Commons"].append(compare_commons)
+                add_donors["Matching_Probability"].append(score)
+                for t in ("GvH", "HvG", ""):
+                    for i in range(5):
+                        key = f"chance_for_{'3+' if i == 4 else i}_{t + '_Mismatches' if t else 'Mismatches'}"
+                        add_donors[key].append(chances[t.lower() if t else "mis"][i])
+
+                add_donors["Permissive/Non-Permissive"].append("-")
+                # TODO: add permissiveness algorithm
+
+                # add the other given fields to the results
+                for field in donors_info:
+                    add_donors[field].append(DONORS_DB.loc[donor, field])
+
             results_df = pd.concat(
                 [results_df, pd.DataFrame(add_donors)], ignore_index=True
             )
@@ -558,48 +717,7 @@ def score_matches(
         if self.verbose:
             print_time(f"({mismatch} MMs) Found {count_matches} matches")
matches") - return matched, count_matches, results_df # 3433825 - - def __append_matching_donor( - self, - add_donors: Dict, - donors_info: Iterable[str], - patient: int, - donor: int, - match_prob: float, - mm_number: int, - ) -> None: - """add a donor to the matches dictionary""" - pat = self.patients[patient] - don = self.get_most_common_genotype(donor) - compare_commons, gvh, hvg = locuses_match_between_genos( - pat, don - ) - - add_donors["Patient_ID"].append(patient) - add_donors["Donor_ID"].append(donor) - allele_prob = self.probability_to_allele( - don_id=donor, pat_geno=self.patients[patient] - ) - add_donors["Match_Probability"].append(allele_prob) - add_donors["Match_Between_Most_Commons"].append(compare_commons) - - add_donors["Matching_Probability"].append(match_prob) - actual_mismatches = 0 - for match_score in compare_commons: - if match_score != 2: - actual_mismatches += (2 - match_score) - - add_donors["Number_Of_Mismatches"].append(actual_mismatches) - add_donors["GvH_Mismatches"].append(gvh) - add_donors["HvG_Mismatches"].append(hvg) - - add_donors["Permissive/Non-Permissive"].append("-") - # TODO: add permissiveness algorithm - - # add the other given fields to the results - for field in donors_info: - add_donors[field].append(DONORS_DB.loc[donor, field]) + return matched, count_matches, results_df @property def patients_graph(self): diff --git a/grma/match/graph_wrapper.py b/grma/match/graph_wrapper.py index 6f1fe07..feb3080 100644 --- a/grma/match/graph_wrapper.py +++ b/grma/match/graph_wrapper.py @@ -7,6 +7,7 @@ import numpy as np from grma.utilities.geno_representation import HashableArray from grma.match.lol_graph import LolGraph +from bidict import bidict NODES_TYPES = Union[int, HashableArray] @@ -57,10 +58,12 @@ def get_edge_data( ret = self._graph.get_edge_data(node1_num, node2_num) return default if ret == exception_val else ret - def class_neighbors(self, node: NODES_TYPES | int, search_lol_id: bool = False, Len: int = 10): + def class_neighbors( + self, node: NODES_TYPES | int, search_lol_id: bool = False, Len: int = 10 + ): node_num = self._map_node_to_number[node[0]] if not search_lol_id else node neighbors_list = self._graph.neighbors_unweighted(node_num) - neighbors_list_values = np.ndarray([len(neighbors_list), Len], dtype=np.uint16) + neighbors_list_values = np.ndarray([len(neighbors_list), Len], dtype=np.uint32) for i, neighbor in enumerate(neighbors_list): neighbors_list_values[i, :] = self._graph.arr_node_value_from_id(neighbor) @@ -120,5 +123,5 @@ def node_value_from_id(self, node_id: int) -> NODES_TYPES: @classmethod def from_pickle(cls, path: Union[str, PathLike]): - graph_dict = pickle.load(open(path, "rb")) - return cls(graph_dict) + graph_bdict = pickle.load(open(path, "rb")) + return cls(graph_bdict[0]), bidict(graph_bdict[1]) diff --git a/grma/match/lol_graph.pyx b/grma/match/lol_graph.pyx index ee2679f..c123d15 100644 --- a/grma/match/lol_graph.pyx +++ b/grma/match/lol_graph.pyx @@ -15,8 +15,8 @@ cdef class LolGraph: UINT[:] _index_list UINT[:] _neighbors_list FLOAT[:] _weights_list - UINT[:] _map_number_to_num_node - UINT16[:, :] _map_number_to_arr_node + INT[:] _map_number_to_num_node + UINT[:, :] _map_number_to_arr_node # Changed from UINT16[:, :] UINT _arrays_start bint directed bint weighted @@ -24,8 +24,8 @@ cdef class LolGraph: def __init__(self, UINT[:] index_list, UINT[:] neighbors_list, FLOAT[:] weights_list, - UINT[:] map_number_to_num_node, - UINT16[:, :] map_number_to_arr_node, + INT[:] map_number_to_num_node, + UINT[:, :] 
map_number_to_arr_node, # Changed from UINT16[:, :] UINT arrays_start, bint directed, bint weighted): self._index_list = index_list @@ -49,12 +49,12 @@ cdef class LolGraph: @cython.boundscheck(False) @cython.wraparound(False) - cpdef UINT16[:] arr_node_value_from_id(self, UINT node_id): + cpdef UINT[:] arr_node_value_from_id(self, UINT node_id): # Changed from UINT16[:] return self._map_number_to_arr_node[node_id - self._arrays_start] @cython.boundscheck(False) @cython.wraparound(False) - cpdef UINT num_node_value_from_id(self, UINT node_id): + cpdef INT num_node_value_from_id(self, UINT node_id): return self._map_number_to_num_node[node_id] @cython.boundscheck(False) @@ -108,7 +108,6 @@ cdef class LolGraph: return self._weights_list[idx + node2_index] return -1 - # get neighbors of specific node n @cython.boundscheck(False) @cython.wraparound(False) cpdef tuple neighbors_weighted(self, UINT node): @@ -152,8 +151,8 @@ cdef class LolGraph: """return the second degree neighbors of a node - neighbors of neighbors.""" cdef UINT idx, idx_end, i, j, pointer, neighbor_1st, idx_1st_neigh, idx_end_1st_neigh, neighbor_id cdef np.ndarray[UINT, ndim=1] neighbors_list_id, neighbors_id - cdef UINT16[:] arr - cdef np.ndarray[UINT16, ndim=2] neighbors_value + cdef UINT[:] arr # Changed from UINT16[:] + cdef np.ndarray[UINT, ndim=2] neighbors_value # Changed from UINT16 cdef UINT num_of_neighbors_2nd cdef UINT loci_len @@ -178,7 +177,7 @@ cdef class LolGraph: pointer += 1 loci_len = self._map_number_to_arr_node.shape[1] - neighbors_value = np.zeros((num_of_neighbors_2nd, loci_len), dtype=np.uint16) + neighbors_value = np.zeros((num_of_neighbors_2nd, loci_len), dtype=np.uint32) # Changed from uint16 for i in range(len(neighbors_id)): neighbor_id = neighbors_id[i] arr = self.arr_node_value_from_id(neighbor_id) diff --git a/grma/match/match.py b/grma/match/match.py index 99a55ae..1ba0f66 100644 --- a/grma/match/match.py +++ b/grma/match/match.py @@ -9,7 +9,8 @@ from grim import grim import csv import ast - +from bidict import bidict +import re from grma.match import Graph as MatchingGraph from grma.match.donors_matching import DonorsMatching, _init_results_df @@ -120,6 +121,7 @@ def find_matches( save_to_csv: bool = False, calculate_time: bool = False, output_dir: str = "output", + bdict: bidict = None, ): """ The main function responsible for performing the matching. @@ -148,7 +150,7 @@ def find_matches( if verbose: print_time("Start graph matching") - g_m = DonorsMatching(match_graph, verbose=verbose) + g_m = DonorsMatching(match_graph, verbose=verbose, bdict=bdict) # create patients graph and find all candidates start_build_graph = time.time() @@ -199,13 +201,20 @@ def find_matches( patients_results = {patient: None for patient in patients} with open(imputation_filename, "r") as f: line = f.readline().strip() - loci = list(dict.fromkeys([item for item in line.split(',')[1].replace('*', ',').replace('+', ',').replace('^', ',').replace(':', ',').split(',') if not item.isdigit() and item])) - + text = line.split(",")[1] + raw_loci = re.findall(r"\b([A-Za-z0-9]+)\*", text) + loci = [] + for locus in raw_loci: + if locus in ("DRB3", "DRB4", "DRB5", "DRBX"): + locus = "DRB3/4/5/X" + loci.append(locus) + loci = list(dict.fromkeys(loci)) if patients: avg_build_time = (end_build_graph - start_build_graph) / len(patients) else: avg_build_time = 0 + times = [] for patient in patients: # print("\n","Patient", patient, "Verbose", verbose) # For each patient we search matches in the donor graph. 
@@ -222,11 +232,14 @@ def find_matches(
             patient, g_m, donors_info, threshold, cutoff, classes, subclasses
         )
 
         match_Probability = results_df["Match_Probability"]
-        match_Between_Most_Commons = results_df["Match_Between_Most_Commons"]
+        # match_Between_Most_Commons = results_df["Match_Between_Most_Commons"]
         Permissive = results_df["Permissive/Non-Permissive"]
 
-        df_new = results_df.drop(columns=['Match_Probability', 'Match_Between_Most_Commons', 'Permissive/Non-Permissive']).copy()
+        # df_new = results_df.drop(columns=['Match_Probability', 'Match_Between_Most_Commons', 'Permissive/Non-Permissive']).copy()
+        df_new = results_df.drop(
+            columns=["Match_Probability", "Permissive/Non-Permissive"]
+        ).copy()
 
         for idx, row in enumerate(match_Probability):
             k = 0
@@ -234,19 +247,30 @@ def find_matches(
             for i in [1, 2]:
                 df_new.loc[idx, f"Match_Probability_{locus}_{i}"] = row[k]
                 k += 1
-        df_new['Permissive/Non-Permissive'] = Permissive
-        for idx, row in enumerate(match_Between_Most_Commons):
-            for l, locus in enumerate(loci):
-                df_new.loc[idx, f"Match_Between_Most_Commons_{locus}"] = row[l]
+        df_new["Permissive/Non-Permissive"] = Permissive
+        # for idx, row in enumerate(match_Between_Most_Commons):
+        #     for l, locus in enumerate(loci):
+        #         df_new.loc[idx, f"Match_Between_Most_Commons_{locus}"] = row[l]
         results_df = df_new.copy()
+
+        # Normalize mismatch probability columns
+        # for cols in [
+        #     ['chance_for_0_GvH_Mismatches', 'chance_for_1_GvH_Mismatches', 'chance_for_2_GvH_Mismatches', 'chance_for_3_GvH_Mismatches', 'chance_for_3+_GvH_Mismatches'],
+        #     ['chance_for_0_HvG_Mismatches', 'chance_for_1_HvG_Mismatches', 'chance_for_2_HvG_Mismatches', 'chance_for_3_HvG_Mismatches', 'chance_for_3+_HvG_Mismatches'],
+        #     ['chance_for_0_Mismatches', 'chance_for_1_Mismatches', 'chance_for_2_Mismatches', 'chance_for_3_Mismatches', 'chance_for_3+_Mismatches']
+        # ]:
+        #     prob_sum = results_df[cols].sum(axis=1)
+        #     for col in cols:
+        #         results_df[col] = results_df[col].div(prob_sum).fillna(0.0).round(2)
+
         end = time.time()
         patient_time = end - start + avg_build_time
-
+        times.append(end - start)
         if calculate_time:
             patients_results[patient] = (results_df, patient_time)
         else:
             patients_results[patient] = results_df
-
+        patient = -1 * patient
         if save_to_csv:
             # save results to csv
             results_df.to_csv(
@@ -254,13 +278,15 @@ def find_matches(
                     f"{output_dir}/search_{search_id}", f"Patient_{patient}.csv"
                 ),
                 index=True,
-                float_format="%.2f",
+                # float_format="%.2f",
             )
             if verbose:
                 print_time(
                     f"Saved Matching results for {patient} in "
                     f"{os.path.join(f'{output_dir}/search_{search_id}', f'Patient_{patient}.csv')}"
                 )
+    with open("times_grma.json", "w") as f:
+        json.dump(times, f, indent=4)
 
     return patients_results
 
diff --git a/grma/utilities/cutils.pyx b/grma/utilities/cutils.pyx
index 9072d3f..895bf06 100644
--- a/grma/utilities/cutils.pyx
+++ b/grma/utilities/cutils.pyx
@@ -9,8 +9,6 @@
 ctypedef np.int8_t INT8
 ctypedef np.uint16_t UINT16
 ctypedef np.uint32_t UINT32
 
-
-
 @cython.boundscheck(False)
 @cython.wraparound(False)
 cpdef np.ndarray[UINT32, ndim=2] cdrop_less_than_7_matches(np.ndarray[UINT32, ndim=1] ids,
@@ -28,11 +26,10 @@ cpdef np.ndarray[UINT32, ndim=2] cdrop_less_than_7_matches(np.ndarray[UINT32, nd
             count += 1
     return ret[:count, :]
 
-
 @cython.boundscheck(False)
 @cython.wraparound(False)
-cpdef np.ndarray[INT8, ndim=1] ccheck_similarity(np.ndarray[UINT16, ndim=1] patients_geno,
-                                                 np.ndarray[UINT16, ndim=2] donors_genos,
+cpdef np.ndarray[INT8, ndim=1] ccheck_similarity(np.ndarray[UINT32, ndim=1] patients_geno,  # Changed from UINT16
+                                                 np.ndarray[UINT32, ndim=2] donors_genos,  # Changed from UINT16
                                                  np.ndarray[UINT8, ndim=1] allele_range,
                                                  UINT8 init_count_similar):
     """
     Takes 2 genotypes to check similarity between \n
@@ -45,9 +42,9 @@ cpdef np.ndarray[INT8, ndim=1] ccheck_similarity(np.ndarray[UINT16, ndim=1] pati
     """
     cdef:
         np.ndarray[INT8, ndim=1] similarities
-        np.ndarray[UINT16, ndim=1] donors_geno
+        np.ndarray[UINT32, ndim=1] donors_geno  # Changed from UINT16
         UINT8 count_similar, counted, number_of_alleles, allele_num
-        UINT16 patient_alleles0, patient_alleles1, donor_alleles0, donor_alleles1
+        UINT32 patient_alleles0, patient_alleles1, donor_alleles0, donor_alleles1  # Changed from UINT16
 
     similarities = np.zeros(len(donors_genos), dtype=np.int8)
     number_of_alleles = len(allele_range)
@@ -90,10 +87,9 @@ cpdef np.ndarray[INT8, ndim=1] ccheck_similarity(np.ndarray[UINT16, ndim=1] pati
 
     return similarities
 
-
 @cython.boundscheck(False)
 @cython.wraparound(False)
-cpdef UINT32 chash(np.ndarray[UINT16, ndim=1] arr):
+cpdef UINT32 chash(np.ndarray[UINT32, ndim=1] arr):
     cdef UINT32 h = 17
     cdef UINT8 i
     for i in range(len(arr)):
diff --git a/grma/utilities/geno_representation.py b/grma/utilities/geno_representation.py
index 1773874..f1ec8d7 100644
--- a/grma/utilities/geno_representation.py
+++ b/grma/utilities/geno_representation.py
@@ -27,7 +27,7 @@ class HashableArray:
 
     def __init__(self, arr):
         self.it = None
-        self.arr = np.array(arr, dtype=np.uint16)
+        self.arr = np.array(arr, dtype=np.uint32)
 
     def __hash__(self):
         return chash(self.arr)
diff --git a/grma/utilities/utils.py b/grma/utilities/utils.py
index 3f52a1b..5de97d2 100644
--- a/grma/utilities/utils.py
+++ b/grma/utilities/utils.py
@@ -3,9 +3,16 @@
 from datetime import datetime
 from typing import Iterable
 
+import numpy as np
+
 from grma.utilities.cutils import cdrop_less_than_7_matches, ccheck_similarity
 from collections.abc import Sequence
 
+import re
+import ast
+from typing import List
+import hashlib
+from grma.utilities.geno_representation import HashableArray
 
 LENGTH_OF_NUMBERS_IN_ALLELE: int = 4
 TO_SEROLOGY: dict[str, int] = {
@@ -225,9 +232,8 @@ def allele_style(
 
 def tuple_geno_to_int(tuple_geno: Iterable[int]):
     """convert genotype from Iterable to integer"""
     string = ""
-    fill: int = 4
     for i in tuple_geno:
-        string += str(i).zfill(fill)
+        string += str(i)
     return int(string)
 
@@ -243,7 +249,10 @@ def drop_less_than_7_matches(ids, similarities):
 
 def check_similarity(patients_geno, donors_genos, allele_range, init_count_similar):
     return ccheck_similarity(
-        patients_geno, donors_genos, allele_range, init_count_similar
+        np.fromiter(patients_geno, dtype=np.uint32),
+        donors_genos,
+        allele_range,
+        init_count_similar,
     )
 
@@ -255,3 +264,25 @@ def gl_string_to_integers(genotype: str) -> Sequence[int]:
     ]
 
     return genotype
+
+
+def gl_string_to_hash(s: str) -> int:
+    return int.from_bytes(hashlib.blake2b(s.encode(), digest_size=8).digest(), "big")
+
+
+def geno_to_int(geno_str):
+    geno = tuple([a for block in geno_str.split("^") for a in block.split("+")])
+    allele_list = []
+    for item in geno:
+        locus, allele = item.split("*")
+        a1, a2 = allele.split(":")
+        if locus == "DRB3":
+            a1 = "3" + a1
+        elif locus == "DRB4":
+            a1 = "4" + a1
+        elif locus == "DRB5":
+            a1 = "5" + a1
+        else:
+            a1 = "0" + a1
+        allele_list.append(int("1" + a1 + a2))
+    return HashableArray(allele_list)
diff --git a/pyproject.toml b/pyproject.toml
index 440f358..8403ba1 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,5 +1,5 @@
 [build-system]
"Cython>=0.29.32", "numpy>=1.20.2"] +requires = ["setuptools>=42", "wheel", "Cython>=0.29.32", "numpy==2.3.4"] build-backend = "setuptools.build_meta" [project] diff --git a/requirements.txt b/requirements.txt index 34542f6..701176f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,8 @@ toml==0.10.2 -py-graph-imputation==0.1.1 +py-graph-imputation>=0.0.12 py-ard==1.5.5 tqdm==4.66.1 +bidict +networkx +numpy +pandas diff --git a/setup.py b/setup.py index d33e6fa..343429d 100644 --- a/setup.py +++ b/setup.py @@ -87,12 +87,18 @@ def build_include_dirs(): "grma.utilities.cutils", ["grma/utilities/cutils.pyx"], define_macros=[("NPY_NO_DEPRECATED_API", "NPY_1_7_API_VERSION")], + include_dirs=[numpy.get_include()], ), Extension( "grma.match.lol_graph", ["grma/match/lol_graph.pyx"], define_macros=[("NPY_NO_DEPRECATED_API", "NPY_1_7_API_VERSION")], ), + Extension( + "grma.match.hla_match", + ["grma/match/hla_match.pyx"], + include_dirs=[numpy.get_include()], + ), ] ), )