From 9e0c923e9228d423936c6b7fc4bc0a59315c4cb1 Mon Sep 17 00:00:00 2001
From: sfatimar
Date: Thu, 4 Dec 2025 11:47:28 +0530
Subject: [PATCH 1/3] Affinity Draft Commit

---
 .../providers/openvino/backend_manager.cc     |  4 ++-
 .../core/providers/openvino/backend_utils.cc  | 35 +++++++++++++++++++
 .../core/providers/openvino/backend_utils.h   |  2 ++
 .../openvino/backends/basic_backend.cc        |  2 ++
 .../openvino/backends/basic_backend.h         |  6 ++--
 .../core/providers/openvino/contexts.h        |  4 ++-
 .../openvino/openvino_execution_provider.cc   |  8 ++---
 .../openvino/openvino_parser_utils.cc         | 35 ++++++++++++++++++-
 .../openvino/openvino_parser_utils.h          |  1 +
 .../openvino/openvino_provider_factory.cc     |  5 +++
 .../openvino/ov_versions/data_ops.cc          |  1 +
 .../python/onnxruntime_pybind_state.cc        |  2 +-
 .../test/perftest/command_args_parser.cc      |  1 +
 onnxruntime/test/perftest/ort_test_session.cc |  4 ++-
 14 files changed, 98 insertions(+), 12 deletions(-)

diff --git a/onnxruntime/core/providers/openvino/backend_manager.cc b/onnxruntime/core/providers/openvino/backend_manager.cc
index 3426a2781bbc6..e2eb6ea8b361d 100644
--- a/onnxruntime/core/providers/openvino/backend_manager.cc
+++ b/onnxruntime/core/providers/openvino/backend_manager.cc
@@ -476,7 +476,9 @@ BackendManager::GetModelProtoFromFusedNode(const onnxruntime::Node& fused_node,
   };
 
   [[maybe_unused]] bool enable_ovep_qdq_optimizer = session_context_.enable_qdq_optimizer && IsQDQGraph(subgraph);
-  [[maybe_unused]] std::optional<bool> enable_compiler_qdq_optimization = queryOVProperty("NPU_QDQ_OPTIMIZATION", session_context_.device_type);
+  [[maybe_unused]] std::optional<bool> enable_compiler_qdq_optimization = false;
+  if (session_context_.device_type.find("NPU") != std::string::npos)
+    enable_compiler_qdq_optimization = queryOVProperty("NPU_QDQ_OPTIMIZATION", "NPU");
 #if (((OPENVINO_VERSION_MAJOR == 2025) && (OPENVINO_VERSION_MINOR > 0)) || (OPENVINO_VERSION_MAJOR > 2025))
   if (session_context_.device_type.find("NPU") != std::string::npos && session_context_.enable_qdq_optimizer) {
     if (enable_compiler_qdq_optimization.has_value() && enable_compiler_qdq_optimization.value()) {
diff --git a/onnxruntime/core/providers/openvino/backend_utils.cc b/onnxruntime/core/providers/openvino/backend_utils.cc
index 45e518d16686e..6de9463ffc2af 100644
--- a/onnxruntime/core/providers/openvino/backend_utils.cc
+++ b/onnxruntime/core/providers/openvino/backend_utils.cc
@@ -57,6 +57,12 @@ CreateOVModel(std::string&& model,
     LOGS_DEFAULT(INFO) << log_tag << "Setting the ov tensor layout to specified layout";
     ov_model = Set_Layout(ov_model, session_context.layout);
   }
+
+  if (!session_context.affinity.empty()) {
+    LOGS_DEFAULT(INFO) << log_tag << "Setting the ov nodes to specified affinity";
+    Set_Affinity(ov_model, session_context);
+  }
+
   // Check for Constant Folding
   if ((session_context.device_type != "NPU") && !session_context.is_wholly_supported_graph) {
     ov::pass::ConstantFolding pass_const_obj;
@@ -141,6 +147,35 @@ std::shared_ptr<ov::Model> Set_Layout(std::shared_ptr<ov::Model> ov_model, const layout_t& layout) {
   return preproc.build();
 }
 
+void Set_Affinity(std::shared_ptr<ov::Model> ov_model, const SessionContext& session_context) {
+  std::string selected_device = "CPU";
+  if (auto delimit = session_context.device_type.find(":"); delimit != std::string::npos) {
+    auto device_mode = session_context.device_type.substr(0, delimit);
+    if (device_mode.find("HETERO") != std::string::npos) {
+      const auto& devices = session_context.device_type.substr(delimit + 1);
+      auto delimit_comma = devices.find(",");
+      selected_device = devices.substr(0, delimit_comma);
+    } else {
+      ORT_THROW("[ERROR] [OpenVINO] Invalid device_type selected. The only supported mode is HETERO");
+    }
+  } else {
+    ORT_THROW("[ERROR] [OpenVINO] Invalid device_type selected. The only supported mode is HETERO");
+  }
+
+  for (auto&& ov_node : ov_model->get_ops()) {
+    auto name = ov_node->get_friendly_name();
+    auto it = session_context.affinity.find(name);
+    if (it != session_context.affinity.end()) {
+      ov_node->get_rt_info()["affinity"] = it->second;
+      std::cout << "node name " << name << " on " << it->second << "\t";
+    } else {
+      ov_node->get_rt_info()["affinity"] = selected_device;
+      std::cout << "node name " << name << " on " << selected_device << "\t";
+    }
+  }
+}
+
 int GetFirstAvailableDevice(SessionContext& session_context) {
   int i = 0;
   // Get the first available VAD-M device and set the device to busy
diff --git a/onnxruntime/core/providers/openvino/backend_utils.h b/onnxruntime/core/providers/openvino/backend_utils.h
index 8ba35e0abd1bc..db32948a24655 100644
--- a/onnxruntime/core/providers/openvino/backend_utils.h
+++ b/onnxruntime/core/providers/openvino/backend_utils.h
@@ -106,6 +106,8 @@ void printPerformanceCounts(OVInferRequestPtr request, std::ostream& stream, std::string deviceName);
 
 bool IsModelStreamXML(std::istream& model_stream);
 
+void Set_Affinity(std::shared_ptr<ov::Model> ov_model, const SessionContext& session_context);
+
 }  // namespace backend_utils
 }  // namespace openvino_ep
 }  // namespace onnxruntime
diff --git a/onnxruntime/core/providers/openvino/backends/basic_backend.cc b/onnxruntime/core/providers/openvino/backends/basic_backend.cc
index d7fc0553fb1d4..f82e72cab8ce3 100644
--- a/onnxruntime/core/providers/openvino/backends/basic_backend.cc
+++ b/onnxruntime/core/providers/openvino/backends/basic_backend.cc
@@ -99,6 +99,7 @@ BasicBackend::BasicBackend(std::unique_ptr<ONNX_NAMESPACE::ModelProto>& model_proto,
                                  !session_context_.so_context_enable &&
                                  session_context_.reshape.empty() &&
                                  session_context_.layout.empty() &&
+                                 session_context_.affinity.empty() &&
                                  !enable_causallm &&
                                  !eligible_for_cpu_fallback &&
                                  auto_unified_compile);
@@ -110,6 +111,7 @@ BasicBackend::BasicBackend(std::unique_ptr<ONNX_NAMESPACE::ModelProto>& model_proto,
                                                  subgraph_context_.subgraph_name);
   } else {  // For all other types use ov::ov_core read_model() to generate OV IR
     // followed by ov::ov_core compile_model()
+    std::cout << "CreateOVModel\n";
    ov_model = CreateOVModel(std::move(model), session_context_, const_outputs_map_);
     exe_network_ = OVCore::Get()->CompileModel(
         ov_model, hw_target, device_config, enable_causallm, subgraph_context_.subgraph_name);
", onnx_name, + // " doesn't exist in the list of OpenVINO input tensor names"); auto ov_param_index = std::distance(ov_parameters.begin(), it); diff --git a/onnxruntime/core/providers/openvino/contexts.h b/onnxruntime/core/providers/openvino/contexts.h index ebb716a64162c..bcbb463733b89 100644 --- a/onnxruntime/core/providers/openvino/contexts.h +++ b/onnxruntime/core/providers/openvino/contexts.h @@ -24,6 +24,7 @@ namespace fs = std::filesystem; using config_t = std::map; using reshape_t = std::map; using layout_t = std::map; +using affinity_t = std::map; struct ProviderInfo { std::string device_type{""}; // [device_type]: Overrides the accelerator hardware type and @@ -43,6 +44,7 @@ struct ProviderInfo { // it will be directly loaded. reshape_t reshape{}; // Used for reshaping the ov input tensor shape at runtime. layout_t layout{}; // Used for specifying the ov input/output tensor layout at runtime. + affinity_t affinity{}; // Used for specifying the nodes affinity at runtime. std::string model_priority{"DEFAULT"}; // High-level OpenVINO model priority hint // Defines what model should be provided with more performant // bounded resource first @@ -66,7 +68,7 @@ struct ProviderInfo { const ConfigOptions* config_options{NULL}; const std::unordered_set valid_provider_keys = {"device_type", "device_id", "device_luid", "cache_dir", "precision", "load_config", "context", "num_of_threads", "model_priority", "num_streams", "enable_opencl_throttling", "enable_qdq_optimizer", - "enable_causallm", "disable_dynamic_shapes", "reshape_input", "layout"}; + "enable_causallm", "disable_dynamic_shapes", "reshape_input", "layout", "affinity"}; }; struct RuntimeConfig { diff --git a/onnxruntime/core/providers/openvino/openvino_execution_provider.cc b/onnxruntime/core/providers/openvino/openvino_execution_provider.cc index a099f85b2a4b9..6c8b60324bbbf 100644 --- a/onnxruntime/core/providers/openvino/openvino_execution_provider.cc +++ b/onnxruntime/core/providers/openvino/openvino_execution_provider.cc @@ -207,9 +207,9 @@ common::Status OpenVINOExecutionProvider::Compile( return status; } -#ifdef USE_OVEP_NPU_MEMORY + #ifdef USE_OVEP_NPU_MEMORY std::vector OpenVINOExecutionProvider::CreatePreferredAllocators() { - if (session_context_.device_type.find("NPU") != std::string::npos) { + /* if (session_context_.device_type.find("NPU") != std::string::npos) { AllocatorCreationInfo npu_allocator_info{ [this](OrtDevice::DeviceId device_id) { return std::make_unique( @@ -223,9 +223,9 @@ std::vector OpenVINOExecutionProvider::CreatePreferredAllocators() // fill in allocator return std::vector{CreateAllocator(npu_allocator_info)}; - } else { + } else {*/ return std::vector{}; - } + //} } #endif diff --git a/onnxruntime/core/providers/openvino/openvino_parser_utils.cc b/onnxruntime/core/providers/openvino/openvino_parser_utils.cc index a290fea73e0e8..ae5b9cef05810 100644 --- a/onnxruntime/core/providers/openvino/openvino_parser_utils.cc +++ b/onnxruntime/core/providers/openvino/openvino_parser_utils.cc @@ -127,7 +127,7 @@ reshape_t OpenVINOParserUtils::ParseInputShape(const std::string& reshape_input_ // Regular expressions for parsing const std::regex tensor_pattern(R"(([^\[\],]+)\s*\[(.*?)\])"); // e.g. "input_1[1..5, 2, 3..4],data[1,2,3]" - // const std::regex dimension_pattern(R"(\s*(\d+(?:\.\.\d+)?)\s*)"); // e.g. "1..5", "2", "3..4" + // const dimension_pattern(R"(\s*(\d+(?:\.\.\d+)?)\s*)"); // e.g. 
"1..5", "2", "3..4" const std::regex dimension_pattern(R"(\s*([^,\s]+)\s*)"); // Find all tensor shape definitions using regex auto tensor_begin = std::sregex_iterator( @@ -310,5 +310,38 @@ bool OpenVINOParserUtils::Check_Valid_Layout(const std::string& layout_str, cons return true; } +affinity_t OpenVINOParserUtils::ParseAffinity(const std::string& affinity_definition) { + LOGS_DEFAULT(INFO) << "[OpenVINO] Affinity is set : " << affinity_definition << "\n"; + affinity_t result_map; + + // Regex to capture device name and a list of nodes + // It captures: + // Group 1: device name (e.g., "device") + // Group 2: comma-separated list of nodes (e.g., "\"node1\", \"node2\"") + std::regex device_nodes_regex(R"(([^,\[\]]+)\[([^\]]+)\])"); + + std::sregex_iterator device_it(affinity_definition.begin(), affinity_definition.end(), device_nodes_regex); + std::sregex_iterator device_end; + + for (; device_it != device_end; ++device_it) { + std::smatch device_match = *device_it; + std::string device_name = device_match[1].str(); + std::string nodes_list_str = device_match[2].str(); + std::cout << "device_name " << device_name << "\n"; + std::cout << "nodes_list_str " << nodes_list_str << "\n"; + std::stringstream nodes_list(nodes_list_str); + std::string item; + + while (getline(nodes_list, item, ',')) { + result_map[item] = device_name; + } + } + + for (auto item : result_map){ + std::cout << "\n" << item.first << " on " << item.second << "\n"; + } + return result_map; +} + } // namespace openvino_ep } // namespace onnxruntime diff --git a/onnxruntime/core/providers/openvino/openvino_parser_utils.h b/onnxruntime/core/providers/openvino/openvino_parser_utils.h index a0936d627df40..090a133e906ab 100644 --- a/onnxruntime/core/providers/openvino/openvino_parser_utils.h +++ b/onnxruntime/core/providers/openvino/openvino_parser_utils.h @@ -22,6 +22,7 @@ class OpenVINOParserUtils { static std::string TrimWhitespace(const std::string& str); static ov::Dimension ParseDimensionRange(const std::string& range_str, const std::string& tensor_name); static bool Check_Valid_Layout(const std::string& layout_str, const std::string& tensor_name); + static affinity_t ParseAffinity(const std::string& affinity_definition); }; } // namespace openvino_ep diff --git a/onnxruntime/core/providers/openvino/openvino_provider_factory.cc b/onnxruntime/core/providers/openvino/openvino_provider_factory.cc index 7eb5b062fe7c8..78ef874d45ae0 100644 --- a/onnxruntime/core/providers/openvino/openvino_provider_factory.cc +++ b/onnxruntime/core/providers/openvino/openvino_provider_factory.cc @@ -266,6 +266,11 @@ static void ParseProviderInfo(const ProviderOptions& provider_options, pi.layout = OpenVINOParserUtils::ParseLayout(provider_options.at("layout")); } + if (provider_options.contains("affinity")) { + std::cout << "Provider options contain affinity\n"; + pi.affinity = OpenVINOParserUtils::ParseAffinity(provider_options.at("affinity")); + } + if (provider_options.contains("load_config")) { auto parse_config = [&](const std::string& config_str) -> std::map { // If the config string is empty, return an empty map and skip processing diff --git a/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc b/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc index 373b2121a9b60..51754046ae199 100644 --- a/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc +++ b/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc @@ -152,6 +152,7 @@ std::vector supported_op_mode = { {"GreaterOrEqual", V_2022_1, {"CPU", "GPU"}}, 
{"GridSample", V_2022_3, {"CPU"}}, {"GridSample", V_2023_0, {"GPU"}}, + {"GroupQueryAttention", V_2025_0, {"CPU"}}, {"GRU", V_2024_1, {"CPU", "GPU"}}, {"HardMax", V_2023_1, {"CPU", "GPU"}}, {"Identity", V_2020_4, {"CPU", "GPU"}}, diff --git a/onnxruntime/python/onnxruntime_pybind_state.cc b/onnxruntime/python/onnxruntime_pybind_state.cc index 92cf6b085c01e..4961b290da8cf 100644 --- a/onnxruntime/python/onnxruntime_pybind_state.cc +++ b/onnxruntime/python/onnxruntime_pybind_state.cc @@ -1083,7 +1083,7 @@ static std::shared_ptr CreateExecutionProviderFactory ProviderOptions OV_provider_options_map; const std::unordered_set valid_provider_keys = {"device_type", "device_id", "device_luid", "cache_dir", "precision", "load_config", "context", "num_of_threads", "model_priority", "num_streams", "enable_opencl_throttling", "enable_qdq_optimizer", - "enable_causallm", "disable_dynamic_shapes", "reshape_input", "layout"}; + "enable_causallm", "disable_dynamic_shapes", "reshape_input", "layout", "affinity"}; auto it = provider_options_map.find(type); if (it != provider_options_map.end()) { for (auto option : it->second) { diff --git a/onnxruntime/test/perftest/command_args_parser.cc b/onnxruntime/test/perftest/command_args_parser.cc index 8960898f036fc..a1648edd68b98 100644 --- a/onnxruntime/test/perftest/command_args_parser.cc +++ b/onnxruntime/test/perftest/command_args_parser.cc @@ -71,6 +71,7 @@ ABSL_FLAG(std::string, i, "", " [OpenVINO only] [enable_opencl_throttling]: Enables OpenCL queue throttling for GPU device(Reduces the CPU Utilization while using GPU) \n" " [OpenVINO only] [reshape_input]: Sets model input shapes with support for bounded dynamic dimensions using 'min..max' syntax (e.g., [1..10,3,224,224]) \n" " [OpenVINO only] [layout]: Specifies the layout for inputs/outputs to interpret tensor dimensions correctly. \n" + " [OpenVINO only] [affinity]: Specifies the affinity of a certain node to a specific device in Hetero Mode. \n" " [Example] [For OpenVINO EP] -e openvino -i \"device_type|CPU num_of_threads|5 enable_opencl_throttling|true reshape_input|[1,3,60,60..100] layout|[NCHW] cache_dir|\"\"\"\n" "\n" " [QNN only] [backend_type]: QNN backend type. E.g., 'cpu', 'htp'. Mutually exclusive with 'backend_path'.\n" diff --git a/onnxruntime/test/perftest/ort_test_session.cc b/onnxruntime/test/perftest/ort_test_session.cc index 0f2da07c69d85..59be37803cdf0 100644 --- a/onnxruntime/test/perftest/ort_test_session.cc +++ b/onnxruntime/test/perftest/ort_test_session.cc @@ -947,12 +947,14 @@ select from 'TF8', 'TF16', 'UINT8', 'FLOAT', 'ITENSOR'. \n)"); ov_options[key] = value; } else if (key == "layout") { ov_options[key] = value; + } else if (key == "affinity") { + ov_options[key] = value; } else { ORT_THROW( "[ERROR] [OpenVINO] wrong key type entered. Choose from the following runtime key options that are available for OpenVINO." 
" ['device_type', 'device_id', 'num_of_threads', 'load_config', 'cache_dir', 'num_streams', " "'enable_opencl_throttling', 'disable_dynamic_shapes', 'enable_qdq_optimizer'," - " 'enable_causallm', 'reshape_input', 'layout', 'model_priority'] \n"); + " 'enable_causallm', 'reshape_input', 'layout', 'affinity', 'model_priority'] \n"); } } session_options.AppendExecutionProvider_OpenVINO_V2(ov_options); From b521399e3306c91db760f3dd72c039ca1ee3ef82 Mon Sep 17 00:00:00 2001 From: sfatimar Date: Thu, 4 Dec 2025 11:47:28 +0530 Subject: [PATCH 2/4] Affinity Draft Commit --- .../providers/openvino/backend_manager.cc | 4 ++- .../core/providers/openvino/backend_utils.cc | 35 +++++++++++++++++++ .../core/providers/openvino/backend_utils.h | 2 ++ .../openvino/backends/basic_backend.cc | 2 ++ .../openvino/backends/basic_backend.h | 6 ++-- .../core/providers/openvino/contexts.h | 4 ++- .../openvino/openvino_execution_provider.cc | 8 ++--- .../openvino/openvino_parser_utils.cc | 35 ++++++++++++++++++- .../openvino/openvino_parser_utils.h | 1 + .../openvino/openvino_provider_factory.cc | 5 +++ .../openvino/ov_versions/data_ops.cc | 1 + .../python/onnxruntime_pybind_state.cc | 2 +- .../test/perftest/command_args_parser.cc | 1 + onnxruntime/test/perftest/ort_test_session.cc | 4 ++- 14 files changed, 98 insertions(+), 12 deletions(-) diff --git a/onnxruntime/core/providers/openvino/backend_manager.cc b/onnxruntime/core/providers/openvino/backend_manager.cc index 3426a2781bbc6..e2eb6ea8b361d 100644 --- a/onnxruntime/core/providers/openvino/backend_manager.cc +++ b/onnxruntime/core/providers/openvino/backend_manager.cc @@ -476,7 +476,9 @@ BackendManager::GetModelProtoFromFusedNode(const onnxruntime::Node& fused_node, }; [[maybe_unused]] bool enable_ovep_qdq_optimizer = session_context_.enable_qdq_optimizer && IsQDQGraph(subgraph); - [[maybe_unused]] std::optional enable_compiler_qdq_optimization = queryOVProperty("NPU_QDQ_OPTIMIZATION", session_context_.device_type); + [[maybe_unused]] std::optional enable_compiler_qdq_optimization = false; + if (session_context_.device_type.find("NPU") != std::string::npos) + enable_compiler_qdq_optimization = queryOVProperty("NPU_QDQ_OPTIMIZATION", "NPU"); #if (((OPENVINO_VERSION_MAJOR == 2025) && (OPENVINO_VERSION_MINOR > 0)) || (OPENVINO_VERSION_MAJOR > 2025)) if (session_context_.device_type.find("NPU") != std::string::npos && session_context_.enable_qdq_optimizer) { if (enable_compiler_qdq_optimization.has_value() && enable_compiler_qdq_optimization.value()) { diff --git a/onnxruntime/core/providers/openvino/backend_utils.cc b/onnxruntime/core/providers/openvino/backend_utils.cc index 45e518d16686e..6de9463ffc2af 100644 --- a/onnxruntime/core/providers/openvino/backend_utils.cc +++ b/onnxruntime/core/providers/openvino/backend_utils.cc @@ -57,6 +57,12 @@ CreateOVModel(std::string&& model, LOGS_DEFAULT(INFO) << log_tag << "Setting the ov tensor layout to specified layout"; ov_model = Set_Layout(ov_model, session_context.layout); } + + if (!session_context.affinity.empty()) { + LOGS_DEFAULT(INFO) << log_tag << "Setting the ov nodes to specified affinity"; + Set_Affinity(ov_model, session_context); + } + // Check for Constant Folding if ((session_context.device_type != "NPU") && !session_context.is_wholly_supported_graph) { ov::pass::ConstantFolding pass_const_obj; @@ -141,6 +147,35 @@ std::shared_ptr Set_Layout(std::shared_ptr ov_model, const return preproc.build(); } +void Set_Affinity(std::shared_ptr ov_model, const SessionContext& session_context) { + + std::string 
selected_device = "CPU"; + if (auto delimit = session_context.device_type.find(":"); delimit != std::string::npos) { + auto device_mode = session_context.device_type.substr(0, delimit); + if (device_mode.find("HETERO") != std::string::npos) { + const auto& devices = session_context.device_type.substr(delimit + 1); + auto delimit_comma = devices.find(","); + selected_device = devices.substr(0, delimit_comma); + } else { + ORT_THROW("[ERROR] [OpenVINO] Invalid device_type is selected. Supported modes is HETERO"); + } + } else { + ORT_THROW("[ERROR] [OpenVINO] Invalid device_type is selected. Supported modes is HETERO"); + } + + for (auto&& ov_node : ov_model->get_ops()) { + auto name = ov_node->get_friendly_name(); + auto it = session_context.affinity.find(name); + if (it != session_context.affinity.end()) { + ov_node->get_rt_info()["affinity"] = it->second; + std::cout << "node name " << name << " on " << it->second << "\t"; + } else { + ov_node->get_rt_info()["affinity"] = selected_device; + std::cout << "node name " << name << " on " << selected_device << "\t"; + } + } +} + int GetFirstAvailableDevice(SessionContext& session_context) { int i = 0; // Get the first available VAD-M device and set the device to busy diff --git a/onnxruntime/core/providers/openvino/backend_utils.h b/onnxruntime/core/providers/openvino/backend_utils.h index 8ba35e0abd1bc..db32948a24655 100644 --- a/onnxruntime/core/providers/openvino/backend_utils.h +++ b/onnxruntime/core/providers/openvino/backend_utils.h @@ -106,6 +106,8 @@ void printPerformanceCounts(OVInferRequestPtr request, std::ostream& stream, std bool IsModelStreamXML(std::istream& model_stream); +void Set_Affinity(std::shared_ptr ov_model, const SessionContext& session_context); + } // namespace backend_utils } // namespace openvino_ep } // namespace onnxruntime diff --git a/onnxruntime/core/providers/openvino/backends/basic_backend.cc b/onnxruntime/core/providers/openvino/backends/basic_backend.cc index d7fc0553fb1d4..f82e72cab8ce3 100644 --- a/onnxruntime/core/providers/openvino/backends/basic_backend.cc +++ b/onnxruntime/core/providers/openvino/backends/basic_backend.cc @@ -99,6 +99,7 @@ BasicBackend::BasicBackend(std::unique_ptr& model_pr !session_context_.so_context_enable && session_context_.reshape.empty() && session_context_.layout.empty() && + session_context_.affinity.empty() && !enable_causallm && !eligible_for_cpu_fallback && auto_unified_compile); @@ -110,6 +111,7 @@ BasicBackend::BasicBackend(std::unique_ptr& model_pr subgraph_context_.subgraph_name); } else { // For all other types use ov::ov_core read_model() to generate OV IR // followed by ov::ov_core compile_model() + std::cout << "CreateOVModel\n"; ov_model = CreateOVModel(std::move(model), session_context_, const_outputs_map_); exe_network_ = OVCore::Get()->CompileModel( ov_model, hw_target, device_config, enable_causallm, subgraph_context_.subgraph_name); diff --git a/onnxruntime/core/providers/openvino/backends/basic_backend.h b/onnxruntime/core/providers/openvino/backends/basic_backend.h index 2cf3d3faa8b47..690835eabb391 100644 --- a/onnxruntime/core/providers/openvino/backends/basic_backend.h +++ b/onnxruntime/core/providers/openvino/backends/basic_backend.h @@ -82,9 +82,9 @@ struct OnnxToOvNetworkBindings { } } - ORT_ENFORCE(matched_names, log_tag, - "Input names mismatch between OpenVINO and ONNX. ", onnx_name, - " doesn't exist in the list of OpenVINO input tensor names"); + //ORT_ENFORCE(matched_names, log_tag, + // "Input names mismatch between OpenVINO and ONNX. 
", onnx_name, + // " doesn't exist in the list of OpenVINO input tensor names"); auto ov_param_index = std::distance(ov_parameters.begin(), it); diff --git a/onnxruntime/core/providers/openvino/contexts.h b/onnxruntime/core/providers/openvino/contexts.h index ebb716a64162c..bcbb463733b89 100644 --- a/onnxruntime/core/providers/openvino/contexts.h +++ b/onnxruntime/core/providers/openvino/contexts.h @@ -24,6 +24,7 @@ namespace fs = std::filesystem; using config_t = std::map; using reshape_t = std::map; using layout_t = std::map; +using affinity_t = std::map; struct ProviderInfo { std::string device_type{""}; // [device_type]: Overrides the accelerator hardware type and @@ -43,6 +44,7 @@ struct ProviderInfo { // it will be directly loaded. reshape_t reshape{}; // Used for reshaping the ov input tensor shape at runtime. layout_t layout{}; // Used for specifying the ov input/output tensor layout at runtime. + affinity_t affinity{}; // Used for specifying the nodes affinity at runtime. std::string model_priority{"DEFAULT"}; // High-level OpenVINO model priority hint // Defines what model should be provided with more performant // bounded resource first @@ -66,7 +68,7 @@ struct ProviderInfo { const ConfigOptions* config_options{NULL}; const std::unordered_set valid_provider_keys = {"device_type", "device_id", "device_luid", "cache_dir", "precision", "load_config", "context", "num_of_threads", "model_priority", "num_streams", "enable_opencl_throttling", "enable_qdq_optimizer", - "enable_causallm", "disable_dynamic_shapes", "reshape_input", "layout"}; + "enable_causallm", "disable_dynamic_shapes", "reshape_input", "layout", "affinity"}; }; struct RuntimeConfig { diff --git a/onnxruntime/core/providers/openvino/openvino_execution_provider.cc b/onnxruntime/core/providers/openvino/openvino_execution_provider.cc index a099f85b2a4b9..6c8b60324bbbf 100644 --- a/onnxruntime/core/providers/openvino/openvino_execution_provider.cc +++ b/onnxruntime/core/providers/openvino/openvino_execution_provider.cc @@ -207,9 +207,9 @@ common::Status OpenVINOExecutionProvider::Compile( return status; } -#ifdef USE_OVEP_NPU_MEMORY + #ifdef USE_OVEP_NPU_MEMORY std::vector OpenVINOExecutionProvider::CreatePreferredAllocators() { - if (session_context_.device_type.find("NPU") != std::string::npos) { + /* if (session_context_.device_type.find("NPU") != std::string::npos) { AllocatorCreationInfo npu_allocator_info{ [this](OrtDevice::DeviceId device_id) { return std::make_unique( @@ -223,9 +223,9 @@ std::vector OpenVINOExecutionProvider::CreatePreferredAllocators() // fill in allocator return std::vector{CreateAllocator(npu_allocator_info)}; - } else { + } else {*/ return std::vector{}; - } + //} } #endif diff --git a/onnxruntime/core/providers/openvino/openvino_parser_utils.cc b/onnxruntime/core/providers/openvino/openvino_parser_utils.cc index a290fea73e0e8..ae5b9cef05810 100644 --- a/onnxruntime/core/providers/openvino/openvino_parser_utils.cc +++ b/onnxruntime/core/providers/openvino/openvino_parser_utils.cc @@ -127,7 +127,7 @@ reshape_t OpenVINOParserUtils::ParseInputShape(const std::string& reshape_input_ // Regular expressions for parsing const std::regex tensor_pattern(R"(([^\[\],]+)\s*\[(.*?)\])"); // e.g. "input_1[1..5, 2, 3..4],data[1,2,3]" - // const std::regex dimension_pattern(R"(\s*(\d+(?:\.\.\d+)?)\s*)"); // e.g. "1..5", "2", "3..4" + // const dimension_pattern(R"(\s*(\d+(?:\.\.\d+)?)\s*)"); // e.g. 
"1..5", "2", "3..4" const std::regex dimension_pattern(R"(\s*([^,\s]+)\s*)"); // Find all tensor shape definitions using regex auto tensor_begin = std::sregex_iterator( @@ -310,5 +310,38 @@ bool OpenVINOParserUtils::Check_Valid_Layout(const std::string& layout_str, cons return true; } +affinity_t OpenVINOParserUtils::ParseAffinity(const std::string& affinity_definition) { + LOGS_DEFAULT(INFO) << "[OpenVINO] Affinity is set : " << affinity_definition << "\n"; + affinity_t result_map; + + // Regex to capture device name and a list of nodes + // It captures: + // Group 1: device name (e.g., "device") + // Group 2: comma-separated list of nodes (e.g., "\"node1\", \"node2\"") + std::regex device_nodes_regex(R"(([^,\[\]]+)\[([^\]]+)\])"); + + std::sregex_iterator device_it(affinity_definition.begin(), affinity_definition.end(), device_nodes_regex); + std::sregex_iterator device_end; + + for (; device_it != device_end; ++device_it) { + std::smatch device_match = *device_it; + std::string device_name = device_match[1].str(); + std::string nodes_list_str = device_match[2].str(); + std::cout << "device_name " << device_name << "\n"; + std::cout << "nodes_list_str " << nodes_list_str << "\n"; + std::stringstream nodes_list(nodes_list_str); + std::string item; + + while (getline(nodes_list, item, ',')) { + result_map[item] = device_name; + } + } + + for (auto item : result_map){ + std::cout << "\n" << item.first << " on " << item.second << "\n"; + } + return result_map; +} + } // namespace openvino_ep } // namespace onnxruntime diff --git a/onnxruntime/core/providers/openvino/openvino_parser_utils.h b/onnxruntime/core/providers/openvino/openvino_parser_utils.h index a0936d627df40..090a133e906ab 100644 --- a/onnxruntime/core/providers/openvino/openvino_parser_utils.h +++ b/onnxruntime/core/providers/openvino/openvino_parser_utils.h @@ -22,6 +22,7 @@ class OpenVINOParserUtils { static std::string TrimWhitespace(const std::string& str); static ov::Dimension ParseDimensionRange(const std::string& range_str, const std::string& tensor_name); static bool Check_Valid_Layout(const std::string& layout_str, const std::string& tensor_name); + static affinity_t ParseAffinity(const std::string& affinity_definition); }; } // namespace openvino_ep diff --git a/onnxruntime/core/providers/openvino/openvino_provider_factory.cc b/onnxruntime/core/providers/openvino/openvino_provider_factory.cc index 7eb5b062fe7c8..78ef874d45ae0 100644 --- a/onnxruntime/core/providers/openvino/openvino_provider_factory.cc +++ b/onnxruntime/core/providers/openvino/openvino_provider_factory.cc @@ -266,6 +266,11 @@ static void ParseProviderInfo(const ProviderOptions& provider_options, pi.layout = OpenVINOParserUtils::ParseLayout(provider_options.at("layout")); } + if (provider_options.contains("affinity")) { + std::cout << "Provider options contain affinity\n"; + pi.affinity = OpenVINOParserUtils::ParseAffinity(provider_options.at("affinity")); + } + if (provider_options.contains("load_config")) { auto parse_config = [&](const std::string& config_str) -> std::map { // If the config string is empty, return an empty map and skip processing diff --git a/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc b/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc index 373b2121a9b60..51754046ae199 100644 --- a/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc +++ b/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc @@ -152,6 +152,7 @@ std::vector supported_op_mode = { {"GreaterOrEqual", V_2022_1, {"CPU", "GPU"}}, 
{"GridSample", V_2022_3, {"CPU"}}, {"GridSample", V_2023_0, {"GPU"}}, + {"GroupQueryAttention", V_2025_0, {"CPU"}}, {"GRU", V_2024_1, {"CPU", "GPU"}}, {"HardMax", V_2023_1, {"CPU", "GPU"}}, {"Identity", V_2020_4, {"CPU", "GPU"}}, diff --git a/onnxruntime/python/onnxruntime_pybind_state.cc b/onnxruntime/python/onnxruntime_pybind_state.cc index 92cf6b085c01e..4961b290da8cf 100644 --- a/onnxruntime/python/onnxruntime_pybind_state.cc +++ b/onnxruntime/python/onnxruntime_pybind_state.cc @@ -1083,7 +1083,7 @@ static std::shared_ptr CreateExecutionProviderFactory ProviderOptions OV_provider_options_map; const std::unordered_set valid_provider_keys = {"device_type", "device_id", "device_luid", "cache_dir", "precision", "load_config", "context", "num_of_threads", "model_priority", "num_streams", "enable_opencl_throttling", "enable_qdq_optimizer", - "enable_causallm", "disable_dynamic_shapes", "reshape_input", "layout"}; + "enable_causallm", "disable_dynamic_shapes", "reshape_input", "layout", "affinity"}; auto it = provider_options_map.find(type); if (it != provider_options_map.end()) { for (auto option : it->second) { diff --git a/onnxruntime/test/perftest/command_args_parser.cc b/onnxruntime/test/perftest/command_args_parser.cc index 2c9377d48f0c4..5cada81db449d 100644 --- a/onnxruntime/test/perftest/command_args_parser.cc +++ b/onnxruntime/test/perftest/command_args_parser.cc @@ -74,6 +74,7 @@ ABSL_FLAG(std::string, i, "", " [OpenVINO only] [enable_opencl_throttling]: Enables OpenCL queue throttling for GPU device(Reduces the CPU Utilization while using GPU) \n" " [OpenVINO only] [reshape_input]: Sets model input shapes with support for bounded dynamic dimensions using 'min..max' syntax (e.g., [1..10,3,224,224]) \n" " [OpenVINO only] [layout]: Specifies the layout for inputs/outputs to interpret tensor dimensions correctly. \n" + " [OpenVINO only] [affinity]: Specifies the affinity of a certain node to a specific device in Hetero Mode. \n" " [Example] [For OpenVINO EP] -e openvino -i \"device_type|CPU num_of_threads|5 enable_opencl_throttling|true reshape_input|[1,3,60,60..100] layout|[NCHW] cache_dir|\"\"\"\n" "\n" " [QNN only] [backend_type]: QNN backend type. E.g., 'cpu', 'htp'. Mutually exclusive with 'backend_path'.\n" diff --git a/onnxruntime/test/perftest/ort_test_session.cc b/onnxruntime/test/perftest/ort_test_session.cc index cb40a9beafeee..53b509fafe432 100644 --- a/onnxruntime/test/perftest/ort_test_session.cc +++ b/onnxruntime/test/perftest/ort_test_session.cc @@ -974,12 +974,14 @@ select from 'TF8', 'TF16', 'UINT8', 'FLOAT', 'ITENSOR'. \n)"); ov_options[key] = value; } else if (key == "layout") { ov_options[key] = value; + } else if (key == "affinity") { + ov_options[key] = value; } else { ORT_THROW( "[ERROR] [OpenVINO] wrong key type entered. Choose from the following runtime key options that are available for OpenVINO." 
" ['device_type', 'device_id', 'num_of_threads', 'load_config', 'cache_dir', 'num_streams', " "'enable_opencl_throttling', 'disable_dynamic_shapes', 'enable_qdq_optimizer'," - " 'enable_causallm', 'reshape_input', 'layout', 'model_priority'] \n"); + " 'enable_causallm', 'reshape_input', 'layout', 'affinity', 'model_priority'] \n"); } } session_options.AppendExecutionProvider_OpenVINO_V2(ov_options); From 6aa58db96ac13a2038fede54713999bec9badacc Mon Sep 17 00:00:00 2001 From: sfatimar Date: Mon, 8 Dec 2025 12:50:43 +0530 Subject: [PATCH 3/4] Merged Changes --- onnxruntime/core/providers/openvino/backend_utils.cc | 8 ++++---- .../core/providers/openvino/backends/basic_backend.cc | 1 - .../core/providers/openvino/openvino_parser_utils.cc | 10 +++++----- .../core/providers/openvino/ov_versions/data_ops.cc | 2 +- 4 files changed, 10 insertions(+), 11 deletions(-) diff --git a/onnxruntime/core/providers/openvino/backend_utils.cc b/onnxruntime/core/providers/openvino/backend_utils.cc index 6de9463ffc2af..3397223b10c08 100644 --- a/onnxruntime/core/providers/openvino/backend_utils.cc +++ b/onnxruntime/core/providers/openvino/backend_utils.cc @@ -64,7 +64,7 @@ CreateOVModel(std::string&& model, } // Check for Constant Folding - if ((session_context.device_type != "NPU") && !session_context.is_wholly_supported_graph) { + /* if ((session_context.device_type != "NPU") && !session_context.is_wholly_supported_graph) { ov::pass::ConstantFolding pass_const_obj; pass_const_obj.run_on_model(ov_model); auto& results = const_cast(ov_model.get()->get_results()); @@ -78,7 +78,7 @@ CreateOVModel(std::string&& model, } --index; } - } + }*/ #ifndef NDEBUG if (IsDebugEnabled()) { std::string name = ov_model->get_friendly_name(); @@ -168,10 +168,10 @@ void Set_Affinity(std::shared_ptr ov_model, const SessionContext& ses auto it = session_context.affinity.find(name); if (it != session_context.affinity.end()) { ov_node->get_rt_info()["affinity"] = it->second; - std::cout << "node name " << name << " on " << it->second << "\t"; + std::cout << name << " on " << it->second << "\n"; } else { ov_node->get_rt_info()["affinity"] = selected_device; - std::cout << "node name " << name << " on " << selected_device << "\t"; + std::cout << name << " on " << selected_device << "\n"; } } } diff --git a/onnxruntime/core/providers/openvino/backends/basic_backend.cc b/onnxruntime/core/providers/openvino/backends/basic_backend.cc index f82e72cab8ce3..508b20213d402 100644 --- a/onnxruntime/core/providers/openvino/backends/basic_backend.cc +++ b/onnxruntime/core/providers/openvino/backends/basic_backend.cc @@ -111,7 +111,6 @@ BasicBackend::BasicBackend(std::unique_ptr& model_pr subgraph_context_.subgraph_name); } else { // For all other types use ov::ov_core read_model() to generate OV IR // followed by ov::ov_core compile_model() - std::cout << "CreateOVModel\n"; ov_model = CreateOVModel(std::move(model), session_context_, const_outputs_map_); exe_network_ = OVCore::Get()->CompileModel( ov_model, hw_target, device_config, enable_causallm, subgraph_context_.subgraph_name); diff --git a/onnxruntime/core/providers/openvino/openvino_parser_utils.cc b/onnxruntime/core/providers/openvino/openvino_parser_utils.cc index ae5b9cef05810..52a5979468b07 100644 --- a/onnxruntime/core/providers/openvino/openvino_parser_utils.cc +++ b/onnxruntime/core/providers/openvino/openvino_parser_utils.cc @@ -327,8 +327,8 @@ affinity_t OpenVINOParserUtils::ParseAffinity(const std::string& affinity_defini std::smatch device_match = *device_it; std::string 
diff --git a/onnxruntime/core/providers/openvino/openvino_parser_utils.cc b/onnxruntime/core/providers/openvino/openvino_parser_utils.cc
index ae5b9cef05810..52a5979468b07 100644
--- a/onnxruntime/core/providers/openvino/openvino_parser_utils.cc
+++ b/onnxruntime/core/providers/openvino/openvino_parser_utils.cc
@@ -327,8 +327,8 @@ affinity_t OpenVINOParserUtils::ParseAffinity(const std::string& affinity_definition) {
     std::smatch device_match = *device_it;
     std::string device_name = device_match[1].str();
     std::string nodes_list_str = device_match[2].str();
-    std::cout << "device_name " << device_name << "\n";
-    std::cout << "nodes_list_str " << nodes_list_str << "\n";
+    //std::cout << "device_name " << device_name << "\n";
+    //std::cout << "nodes_list_str " << nodes_list_str << "\n";
     std::stringstream nodes_list(nodes_list_str);
     std::string item;
@@ -338,7 +338,7 @@ affinity_t OpenVINOParserUtils::ParseAffinity(const std::string& affinity_definition) {
   }
 
-  for (auto item : result_map){
-    std::cout << "\n" << item.first << " on " << item.second << "\n";
-  }
+  //for (auto item : result_map){
+  //  std::cout << "\n" << item.first << " on " << item.second << "\n";
+  //}
   return result_map;
 }
diff --git a/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc b/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc
index 51754046ae199..6e4145c050ebb 100644
--- a/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc
+++ b/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc
@@ -152,7 +152,7 @@ std::vector<SupportedOp> supported_op_mode = {
     {"GreaterOrEqual", V_2022_1, {"CPU", "GPU"}},
     {"GridSample", V_2022_3, {"CPU"}},
     {"GridSample", V_2023_0, {"GPU"}},
-    {"GroupQueryAttention", V_2025_0, {"CPU"}},
+    //{"GroupQueryAttention", V_2025_0, {"CPU"}},
     {"GRU", V_2024_1, {"CPU", "GPU"}},
     {"HardMax", V_2023_1, {"CPU", "GPU"}},
     {"Identity", V_2020_4, {"CPU", "GPU"}},
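
The rt_info "affinity" tag written by Set_Affinity is OpenVINO's documented mechanism for manually pinning operations under the HETERO device. A minimal standalone sketch of that mechanism, independent of this patch series ("model.xml" and "node_a" are placeholders):

    #include <openvino/openvino.hpp>

    int main() {
      ov::Core core;
      std::shared_ptr<ov::Model> model = core.read_model("model.xml");  // placeholder path

      // Pin one illustrative node to GPU and everything else to CPU,
      // mirroring what Set_Affinity does with the parsed affinity map.
      for (auto&& node : model->get_ops()) {
        node->get_rt_info()["affinity"] = node->get_friendly_name() == "node_a" ? "GPU" : "CPU";
      }

      // The HETERO plugin honors the per-node affinity hints set above.
      ov::CompiledModel compiled = core.compile_model(model, "HETERO:GPU,CPU");
      return 0;
    }
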
"; + ss << "\'" << shape.first << "': " << shape.second; + } + return ss.str(); +} + std::shared_ptr CreateOVModel(std::string&& model, const SessionContext& session_context, @@ -46,25 +56,29 @@ CreateOVModel(std::string&& model, std::cout << "CreateNgraphFunc" << std::endl; } try { - auto ov_model = OVCore::Get()->ReadModel(std::move(model), session_context.onnx_model_path_name.string()); + auto ov_model = OVCore::Get()->ReadModel(std::move(model), session_context.onnx_model_path_name.string()); + + if (!session_context.affinity.empty()) { + LOGS_DEFAULT(INFO) << log_tag << "Setting the ov nodes to specified affinity"; + Set_Affinity(ov_model, session_context); + } if (!session_context.reshape.empty()) { LOGS_DEFAULT(INFO) << log_tag << "Reshaping the ov tensor to specified shape"; ov_model->reshape(session_context.reshape); } + ov::preprocess::PrePostProcessor preproc(ov_model); + ov_model = preproc.build(); + + if (!session_context.layout.empty()) { LOGS_DEFAULT(INFO) << log_tag << "Setting the ov tensor layout to specified layout"; ov_model = Set_Layout(ov_model, session_context.layout); } - if (!session_context.affinity.empty()) { - LOGS_DEFAULT(INFO) << log_tag << "Setting the ov nodes to specified affinity"; - Set_Affinity(ov_model, session_context); - } - // Check for Constant Folding - /* if ((session_context.device_type != "NPU") && !session_context.is_wholly_supported_graph) { + if ((session_context.device_type != "NPU") && !session_context.is_wholly_supported_graph) { ov::pass::ConstantFolding pass_const_obj; pass_const_obj.run_on_model(ov_model); auto& results = const_cast(ov_model.get()->get_results()); @@ -78,7 +92,7 @@ CreateOVModel(std::string&& model, } --index; } - }*/ + } #ifndef NDEBUG if (IsDebugEnabled()) { std::string name = ov_model->get_friendly_name(); @@ -168,11 +182,8 @@ void Set_Affinity(std::shared_ptr ov_model, const SessionContext& ses auto it = session_context.affinity.find(name); if (it != session_context.affinity.end()) { ov_node->get_rt_info()["affinity"] = it->second; - std::cout << name << " on " << it->second << "\n"; } else { ov_node->get_rt_info()["affinity"] = selected_device; - std::cout << name << " on " << selected_device << "\n"; - } } } diff --git a/onnxruntime/core/providers/openvino/backend_utils.h b/onnxruntime/core/providers/openvino/backend_utils.h index db32948a24655..15ac81db1bdc1 100644 --- a/onnxruntime/core/providers/openvino/backend_utils.h +++ b/onnxruntime/core/providers/openvino/backend_utils.h @@ -108,6 +108,8 @@ bool IsModelStreamXML(std::istream& model_stream); void Set_Affinity(std::shared_ptr ov_model, const SessionContext& session_context); +std::string get_shapes_string(const reshape_t& shapes); + } // namespace backend_utils } // namespace openvino_ep } // namespace onnxruntime diff --git a/onnxruntime/core/providers/openvino/backends/basic_backend.cc b/onnxruntime/core/providers/openvino/backends/basic_backend.cc index f82e72cab8ce3..508b20213d402 100644 --- a/onnxruntime/core/providers/openvino/backends/basic_backend.cc +++ b/onnxruntime/core/providers/openvino/backends/basic_backend.cc @@ -111,7 +111,6 @@ BasicBackend::BasicBackend(std::unique_ptr& model_pr subgraph_context_.subgraph_name); } else { // For all other types use ov::ov_core read_model() to generate OV IR // followed by ov::ov_core compile_model() - std::cout << "CreateOVModel\n"; ov_model = CreateOVModel(std::move(model), session_context_, const_outputs_map_); exe_network_ = OVCore::Get()->CompileModel( ov_model, hw_target, device_config, 
enable_causallm, subgraph_context_.subgraph_name); diff --git a/onnxruntime/core/providers/openvino/backends/basic_backend.h b/onnxruntime/core/providers/openvino/backends/basic_backend.h index 690835eabb391..453efc5a56ca9 100644 --- a/onnxruntime/core/providers/openvino/backends/basic_backend.h +++ b/onnxruntime/core/providers/openvino/backends/basic_backend.h @@ -86,10 +86,13 @@ struct OnnxToOvNetworkBindings { // "Input names mismatch between OpenVINO and ONNX. ", onnx_name, // " doesn't exist in the list of OpenVINO input tensor names"); + if (!matched_names) { + continue; + } auto ov_param_index = std::distance(ov_parameters.begin(), it); - auto shape = ov_parameters[ov_param_index].get_partial_shape(); auto type = ov_parameters[ov_param_index].get_element_type(); + ParameterInfo info{onnx_name, ov_param_index, onnx_param_index, type, ParameterShape{shape}}; // Analyze shape dynamism and set flags @@ -112,7 +115,7 @@ struct OnnxToOvNetworkBindings { info.SetFullyDynamic(has_fully_dynamic); info.SetBoundedDynamic(has_bounded_dynamic); } - + input_output_map.push_back(std::move(info)); } }; diff --git a/onnxruntime/core/providers/openvino/openvino_execution_provider.cc b/onnxruntime/core/providers/openvino/openvino_execution_provider.cc index 6c8b60324bbbf..77b0821412e4c 100644 --- a/onnxruntime/core/providers/openvino/openvino_execution_provider.cc +++ b/onnxruntime/core/providers/openvino/openvino_execution_provider.cc @@ -184,7 +184,6 @@ common::Status OpenVINOExecutionProvider::Compile( for (const auto& fused_node_graph : fused_nodes) { const GraphViewer& graph_body_viewer = fused_node_graph.filtered_graph; - // Set include_embed_data to true only for the first backend manager backend_it->TryExportCompiledBlobAsEPCtxNode(graph_body_viewer, is_first); diff --git a/onnxruntime/core/providers/openvino/openvino_parser_utils.cc b/onnxruntime/core/providers/openvino/openvino_parser_utils.cc index 54510ce333a49..c4daa2232eebf 100644 --- a/onnxruntime/core/providers/openvino/openvino_parser_utils.cc +++ b/onnxruntime/core/providers/openvino/openvino_parser_utils.cc @@ -327,8 +327,6 @@ affinity_t OpenVINOParserUtils::ParseAffinity(const std::string& affinity_defini std::smatch device_match = *device_it; std::string device_name = device_match[1].str(); std::string nodes_list_str = device_match[2].str(); - //std::cout << "device_name " << device_name << "\n"; - //std::cout << "nodes_list_str " << nodes_list_str << "\n"; std::stringstream nodes_list(nodes_list_str); std::string item; @@ -337,10 +335,6 @@ affinity_t OpenVINOParserUtils::ParseAffinity(const std::string& affinity_defini } } - //for (auto item : result_map){ - // std::cout << "\n" << item.first << " on " << item.second << "\n"; - //} - return result_map; } diff --git a/onnxruntime/core/providers/openvino/openvino_provider_factory.cc b/onnxruntime/core/providers/openvino/openvino_provider_factory.cc index 78ef874d45ae0..38d52151465cd 100644 --- a/onnxruntime/core/providers/openvino/openvino_provider_factory.cc +++ b/onnxruntime/core/providers/openvino/openvino_provider_factory.cc @@ -267,7 +267,6 @@ static void ParseProviderInfo(const ProviderOptions& provider_options, } if (provider_options.contains("affinity")) { - std::cout << "Provider options contain affinity\n"; pi.affinity = OpenVINOParserUtils::ParseAffinity(provider_options.at("affinity")); } diff --git a/onnxruntime/core/providers/openvino/ov_interface.cc b/onnxruntime/core/providers/openvino/ov_interface.cc index 23be3447b8799..61477fe480566 100644 --- 
diff --git a/onnxruntime/core/providers/openvino/ov_interface.cc b/onnxruntime/core/providers/openvino/ov_interface.cc
index 23be3447b8799..61477fe480566 100644
--- a/onnxruntime/core/providers/openvino/ov_interface.cc
+++ b/onnxruntime/core/providers/openvino/ov_interface.cc
@@ -84,7 +84,7 @@ std::shared_ptr<ov::Model> OVCore::ReadModel(std::string&& model, const std::string& model_path) {
       ov::frontend::InputModel::Ptr inputModel;
 
       ov::AnyVector params{&modelStream, model_path};
-
+
       FE = manager.load_by_model(params);
       if (FE) {
         inputModel = FE->load(params);
diff --git a/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc b/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc
index 6e4145c050ebb..51754046ae199 100644
--- a/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc
+++ b/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc
@@ -152,7 +152,7 @@ std::vector<SupportedOp> supported_op_mode = {
     {"GreaterOrEqual", V_2022_1, {"CPU", "GPU"}},
     {"GridSample", V_2022_3, {"CPU"}},
     {"GridSample", V_2023_0, {"GPU"}},
-    //{"GroupQueryAttention", V_2025_0, {"CPU"}},
+    {"GroupQueryAttention", V_2025_0, {"CPU"}},
     {"GRU", V_2024_1, {"CPU", "GPU"}},
     {"HardMax", V_2023_1, {"CPU", "GPU"}},
     {"Identity", V_2020_4, {"CPU", "GPU"}},
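
A minimal application-side sketch of the new key through the public C++ API, assuming all three patches are applied ("model.onnx" and the node names are placeholders; AppendExecutionProvider_OpenVINO_V2 is the same entry point the perftest change above uses):

    #include <onnxruntime_cxx_api.h>

    #include <string>
    #include <unordered_map>

    int main() {
      Ort::Env env;
      Ort::SessionOptions session_options;

      std::unordered_map<std::string, std::string> ov_options;
      ov_options["device_type"] = "HETERO:GPU,CPU";               // Set_Affinity requires a HETERO mode string
      ov_options["affinity"] = "GPU[node_a,node_b],CPU[node_c]";  // format accepted by ParseAffinity

      session_options.AppendExecutionProvider_OpenVINO_V2(ov_options);
      Ort::Session session(env, ORT_TSTR("model.onnx"), session_options);
      return 0;
    }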