From 9e0c923e9228d423936c6b7fc4bc0a59315c4cb1 Mon Sep 17 00:00:00 2001
From: sfatimar
Date: Thu, 4 Dec 2025 11:47:28 +0530
Subject: [PATCH 1/3] Affinity Draft Commit

---
 .../providers/openvino/backend_manager.cc     |  4 ++-
 .../core/providers/openvino/backend_utils.cc  | 35 +++++++++++++++++++
 .../core/providers/openvino/backend_utils.h   |  2 ++
 .../openvino/backends/basic_backend.cc        |  2 ++
 .../openvino/backends/basic_backend.h         |  6 ++--
 .../core/providers/openvino/contexts.h        |  4 ++-
 .../openvino/openvino_execution_provider.cc   |  8 ++---
 .../openvino/openvino_parser_utils.cc         | 35 ++++++++++++++++++-
 .../openvino/openvino_parser_utils.h          |  1 +
 .../openvino/openvino_provider_factory.cc     |  5 +++
 .../openvino/ov_versions/data_ops.cc          |  1 +
 .../python/onnxruntime_pybind_state.cc        |  2 +-
 .../test/perftest/command_args_parser.cc      |  1 +
 onnxruntime/test/perftest/ort_test_session.cc |  4 ++-
 14 files changed, 98 insertions(+), 12 deletions(-)

diff --git a/onnxruntime/core/providers/openvino/backend_manager.cc b/onnxruntime/core/providers/openvino/backend_manager.cc
index 3426a2781bbc6..e2eb6ea8b361d 100644
--- a/onnxruntime/core/providers/openvino/backend_manager.cc
+++ b/onnxruntime/core/providers/openvino/backend_manager.cc
@@ -476,7 +476,9 @@ BackendManager::GetModelProtoFromFusedNode(const onnxruntime::Node& fused_node,
   };
 
   [[maybe_unused]] bool enable_ovep_qdq_optimizer = session_context_.enable_qdq_optimizer && IsQDQGraph(subgraph);
-  [[maybe_unused]] std::optional<bool> enable_compiler_qdq_optimization = queryOVProperty("NPU_QDQ_OPTIMIZATION", session_context_.device_type);
+  [[maybe_unused]] std::optional<bool> enable_compiler_qdq_optimization = false;
+  if (session_context_.device_type.find("NPU") != std::string::npos)
+    enable_compiler_qdq_optimization = queryOVProperty("NPU_QDQ_OPTIMIZATION", "NPU");
 #if (((OPENVINO_VERSION_MAJOR == 2025) && (OPENVINO_VERSION_MINOR > 0)) || (OPENVINO_VERSION_MAJOR > 2025))
   if (session_context_.device_type.find("NPU") != std::string::npos && session_context_.enable_qdq_optimizer) {
     if (enable_compiler_qdq_optimization.has_value() && enable_compiler_qdq_optimization.value()) {
diff --git a/onnxruntime/core/providers/openvino/backend_utils.cc b/onnxruntime/core/providers/openvino/backend_utils.cc
index 45e518d16686e..6de9463ffc2af 100644
--- a/onnxruntime/core/providers/openvino/backend_utils.cc
+++ b/onnxruntime/core/providers/openvino/backend_utils.cc
@@ -57,6 +57,12 @@ CreateOVModel(std::string&& model,
     LOGS_DEFAULT(INFO) << log_tag << "Setting the ov tensor layout to specified layout";
     ov_model = Set_Layout(ov_model, session_context.layout);
   }
+
+  if (!session_context.affinity.empty()) {
+    LOGS_DEFAULT(INFO) << log_tag << "Setting the ov nodes to specified affinity";
+    Set_Affinity(ov_model, session_context);
+  }
+
   // Check for Constant Folding
   if ((session_context.device_type != "NPU") && !session_context.is_wholly_supported_graph) {
     ov::pass::ConstantFolding pass_const_obj;
@@ -141,6 +147,35 @@ std::shared_ptr<ov::Model> Set_Layout(std::shared_ptr<ov::Model> ov_model, const layout_t& layout) {
   return preproc.build();
 }
 
+void Set_Affinity(std::shared_ptr<ov::Model> ov_model, const SessionContext& session_context) {
+  std::string selected_device = "CPU";
+  if (auto delimit = session_context.device_type.find(":"); delimit != std::string::npos) {
+    auto device_mode = session_context.device_type.substr(0, delimit);
+    if (device_mode.find("HETERO") != std::string::npos) {
+      const auto& devices = session_context.device_type.substr(delimit + 1);
+      auto delimit_comma = devices.find(",");
+      selected_device = devices.substr(0, delimit_comma);
+    } else {
+      ORT_THROW("[ERROR] [OpenVINO] Invalid device_type selected. The only supported mode is HETERO");
+    }
+  } else {
+    ORT_THROW("[ERROR] [OpenVINO] Invalid device_type selected. The only supported mode is HETERO");
+  }
+
+  for (auto&& ov_node : ov_model->get_ops()) {
+    auto name = ov_node->get_friendly_name();
+    auto it = session_context.affinity.find(name);
+    if (it != session_context.affinity.end()) {
+      ov_node->get_rt_info()["affinity"] = it->second;
+      std::cout << "node name " << name << " on " << it->second << "\t";
+    } else {
+      ov_node->get_rt_info()["affinity"] = selected_device;
+      std::cout << "node name " << name << " on " << selected_device << "\t";
+    }
+  }
+}
+
 int GetFirstAvailableDevice(SessionContext& session_context) {
   int i = 0;
   // Get the first available VAD-M device and set the device to busy
diff --git a/onnxruntime/core/providers/openvino/backend_utils.h b/onnxruntime/core/providers/openvino/backend_utils.h
index 8ba35e0abd1bc..db32948a24655 100644
--- a/onnxruntime/core/providers/openvino/backend_utils.h
+++ b/onnxruntime/core/providers/openvino/backend_utils.h
@@ -106,6 +106,8 @@ void printPerformanceCounts(OVInferRequestPtr request, std::ostream& stream, std::string deviceName);
 
 bool IsModelStreamXML(std::istream& model_stream);
 
+void Set_Affinity(std::shared_ptr<ov::Model> ov_model, const SessionContext& session_context);
+
 }  // namespace backend_utils
 }  // namespace openvino_ep
 }  // namespace onnxruntime
diff --git a/onnxruntime/core/providers/openvino/backends/basic_backend.cc b/onnxruntime/core/providers/openvino/backends/basic_backend.cc
index d7fc0553fb1d4..f82e72cab8ce3 100644
--- a/onnxruntime/core/providers/openvino/backends/basic_backend.cc
+++ b/onnxruntime/core/providers/openvino/backends/basic_backend.cc
@@ -99,6 +99,7 @@ BasicBackend::BasicBackend(std::unique_ptr<ONNX_NAMESPACE::ModelProto>& model_proto,
                                  !session_context_.so_context_enable &&
                                  session_context_.reshape.empty() &&
                                  session_context_.layout.empty() &&
+                                 session_context_.affinity.empty() &&
                                  !enable_causallm &&
                                  !eligible_for_cpu_fallback &&
                                  auto_unified_compile);
@@ -110,6 +111,7 @@ BasicBackend::BasicBackend(std::unique_ptr<ONNX_NAMESPACE::ModelProto>& model_proto,
                                                  subgraph_context_.subgraph_name);
   } else {  // For all other types use ov::ov_core read_model() to generate OV IR
     // followed by ov::ov_core compile_model()
+    std::cout << "CreateOVModel\n";
    ov_model = CreateOVModel(std::move(model), session_context_, const_outputs_map_);
     exe_network_ = OVCore::Get()->CompileModel(
         ov_model, hw_target, device_config, enable_causallm, subgraph_context_.subgraph_name);
", onnx_name, + // " doesn't exist in the list of OpenVINO input tensor names"); auto ov_param_index = std::distance(ov_parameters.begin(), it); diff --git a/onnxruntime/core/providers/openvino/contexts.h b/onnxruntime/core/providers/openvino/contexts.h index ebb716a64162c..bcbb463733b89 100644 --- a/onnxruntime/core/providers/openvino/contexts.h +++ b/onnxruntime/core/providers/openvino/contexts.h @@ -24,6 +24,7 @@ namespace fs = std::filesystem; using config_t = std::map; using reshape_t = std::map; using layout_t = std::map; +using affinity_t = std::map; struct ProviderInfo { std::string device_type{""}; // [device_type]: Overrides the accelerator hardware type and @@ -43,6 +44,7 @@ struct ProviderInfo { // it will be directly loaded. reshape_t reshape{}; // Used for reshaping the ov input tensor shape at runtime. layout_t layout{}; // Used for specifying the ov input/output tensor layout at runtime. + affinity_t affinity{}; // Used for specifying the nodes affinity at runtime. std::string model_priority{"DEFAULT"}; // High-level OpenVINO model priority hint // Defines what model should be provided with more performant // bounded resource first @@ -66,7 +68,7 @@ struct ProviderInfo { const ConfigOptions* config_options{NULL}; const std::unordered_set valid_provider_keys = {"device_type", "device_id", "device_luid", "cache_dir", "precision", "load_config", "context", "num_of_threads", "model_priority", "num_streams", "enable_opencl_throttling", "enable_qdq_optimizer", - "enable_causallm", "disable_dynamic_shapes", "reshape_input", "layout"}; + "enable_causallm", "disable_dynamic_shapes", "reshape_input", "layout", "affinity"}; }; struct RuntimeConfig { diff --git a/onnxruntime/core/providers/openvino/openvino_execution_provider.cc b/onnxruntime/core/providers/openvino/openvino_execution_provider.cc index a099f85b2a4b9..6c8b60324bbbf 100644 --- a/onnxruntime/core/providers/openvino/openvino_execution_provider.cc +++ b/onnxruntime/core/providers/openvino/openvino_execution_provider.cc @@ -207,9 +207,9 @@ common::Status OpenVINOExecutionProvider::Compile( return status; } -#ifdef USE_OVEP_NPU_MEMORY + #ifdef USE_OVEP_NPU_MEMORY std::vector OpenVINOExecutionProvider::CreatePreferredAllocators() { - if (session_context_.device_type.find("NPU") != std::string::npos) { + /* if (session_context_.device_type.find("NPU") != std::string::npos) { AllocatorCreationInfo npu_allocator_info{ [this](OrtDevice::DeviceId device_id) { return std::make_unique( @@ -223,9 +223,9 @@ std::vector OpenVINOExecutionProvider::CreatePreferredAllocators() // fill in allocator return std::vector{CreateAllocator(npu_allocator_info)}; - } else { + } else {*/ return std::vector{}; - } + //} } #endif diff --git a/onnxruntime/core/providers/openvino/openvino_parser_utils.cc b/onnxruntime/core/providers/openvino/openvino_parser_utils.cc index a290fea73e0e8..ae5b9cef05810 100644 --- a/onnxruntime/core/providers/openvino/openvino_parser_utils.cc +++ b/onnxruntime/core/providers/openvino/openvino_parser_utils.cc @@ -127,7 +127,7 @@ reshape_t OpenVINOParserUtils::ParseInputShape(const std::string& reshape_input_ // Regular expressions for parsing const std::regex tensor_pattern(R"(([^\[\],]+)\s*\[(.*?)\])"); // e.g. "input_1[1..5, 2, 3..4],data[1,2,3]" - // const std::regex dimension_pattern(R"(\s*(\d+(?:\.\.\d+)?)\s*)"); // e.g. "1..5", "2", "3..4" + // const dimension_pattern(R"(\s*(\d+(?:\.\.\d+)?)\s*)"); // e.g. 
"1..5", "2", "3..4" const std::regex dimension_pattern(R"(\s*([^,\s]+)\s*)"); // Find all tensor shape definitions using regex auto tensor_begin = std::sregex_iterator( @@ -310,5 +310,38 @@ bool OpenVINOParserUtils::Check_Valid_Layout(const std::string& layout_str, cons return true; } +affinity_t OpenVINOParserUtils::ParseAffinity(const std::string& affinity_definition) { + LOGS_DEFAULT(INFO) << "[OpenVINO] Affinity is set : " << affinity_definition << "\n"; + affinity_t result_map; + + // Regex to capture device name and a list of nodes + // It captures: + // Group 1: device name (e.g., "device") + // Group 2: comma-separated list of nodes (e.g., "\"node1\", \"node2\"") + std::regex device_nodes_regex(R"(([^,\[\]]+)\[([^\]]+)\])"); + + std::sregex_iterator device_it(affinity_definition.begin(), affinity_definition.end(), device_nodes_regex); + std::sregex_iterator device_end; + + for (; device_it != device_end; ++device_it) { + std::smatch device_match = *device_it; + std::string device_name = device_match[1].str(); + std::string nodes_list_str = device_match[2].str(); + std::cout << "device_name " << device_name << "\n"; + std::cout << "nodes_list_str " << nodes_list_str << "\n"; + std::stringstream nodes_list(nodes_list_str); + std::string item; + + while (getline(nodes_list, item, ',')) { + result_map[item] = device_name; + } + } + + for (auto item : result_map){ + std::cout << "\n" << item.first << " on " << item.second << "\n"; + } + return result_map; +} + } // namespace openvino_ep } // namespace onnxruntime diff --git a/onnxruntime/core/providers/openvino/openvino_parser_utils.h b/onnxruntime/core/providers/openvino/openvino_parser_utils.h index a0936d627df40..090a133e906ab 100644 --- a/onnxruntime/core/providers/openvino/openvino_parser_utils.h +++ b/onnxruntime/core/providers/openvino/openvino_parser_utils.h @@ -22,6 +22,7 @@ class OpenVINOParserUtils { static std::string TrimWhitespace(const std::string& str); static ov::Dimension ParseDimensionRange(const std::string& range_str, const std::string& tensor_name); static bool Check_Valid_Layout(const std::string& layout_str, const std::string& tensor_name); + static affinity_t ParseAffinity(const std::string& affinity_definition); }; } // namespace openvino_ep diff --git a/onnxruntime/core/providers/openvino/openvino_provider_factory.cc b/onnxruntime/core/providers/openvino/openvino_provider_factory.cc index 7eb5b062fe7c8..78ef874d45ae0 100644 --- a/onnxruntime/core/providers/openvino/openvino_provider_factory.cc +++ b/onnxruntime/core/providers/openvino/openvino_provider_factory.cc @@ -266,6 +266,11 @@ static void ParseProviderInfo(const ProviderOptions& provider_options, pi.layout = OpenVINOParserUtils::ParseLayout(provider_options.at("layout")); } + if (provider_options.contains("affinity")) { + std::cout << "Provider options contain affinity\n"; + pi.affinity = OpenVINOParserUtils::ParseAffinity(provider_options.at("affinity")); + } + if (provider_options.contains("load_config")) { auto parse_config = [&](const std::string& config_str) -> std::map { // If the config string is empty, return an empty map and skip processing diff --git a/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc b/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc index 373b2121a9b60..51754046ae199 100644 --- a/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc +++ b/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc @@ -152,6 +152,7 @@ std::vector supported_op_mode = { {"GreaterOrEqual", V_2022_1, {"CPU", "GPU"}}, 
{"GridSample", V_2022_3, {"CPU"}}, {"GridSample", V_2023_0, {"GPU"}}, + {"GroupQueryAttention", V_2025_0, {"CPU"}}, {"GRU", V_2024_1, {"CPU", "GPU"}}, {"HardMax", V_2023_1, {"CPU", "GPU"}}, {"Identity", V_2020_4, {"CPU", "GPU"}}, diff --git a/onnxruntime/python/onnxruntime_pybind_state.cc b/onnxruntime/python/onnxruntime_pybind_state.cc index 92cf6b085c01e..4961b290da8cf 100644 --- a/onnxruntime/python/onnxruntime_pybind_state.cc +++ b/onnxruntime/python/onnxruntime_pybind_state.cc @@ -1083,7 +1083,7 @@ static std::shared_ptr CreateExecutionProviderFactory ProviderOptions OV_provider_options_map; const std::unordered_set valid_provider_keys = {"device_type", "device_id", "device_luid", "cache_dir", "precision", "load_config", "context", "num_of_threads", "model_priority", "num_streams", "enable_opencl_throttling", "enable_qdq_optimizer", - "enable_causallm", "disable_dynamic_shapes", "reshape_input", "layout"}; + "enable_causallm", "disable_dynamic_shapes", "reshape_input", "layout", "affinity"}; auto it = provider_options_map.find(type); if (it != provider_options_map.end()) { for (auto option : it->second) { diff --git a/onnxruntime/test/perftest/command_args_parser.cc b/onnxruntime/test/perftest/command_args_parser.cc index 8960898f036fc..a1648edd68b98 100644 --- a/onnxruntime/test/perftest/command_args_parser.cc +++ b/onnxruntime/test/perftest/command_args_parser.cc @@ -71,6 +71,7 @@ ABSL_FLAG(std::string, i, "", " [OpenVINO only] [enable_opencl_throttling]: Enables OpenCL queue throttling for GPU device(Reduces the CPU Utilization while using GPU) \n" " [OpenVINO only] [reshape_input]: Sets model input shapes with support for bounded dynamic dimensions using 'min..max' syntax (e.g., [1..10,3,224,224]) \n" " [OpenVINO only] [layout]: Specifies the layout for inputs/outputs to interpret tensor dimensions correctly. \n" + " [OpenVINO only] [affinity]: Specifies the affinity of a certain node to a specific device in Hetero Mode. \n" " [Example] [For OpenVINO EP] -e openvino -i \"device_type|CPU num_of_threads|5 enable_opencl_throttling|true reshape_input|[1,3,60,60..100] layout|[NCHW] cache_dir|\"\"\"\n" "\n" " [QNN only] [backend_type]: QNN backend type. E.g., 'cpu', 'htp'. Mutually exclusive with 'backend_path'.\n" diff --git a/onnxruntime/test/perftest/ort_test_session.cc b/onnxruntime/test/perftest/ort_test_session.cc index 0f2da07c69d85..59be37803cdf0 100644 --- a/onnxruntime/test/perftest/ort_test_session.cc +++ b/onnxruntime/test/perftest/ort_test_session.cc @@ -947,12 +947,14 @@ select from 'TF8', 'TF16', 'UINT8', 'FLOAT', 'ITENSOR'. \n)"); ov_options[key] = value; } else if (key == "layout") { ov_options[key] = value; + } else if (key == "affinity") { + ov_options[key] = value; } else { ORT_THROW( "[ERROR] [OpenVINO] wrong key type entered. Choose from the following runtime key options that are available for OpenVINO." 
" ['device_type', 'device_id', 'num_of_threads', 'load_config', 'cache_dir', 'num_streams', " "'enable_opencl_throttling', 'disable_dynamic_shapes', 'enable_qdq_optimizer'," - " 'enable_causallm', 'reshape_input', 'layout', 'model_priority'] \n"); + " 'enable_causallm', 'reshape_input', 'layout', 'affinity', 'model_priority'] \n"); } } session_options.AppendExecutionProvider_OpenVINO_V2(ov_options); From b521399e3306c91db760f3dd72c039ca1ee3ef82 Mon Sep 17 00:00:00 2001 From: sfatimar Date: Thu, 4 Dec 2025 11:47:28 +0530 Subject: [PATCH 2/4] Affinity Draft Commit --- .../providers/openvino/backend_manager.cc | 4 ++- .../core/providers/openvino/backend_utils.cc | 35 +++++++++++++++++++ .../core/providers/openvino/backend_utils.h | 2 ++ .../openvino/backends/basic_backend.cc | 2 ++ .../openvino/backends/basic_backend.h | 6 ++-- .../core/providers/openvino/contexts.h | 4 ++- .../openvino/openvino_execution_provider.cc | 8 ++--- .../openvino/openvino_parser_utils.cc | 35 ++++++++++++++++++- .../openvino/openvino_parser_utils.h | 1 + .../openvino/openvino_provider_factory.cc | 5 +++ .../openvino/ov_versions/data_ops.cc | 1 + .../python/onnxruntime_pybind_state.cc | 2 +- .../test/perftest/command_args_parser.cc | 1 + onnxruntime/test/perftest/ort_test_session.cc | 4 ++- 14 files changed, 98 insertions(+), 12 deletions(-) diff --git a/onnxruntime/core/providers/openvino/backend_manager.cc b/onnxruntime/core/providers/openvino/backend_manager.cc index 3426a2781bbc6..e2eb6ea8b361d 100644 --- a/onnxruntime/core/providers/openvino/backend_manager.cc +++ b/onnxruntime/core/providers/openvino/backend_manager.cc @@ -476,7 +476,9 @@ BackendManager::GetModelProtoFromFusedNode(const onnxruntime::Node& fused_node, }; [[maybe_unused]] bool enable_ovep_qdq_optimizer = session_context_.enable_qdq_optimizer && IsQDQGraph(subgraph); - [[maybe_unused]] std::optional enable_compiler_qdq_optimization = queryOVProperty("NPU_QDQ_OPTIMIZATION", session_context_.device_type); + [[maybe_unused]] std::optional enable_compiler_qdq_optimization = false; + if (session_context_.device_type.find("NPU") != std::string::npos) + enable_compiler_qdq_optimization = queryOVProperty("NPU_QDQ_OPTIMIZATION", "NPU"); #if (((OPENVINO_VERSION_MAJOR == 2025) && (OPENVINO_VERSION_MINOR > 0)) || (OPENVINO_VERSION_MAJOR > 2025)) if (session_context_.device_type.find("NPU") != std::string::npos && session_context_.enable_qdq_optimizer) { if (enable_compiler_qdq_optimization.has_value() && enable_compiler_qdq_optimization.value()) { diff --git a/onnxruntime/core/providers/openvino/backend_utils.cc b/onnxruntime/core/providers/openvino/backend_utils.cc index 45e518d16686e..6de9463ffc2af 100644 --- a/onnxruntime/core/providers/openvino/backend_utils.cc +++ b/onnxruntime/core/providers/openvino/backend_utils.cc @@ -57,6 +57,12 @@ CreateOVModel(std::string&& model, LOGS_DEFAULT(INFO) << log_tag << "Setting the ov tensor layout to specified layout"; ov_model = Set_Layout(ov_model, session_context.layout); } + + if (!session_context.affinity.empty()) { + LOGS_DEFAULT(INFO) << log_tag << "Setting the ov nodes to specified affinity"; + Set_Affinity(ov_model, session_context); + } + // Check for Constant Folding if ((session_context.device_type != "NPU") && !session_context.is_wholly_supported_graph) { ov::pass::ConstantFolding pass_const_obj; @@ -141,6 +147,35 @@ std::shared_ptr Set_Layout(std::shared_ptr ov_model, const return preproc.build(); } +void Set_Affinity(std::shared_ptr ov_model, const SessionContext& session_context) { + + std::string 
selected_device = "CPU"; + if (auto delimit = session_context.device_type.find(":"); delimit != std::string::npos) { + auto device_mode = session_context.device_type.substr(0, delimit); + if (device_mode.find("HETERO") != std::string::npos) { + const auto& devices = session_context.device_type.substr(delimit + 1); + auto delimit_comma = devices.find(","); + selected_device = devices.substr(0, delimit_comma); + } else { + ORT_THROW("[ERROR] [OpenVINO] Invalid device_type is selected. Supported modes is HETERO"); + } + } else { + ORT_THROW("[ERROR] [OpenVINO] Invalid device_type is selected. Supported modes is HETERO"); + } + + for (auto&& ov_node : ov_model->get_ops()) { + auto name = ov_node->get_friendly_name(); + auto it = session_context.affinity.find(name); + if (it != session_context.affinity.end()) { + ov_node->get_rt_info()["affinity"] = it->second; + std::cout << "node name " << name << " on " << it->second << "\t"; + } else { + ov_node->get_rt_info()["affinity"] = selected_device; + std::cout << "node name " << name << " on " << selected_device << "\t"; + } + } +} + int GetFirstAvailableDevice(SessionContext& session_context) { int i = 0; // Get the first available VAD-M device and set the device to busy diff --git a/onnxruntime/core/providers/openvino/backend_utils.h b/onnxruntime/core/providers/openvino/backend_utils.h index 8ba35e0abd1bc..db32948a24655 100644 --- a/onnxruntime/core/providers/openvino/backend_utils.h +++ b/onnxruntime/core/providers/openvino/backend_utils.h @@ -106,6 +106,8 @@ void printPerformanceCounts(OVInferRequestPtr request, std::ostream& stream, std bool IsModelStreamXML(std::istream& model_stream); +void Set_Affinity(std::shared_ptr ov_model, const SessionContext& session_context); + } // namespace backend_utils } // namespace openvino_ep } // namespace onnxruntime diff --git a/onnxruntime/core/providers/openvino/backends/basic_backend.cc b/onnxruntime/core/providers/openvino/backends/basic_backend.cc index d7fc0553fb1d4..f82e72cab8ce3 100644 --- a/onnxruntime/core/providers/openvino/backends/basic_backend.cc +++ b/onnxruntime/core/providers/openvino/backends/basic_backend.cc @@ -99,6 +99,7 @@ BasicBackend::BasicBackend(std::unique_ptr& model_pr !session_context_.so_context_enable && session_context_.reshape.empty() && session_context_.layout.empty() && + session_context_.affinity.empty() && !enable_causallm && !eligible_for_cpu_fallback && auto_unified_compile); @@ -110,6 +111,7 @@ BasicBackend::BasicBackend(std::unique_ptr& model_pr subgraph_context_.subgraph_name); } else { // For all other types use ov::ov_core read_model() to generate OV IR // followed by ov::ov_core compile_model() + std::cout << "CreateOVModel\n"; ov_model = CreateOVModel(std::move(model), session_context_, const_outputs_map_); exe_network_ = OVCore::Get()->CompileModel( ov_model, hw_target, device_config, enable_causallm, subgraph_context_.subgraph_name); diff --git a/onnxruntime/core/providers/openvino/backends/basic_backend.h b/onnxruntime/core/providers/openvino/backends/basic_backend.h index 2cf3d3faa8b47..690835eabb391 100644 --- a/onnxruntime/core/providers/openvino/backends/basic_backend.h +++ b/onnxruntime/core/providers/openvino/backends/basic_backend.h @@ -82,9 +82,9 @@ struct OnnxToOvNetworkBindings { } } - ORT_ENFORCE(matched_names, log_tag, - "Input names mismatch between OpenVINO and ONNX. ", onnx_name, - " doesn't exist in the list of OpenVINO input tensor names"); + //ORT_ENFORCE(matched_names, log_tag, + // "Input names mismatch between OpenVINO and ONNX. 
", onnx_name, + // " doesn't exist in the list of OpenVINO input tensor names"); auto ov_param_index = std::distance(ov_parameters.begin(), it); diff --git a/onnxruntime/core/providers/openvino/contexts.h b/onnxruntime/core/providers/openvino/contexts.h index ebb716a64162c..bcbb463733b89 100644 --- a/onnxruntime/core/providers/openvino/contexts.h +++ b/onnxruntime/core/providers/openvino/contexts.h @@ -24,6 +24,7 @@ namespace fs = std::filesystem; using config_t = std::map; using reshape_t = std::map; using layout_t = std::map; +using affinity_t = std::map; struct ProviderInfo { std::string device_type{""}; // [device_type]: Overrides the accelerator hardware type and @@ -43,6 +44,7 @@ struct ProviderInfo { // it will be directly loaded. reshape_t reshape{}; // Used for reshaping the ov input tensor shape at runtime. layout_t layout{}; // Used for specifying the ov input/output tensor layout at runtime. + affinity_t affinity{}; // Used for specifying the nodes affinity at runtime. std::string model_priority{"DEFAULT"}; // High-level OpenVINO model priority hint // Defines what model should be provided with more performant // bounded resource first @@ -66,7 +68,7 @@ struct ProviderInfo { const ConfigOptions* config_options{NULL}; const std::unordered_set valid_provider_keys = {"device_type", "device_id", "device_luid", "cache_dir", "precision", "load_config", "context", "num_of_threads", "model_priority", "num_streams", "enable_opencl_throttling", "enable_qdq_optimizer", - "enable_causallm", "disable_dynamic_shapes", "reshape_input", "layout"}; + "enable_causallm", "disable_dynamic_shapes", "reshape_input", "layout", "affinity"}; }; struct RuntimeConfig { diff --git a/onnxruntime/core/providers/openvino/openvino_execution_provider.cc b/onnxruntime/core/providers/openvino/openvino_execution_provider.cc index a099f85b2a4b9..6c8b60324bbbf 100644 --- a/onnxruntime/core/providers/openvino/openvino_execution_provider.cc +++ b/onnxruntime/core/providers/openvino/openvino_execution_provider.cc @@ -207,9 +207,9 @@ common::Status OpenVINOExecutionProvider::Compile( return status; } -#ifdef USE_OVEP_NPU_MEMORY + #ifdef USE_OVEP_NPU_MEMORY std::vector OpenVINOExecutionProvider::CreatePreferredAllocators() { - if (session_context_.device_type.find("NPU") != std::string::npos) { + /* if (session_context_.device_type.find("NPU") != std::string::npos) { AllocatorCreationInfo npu_allocator_info{ [this](OrtDevice::DeviceId device_id) { return std::make_unique( @@ -223,9 +223,9 @@ std::vector OpenVINOExecutionProvider::CreatePreferredAllocators() // fill in allocator return std::vector{CreateAllocator(npu_allocator_info)}; - } else { + } else {*/ return std::vector{}; - } + //} } #endif diff --git a/onnxruntime/core/providers/openvino/openvino_parser_utils.cc b/onnxruntime/core/providers/openvino/openvino_parser_utils.cc index a290fea73e0e8..ae5b9cef05810 100644 --- a/onnxruntime/core/providers/openvino/openvino_parser_utils.cc +++ b/onnxruntime/core/providers/openvino/openvino_parser_utils.cc @@ -127,7 +127,7 @@ reshape_t OpenVINOParserUtils::ParseInputShape(const std::string& reshape_input_ // Regular expressions for parsing const std::regex tensor_pattern(R"(([^\[\],]+)\s*\[(.*?)\])"); // e.g. "input_1[1..5, 2, 3..4],data[1,2,3]" - // const std::regex dimension_pattern(R"(\s*(\d+(?:\.\.\d+)?)\s*)"); // e.g. "1..5", "2", "3..4" + // const dimension_pattern(R"(\s*(\d+(?:\.\.\d+)?)\s*)"); // e.g. 
"1..5", "2", "3..4" const std::regex dimension_pattern(R"(\s*([^,\s]+)\s*)"); // Find all tensor shape definitions using regex auto tensor_begin = std::sregex_iterator( @@ -310,5 +310,38 @@ bool OpenVINOParserUtils::Check_Valid_Layout(const std::string& layout_str, cons return true; } +affinity_t OpenVINOParserUtils::ParseAffinity(const std::string& affinity_definition) { + LOGS_DEFAULT(INFO) << "[OpenVINO] Affinity is set : " << affinity_definition << "\n"; + affinity_t result_map; + + // Regex to capture device name and a list of nodes + // It captures: + // Group 1: device name (e.g., "device") + // Group 2: comma-separated list of nodes (e.g., "\"node1\", \"node2\"") + std::regex device_nodes_regex(R"(([^,\[\]]+)\[([^\]]+)\])"); + + std::sregex_iterator device_it(affinity_definition.begin(), affinity_definition.end(), device_nodes_regex); + std::sregex_iterator device_end; + + for (; device_it != device_end; ++device_it) { + std::smatch device_match = *device_it; + std::string device_name = device_match[1].str(); + std::string nodes_list_str = device_match[2].str(); + std::cout << "device_name " << device_name << "\n"; + std::cout << "nodes_list_str " << nodes_list_str << "\n"; + std::stringstream nodes_list(nodes_list_str); + std::string item; + + while (getline(nodes_list, item, ',')) { + result_map[item] = device_name; + } + } + + for (auto item : result_map){ + std::cout << "\n" << item.first << " on " << item.second << "\n"; + } + return result_map; +} + } // namespace openvino_ep } // namespace onnxruntime diff --git a/onnxruntime/core/providers/openvino/openvino_parser_utils.h b/onnxruntime/core/providers/openvino/openvino_parser_utils.h index a0936d627df40..090a133e906ab 100644 --- a/onnxruntime/core/providers/openvino/openvino_parser_utils.h +++ b/onnxruntime/core/providers/openvino/openvino_parser_utils.h @@ -22,6 +22,7 @@ class OpenVINOParserUtils { static std::string TrimWhitespace(const std::string& str); static ov::Dimension ParseDimensionRange(const std::string& range_str, const std::string& tensor_name); static bool Check_Valid_Layout(const std::string& layout_str, const std::string& tensor_name); + static affinity_t ParseAffinity(const std::string& affinity_definition); }; } // namespace openvino_ep diff --git a/onnxruntime/core/providers/openvino/openvino_provider_factory.cc b/onnxruntime/core/providers/openvino/openvino_provider_factory.cc index 7eb5b062fe7c8..78ef874d45ae0 100644 --- a/onnxruntime/core/providers/openvino/openvino_provider_factory.cc +++ b/onnxruntime/core/providers/openvino/openvino_provider_factory.cc @@ -266,6 +266,11 @@ static void ParseProviderInfo(const ProviderOptions& provider_options, pi.layout = OpenVINOParserUtils::ParseLayout(provider_options.at("layout")); } + if (provider_options.contains("affinity")) { + std::cout << "Provider options contain affinity\n"; + pi.affinity = OpenVINOParserUtils::ParseAffinity(provider_options.at("affinity")); + } + if (provider_options.contains("load_config")) { auto parse_config = [&](const std::string& config_str) -> std::map { // If the config string is empty, return an empty map and skip processing diff --git a/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc b/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc index 373b2121a9b60..51754046ae199 100644 --- a/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc +++ b/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc @@ -152,6 +152,7 @@ std::vector supported_op_mode = { {"GreaterOrEqual", V_2022_1, {"CPU", "GPU"}}, 
{"GridSample", V_2022_3, {"CPU"}}, {"GridSample", V_2023_0, {"GPU"}}, + {"GroupQueryAttention", V_2025_0, {"CPU"}}, {"GRU", V_2024_1, {"CPU", "GPU"}}, {"HardMax", V_2023_1, {"CPU", "GPU"}}, {"Identity", V_2020_4, {"CPU", "GPU"}}, diff --git a/onnxruntime/python/onnxruntime_pybind_state.cc b/onnxruntime/python/onnxruntime_pybind_state.cc index 92cf6b085c01e..4961b290da8cf 100644 --- a/onnxruntime/python/onnxruntime_pybind_state.cc +++ b/onnxruntime/python/onnxruntime_pybind_state.cc @@ -1083,7 +1083,7 @@ static std::shared_ptr CreateExecutionProviderFactory ProviderOptions OV_provider_options_map; const std::unordered_set valid_provider_keys = {"device_type", "device_id", "device_luid", "cache_dir", "precision", "load_config", "context", "num_of_threads", "model_priority", "num_streams", "enable_opencl_throttling", "enable_qdq_optimizer", - "enable_causallm", "disable_dynamic_shapes", "reshape_input", "layout"}; + "enable_causallm", "disable_dynamic_shapes", "reshape_input", "layout", "affinity"}; auto it = provider_options_map.find(type); if (it != provider_options_map.end()) { for (auto option : it->second) { diff --git a/onnxruntime/test/perftest/command_args_parser.cc b/onnxruntime/test/perftest/command_args_parser.cc index 2c9377d48f0c4..5cada81db449d 100644 --- a/onnxruntime/test/perftest/command_args_parser.cc +++ b/onnxruntime/test/perftest/command_args_parser.cc @@ -74,6 +74,7 @@ ABSL_FLAG(std::string, i, "", " [OpenVINO only] [enable_opencl_throttling]: Enables OpenCL queue throttling for GPU device(Reduces the CPU Utilization while using GPU) \n" " [OpenVINO only] [reshape_input]: Sets model input shapes with support for bounded dynamic dimensions using 'min..max' syntax (e.g., [1..10,3,224,224]) \n" " [OpenVINO only] [layout]: Specifies the layout for inputs/outputs to interpret tensor dimensions correctly. \n" + " [OpenVINO only] [affinity]: Specifies the affinity of a certain node to a specific device in Hetero Mode. \n" " [Example] [For OpenVINO EP] -e openvino -i \"device_type|CPU num_of_threads|5 enable_opencl_throttling|true reshape_input|[1,3,60,60..100] layout|[NCHW] cache_dir|\"\"\"\n" "\n" " [QNN only] [backend_type]: QNN backend type. E.g., 'cpu', 'htp'. Mutually exclusive with 'backend_path'.\n" diff --git a/onnxruntime/test/perftest/ort_test_session.cc b/onnxruntime/test/perftest/ort_test_session.cc index cb40a9beafeee..53b509fafe432 100644 --- a/onnxruntime/test/perftest/ort_test_session.cc +++ b/onnxruntime/test/perftest/ort_test_session.cc @@ -974,12 +974,14 @@ select from 'TF8', 'TF16', 'UINT8', 'FLOAT', 'ITENSOR'. \n)"); ov_options[key] = value; } else if (key == "layout") { ov_options[key] = value; + } else if (key == "affinity") { + ov_options[key] = value; } else { ORT_THROW( "[ERROR] [OpenVINO] wrong key type entered. Choose from the following runtime key options that are available for OpenVINO." 
" ['device_type', 'device_id', 'num_of_threads', 'load_config', 'cache_dir', 'num_streams', " "'enable_opencl_throttling', 'disable_dynamic_shapes', 'enable_qdq_optimizer'," - " 'enable_causallm', 'reshape_input', 'layout', 'model_priority'] \n"); + " 'enable_causallm', 'reshape_input', 'layout', 'affinity', 'model_priority'] \n"); } } session_options.AppendExecutionProvider_OpenVINO_V2(ov_options); From 6aa58db96ac13a2038fede54713999bec9badacc Mon Sep 17 00:00:00 2001 From: sfatimar Date: Mon, 8 Dec 2025 12:50:43 +0530 Subject: [PATCH 3/4] Merged Changes --- onnxruntime/core/providers/openvino/backend_utils.cc | 8 ++++---- .../core/providers/openvino/backends/basic_backend.cc | 1 - .../core/providers/openvino/openvino_parser_utils.cc | 10 +++++----- .../core/providers/openvino/ov_versions/data_ops.cc | 2 +- 4 files changed, 10 insertions(+), 11 deletions(-) diff --git a/onnxruntime/core/providers/openvino/backend_utils.cc b/onnxruntime/core/providers/openvino/backend_utils.cc index 6de9463ffc2af..3397223b10c08 100644 --- a/onnxruntime/core/providers/openvino/backend_utils.cc +++ b/onnxruntime/core/providers/openvino/backend_utils.cc @@ -64,7 +64,7 @@ CreateOVModel(std::string&& model, } // Check for Constant Folding - if ((session_context.device_type != "NPU") && !session_context.is_wholly_supported_graph) { + /* if ((session_context.device_type != "NPU") && !session_context.is_wholly_supported_graph) { ov::pass::ConstantFolding pass_const_obj; pass_const_obj.run_on_model(ov_model); auto& results = const_cast(ov_model.get()->get_results()); @@ -78,7 +78,7 @@ CreateOVModel(std::string&& model, } --index; } - } + }*/ #ifndef NDEBUG if (IsDebugEnabled()) { std::string name = ov_model->get_friendly_name(); @@ -168,10 +168,10 @@ void Set_Affinity(std::shared_ptr ov_model, const SessionContext& ses auto it = session_context.affinity.find(name); if (it != session_context.affinity.end()) { ov_node->get_rt_info()["affinity"] = it->second; - std::cout << "node name " << name << " on " << it->second << "\t"; + std::cout << name << " on " << it->second << "\n"; } else { ov_node->get_rt_info()["affinity"] = selected_device; - std::cout << "node name " << name << " on " << selected_device << "\t"; + std::cout << name << " on " << selected_device << "\n"; } } } diff --git a/onnxruntime/core/providers/openvino/backends/basic_backend.cc b/onnxruntime/core/providers/openvino/backends/basic_backend.cc index f82e72cab8ce3..508b20213d402 100644 --- a/onnxruntime/core/providers/openvino/backends/basic_backend.cc +++ b/onnxruntime/core/providers/openvino/backends/basic_backend.cc @@ -111,7 +111,6 @@ BasicBackend::BasicBackend(std::unique_ptr& model_pr subgraph_context_.subgraph_name); } else { // For all other types use ov::ov_core read_model() to generate OV IR // followed by ov::ov_core compile_model() - std::cout << "CreateOVModel\n"; ov_model = CreateOVModel(std::move(model), session_context_, const_outputs_map_); exe_network_ = OVCore::Get()->CompileModel( ov_model, hw_target, device_config, enable_causallm, subgraph_context_.subgraph_name); diff --git a/onnxruntime/core/providers/openvino/openvino_parser_utils.cc b/onnxruntime/core/providers/openvino/openvino_parser_utils.cc index ae5b9cef05810..52a5979468b07 100644 --- a/onnxruntime/core/providers/openvino/openvino_parser_utils.cc +++ b/onnxruntime/core/providers/openvino/openvino_parser_utils.cc @@ -327,8 +327,8 @@ affinity_t OpenVINOParserUtils::ParseAffinity(const std::string& affinity_defini std::smatch device_match = *device_it; std::string 
diff --git a/onnxruntime/core/providers/openvino/openvino_parser_utils.cc b/onnxruntime/core/providers/openvino/openvino_parser_utils.cc
index ae5b9cef05810..52a5979468b07 100644
--- a/onnxruntime/core/providers/openvino/openvino_parser_utils.cc
+++ b/onnxruntime/core/providers/openvino/openvino_parser_utils.cc
@@ -327,8 +327,8 @@ affinity_t OpenVINOParserUtils::ParseAffinity(const std::string& affinity_definition) {
     std::smatch device_match = *device_it;
     std::string device_name = device_match[1].str();
     std::string nodes_list_str = device_match[2].str();
-    std::cout << "device_name " << device_name << "\n";
-    std::cout << "nodes_list_str " << nodes_list_str << "\n";
+    //std::cout << "device_name " << device_name << "\n";
+    //std::cout << "nodes_list_str " << nodes_list_str << "\n";
     std::stringstream nodes_list(nodes_list_str);
     std::string item;
@@ -338,7 +338,7 @@ affinity_t OpenVINOParserUtils::ParseAffinity(const std::string& affinity_definition) {
   }
 
-  for (auto item : result_map){
-    std::cout << "\n" << item.first << " on " << item.second << "\n";
-  }
+  //for (auto item : result_map){
+  //  std::cout << "\n" << item.first << " on " << item.second << "\n";
+  //}
   return result_map;
 }
diff --git a/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc b/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc
index 51754046ae199..6e4145c050ebb 100644
--- a/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc
+++ b/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc
@@ -152,7 +152,7 @@ std::vector<SupportedOp> supported_op_mode = {
     {"GreaterOrEqual", V_2022_1, {"CPU", "GPU"}},
     {"GridSample", V_2022_3, {"CPU"}},
     {"GridSample", V_2023_0, {"GPU"}},
-    {"GroupQueryAttention", V_2025_0, {"CPU"}},
+    //{"GroupQueryAttention", V_2025_0, {"CPU"}},
     {"GRU", V_2024_1, {"CPU", "GPU"}},
     {"HardMax", V_2023_1, {"CPU", "GPU"}},
     {"Identity", V_2020_4, {"CPU", "GPU"}},
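
The rt_info "affinity" tag written by Set_Affinity is OpenVINO's documented mechanism for manually pinning operations under the HETERO device. A minimal standalone sketch of that mechanism, independent of this patch series ("model.xml" and "node_a" are placeholders):

    #include <openvino/openvino.hpp>

    int main() {
      ov::Core core;
      std::shared_ptr<ov::Model> model = core.read_model("model.xml");  // placeholder path

      // Pin one illustrative node to GPU and everything else to CPU,
      // mirroring what Set_Affinity does with the parsed affinity map.
      for (auto&& node : model->get_ops()) {
        node->get_rt_info()["affinity"] = node->get_friendly_name() == "node_a" ? "GPU" : "CPU";
      }

      // The HETERO plugin honors the per-node affinity hints set above.
      ov::CompiledModel compiled = core.compile_model(model, "HETERO:GPU,CPU");
      return 0;
    }
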
"; + ss << "\'" << shape.first << "': " << shape.second; + } + return ss.str(); +} + std::shared_ptr CreateOVModel(std::string&& model, const SessionContext& session_context, @@ -46,25 +56,29 @@ CreateOVModel(std::string&& model, std::cout << "CreateNgraphFunc" << std::endl; } try { - auto ov_model = OVCore::Get()->ReadModel(std::move(model), session_context.onnx_model_path_name.string()); + auto ov_model = OVCore::Get()->ReadModel(std::move(model), session_context.onnx_model_path_name.string()); + + if (!session_context.affinity.empty()) { + LOGS_DEFAULT(INFO) << log_tag << "Setting the ov nodes to specified affinity"; + Set_Affinity(ov_model, session_context); + } if (!session_context.reshape.empty()) { LOGS_DEFAULT(INFO) << log_tag << "Reshaping the ov tensor to specified shape"; ov_model->reshape(session_context.reshape); } + ov::preprocess::PrePostProcessor preproc(ov_model); + ov_model = preproc.build(); + + if (!session_context.layout.empty()) { LOGS_DEFAULT(INFO) << log_tag << "Setting the ov tensor layout to specified layout"; ov_model = Set_Layout(ov_model, session_context.layout); } - if (!session_context.affinity.empty()) { - LOGS_DEFAULT(INFO) << log_tag << "Setting the ov nodes to specified affinity"; - Set_Affinity(ov_model, session_context); - } - // Check for Constant Folding - /* if ((session_context.device_type != "NPU") && !session_context.is_wholly_supported_graph) { + if ((session_context.device_type != "NPU") && !session_context.is_wholly_supported_graph) { ov::pass::ConstantFolding pass_const_obj; pass_const_obj.run_on_model(ov_model); auto& results = const_cast(ov_model.get()->get_results()); @@ -78,7 +92,7 @@ CreateOVModel(std::string&& model, } --index; } - }*/ + } #ifndef NDEBUG if (IsDebugEnabled()) { std::string name = ov_model->get_friendly_name(); @@ -168,11 +182,8 @@ void Set_Affinity(std::shared_ptr ov_model, const SessionContext& ses auto it = session_context.affinity.find(name); if (it != session_context.affinity.end()) { ov_node->get_rt_info()["affinity"] = it->second; - std::cout << name << " on " << it->second << "\n"; } else { ov_node->get_rt_info()["affinity"] = selected_device; - std::cout << name << " on " << selected_device << "\n"; - } } } diff --git a/onnxruntime/core/providers/openvino/backend_utils.h b/onnxruntime/core/providers/openvino/backend_utils.h index db32948a24655..15ac81db1bdc1 100644 --- a/onnxruntime/core/providers/openvino/backend_utils.h +++ b/onnxruntime/core/providers/openvino/backend_utils.h @@ -108,6 +108,8 @@ bool IsModelStreamXML(std::istream& model_stream); void Set_Affinity(std::shared_ptr ov_model, const SessionContext& session_context); +std::string get_shapes_string(const reshape_t& shapes); + } // namespace backend_utils } // namespace openvino_ep } // namespace onnxruntime diff --git a/onnxruntime/core/providers/openvino/backends/basic_backend.cc b/onnxruntime/core/providers/openvino/backends/basic_backend.cc index f82e72cab8ce3..508b20213d402 100644 --- a/onnxruntime/core/providers/openvino/backends/basic_backend.cc +++ b/onnxruntime/core/providers/openvino/backends/basic_backend.cc @@ -111,7 +111,6 @@ BasicBackend::BasicBackend(std::unique_ptr& model_pr subgraph_context_.subgraph_name); } else { // For all other types use ov::ov_core read_model() to generate OV IR // followed by ov::ov_core compile_model() - std::cout << "CreateOVModel\n"; ov_model = CreateOVModel(std::move(model), session_context_, const_outputs_map_); exe_network_ = OVCore::Get()->CompileModel( ov_model, hw_target, device_config, 
enable_causallm, subgraph_context_.subgraph_name); diff --git a/onnxruntime/core/providers/openvino/backends/basic_backend.h b/onnxruntime/core/providers/openvino/backends/basic_backend.h index 690835eabb391..453efc5a56ca9 100644 --- a/onnxruntime/core/providers/openvino/backends/basic_backend.h +++ b/onnxruntime/core/providers/openvino/backends/basic_backend.h @@ -86,10 +86,13 @@ struct OnnxToOvNetworkBindings { // "Input names mismatch between OpenVINO and ONNX. ", onnx_name, // " doesn't exist in the list of OpenVINO input tensor names"); + if (!matched_names) { + continue; + } auto ov_param_index = std::distance(ov_parameters.begin(), it); - auto shape = ov_parameters[ov_param_index].get_partial_shape(); auto type = ov_parameters[ov_param_index].get_element_type(); + ParameterInfo info{onnx_name, ov_param_index, onnx_param_index, type, ParameterShape{shape}}; // Analyze shape dynamism and set flags @@ -112,7 +115,7 @@ struct OnnxToOvNetworkBindings { info.SetFullyDynamic(has_fully_dynamic); info.SetBoundedDynamic(has_bounded_dynamic); } - + input_output_map.push_back(std::move(info)); } }; diff --git a/onnxruntime/core/providers/openvino/openvino_execution_provider.cc b/onnxruntime/core/providers/openvino/openvino_execution_provider.cc index 6c8b60324bbbf..77b0821412e4c 100644 --- a/onnxruntime/core/providers/openvino/openvino_execution_provider.cc +++ b/onnxruntime/core/providers/openvino/openvino_execution_provider.cc @@ -184,7 +184,6 @@ common::Status OpenVINOExecutionProvider::Compile( for (const auto& fused_node_graph : fused_nodes) { const GraphViewer& graph_body_viewer = fused_node_graph.filtered_graph; - // Set include_embed_data to true only for the first backend manager backend_it->TryExportCompiledBlobAsEPCtxNode(graph_body_viewer, is_first); diff --git a/onnxruntime/core/providers/openvino/openvino_parser_utils.cc b/onnxruntime/core/providers/openvino/openvino_parser_utils.cc index 54510ce333a49..c4daa2232eebf 100644 --- a/onnxruntime/core/providers/openvino/openvino_parser_utils.cc +++ b/onnxruntime/core/providers/openvino/openvino_parser_utils.cc @@ -327,8 +327,6 @@ affinity_t OpenVINOParserUtils::ParseAffinity(const std::string& affinity_defini std::smatch device_match = *device_it; std::string device_name = device_match[1].str(); std::string nodes_list_str = device_match[2].str(); - //std::cout << "device_name " << device_name << "\n"; - //std::cout << "nodes_list_str " << nodes_list_str << "\n"; std::stringstream nodes_list(nodes_list_str); std::string item; @@ -337,10 +335,6 @@ affinity_t OpenVINOParserUtils::ParseAffinity(const std::string& affinity_defini } } - //for (auto item : result_map){ - // std::cout << "\n" << item.first << " on " << item.second << "\n"; - //} - return result_map; } diff --git a/onnxruntime/core/providers/openvino/openvino_provider_factory.cc b/onnxruntime/core/providers/openvino/openvino_provider_factory.cc index 78ef874d45ae0..38d52151465cd 100644 --- a/onnxruntime/core/providers/openvino/openvino_provider_factory.cc +++ b/onnxruntime/core/providers/openvino/openvino_provider_factory.cc @@ -267,7 +267,6 @@ static void ParseProviderInfo(const ProviderOptions& provider_options, } if (provider_options.contains("affinity")) { - std::cout << "Provider options contain affinity\n"; pi.affinity = OpenVINOParserUtils::ParseAffinity(provider_options.at("affinity")); } diff --git a/onnxruntime/core/providers/openvino/ov_interface.cc b/onnxruntime/core/providers/openvino/ov_interface.cc index 23be3447b8799..61477fe480566 100644 --- 
diff --git a/onnxruntime/core/providers/openvino/ov_interface.cc b/onnxruntime/core/providers/openvino/ov_interface.cc
index 23be3447b8799..61477fe480566 100644
--- a/onnxruntime/core/providers/openvino/ov_interface.cc
+++ b/onnxruntime/core/providers/openvino/ov_interface.cc
@@ -84,7 +84,7 @@ std::shared_ptr<ov::Model> OVCore::ReadModel(std::string&& model, const std::string& model_path) {
       ov::frontend::InputModel::Ptr inputModel;
 
       ov::AnyVector params{&modelStream, model_path};
-
+
       FE = manager.load_by_model(params);
       if (FE) {
         inputModel = FE->load(params);
diff --git a/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc b/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc
index 6e4145c050ebb..51754046ae199 100644
--- a/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc
+++ b/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc
@@ -152,7 +152,7 @@ std::vector<SupportedOp> supported_op_mode = {
     {"GreaterOrEqual", V_2022_1, {"CPU", "GPU"}},
     {"GridSample", V_2022_3, {"CPU"}},
     {"GridSample", V_2023_0, {"GPU"}},
-    //{"GroupQueryAttention", V_2025_0, {"CPU"}},
+    {"GroupQueryAttention", V_2025_0, {"CPU"}},
     {"GRU", V_2024_1, {"CPU", "GPU"}},
     {"HardMax", V_2023_1, {"CPU", "GPU"}},
     {"Identity", V_2020_4, {"CPU", "GPU"}},
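
A minimal application-side sketch of the new key through the public C++ API, assuming all three patches are applied ("model.onnx" and the node names are placeholders; AppendExecutionProvider_OpenVINO_V2 is the same entry point the perftest change above uses):

    #include <onnxruntime_cxx_api.h>

    #include <string>
    #include <unordered_map>

    int main() {
      Ort::Env env;
      Ort::SessionOptions session_options;

      std::unordered_map<std::string, std::string> ov_options;
      ov_options["device_type"] = "HETERO:GPU,CPU";               // Set_Affinity requires a HETERO mode string
      ov_options["affinity"] = "GPU[node_a,node_b],CPU[node_c]";  // format accepted by ParseAffinity

      session_options.AppendExecutionProvider_OpenVINO_V2(ov_options);
      Ort::Session session(env, ORT_TSTR("model.onnx"), session_options);
      return 0;
    }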