From 4bb42a467d38d47b7b6166815869667abf912506 Mon Sep 17 00:00:00 2001
From: Chao Zhang
Date: Wed, 6 Aug 2025 17:52:04 +0800
Subject: [PATCH 01/14] add rtx recipe

---
 .aitk/configs/checks.json                     |  6 +--
 .aitk/configs/model_list.json                 |  4 ++
 .../aitk/_copy.json.config                    | 31 ++++++++++++++
 .../aitk/inference_sample.ipynb               | 23 ++++++++++
 Qwen-Qwen2.5-1.5B-Instruct/aitk/info.yml      |  3 ++
 .../aitk/model_project.config                 |  4 ++
 .../aitk/qwen2_5_trtrtx_config.json           | 38 +++++++++++++++++
 .../aitk/qwen2_5_trtrtx_config.json.config    | 42 +++++++++++++++++++
 .../aitk/requirements.txt                     |  2 +-
 Qwen-Qwen2.5-1.5B-Instruct/aitk/winml.py      | 21 ++++++++++
 .../aitk/_copy.json.config                    |  8 ++++
 .../aitk/deepseek_trtrtx_config.json          | 38 +++++++++++++++++
 .../aitk/deepseek_trtrtx_config.json.config   | 42 +++++++++++++++++++
 .../aitk/inference_sample.ipynb               | 23 ++++++++++
 .../aitk/info.yml                             |  3 ++
 .../aitk/model_project.config                 |  4 ++
 .../aitk/requirements.txt                     |  2 +-
 .../aitk/winml.py                             | 21 ++++++++++
 .../aitk/_copy.json.config                    |  4 ++
 .../aitk/inference_sample.ipynb               | 24 +++++++++++
 .../aitk/requirements.txt                     |  2 +-
 .../aitk/winml.py                             | 21 ++++++++++
 .../aitk/_copy.json.config                    |  4 ++
 .../aitk/inference_sample.ipynb               | 24 +++++++++++
 ...ase-patch16-224_dml_inference_sample.ipynb | 24 +++++++++++
 ...-patch16-224_trtrtx_inference_sample.ipynb | 24 +++++++++++
 google-vit-base-patch16-224/aitk/winml.py     | 21 ++++++++++
 .../aitk/inference_sample.ipynb               | 24 +++++++++++
 .../aitk/requirements.txt                     |  2 +-
 intel-bert-base-uncased-mrpc/aitk/winml.py    | 21 ++++++++++
 .../aitk/_copy.json.config                    |  4 ++
 .../laion_clip_dml_inference_sample.ipynb     | 25 +++++++++++
 .../aitk/laion_clip_ov_inference_sample.ipynb | 28 +++++++++++++
 .../laion_clip_qdq_amd_inference_sample.ipynb | 28 +++++++++++++
 ...laion_clip_text_qnn_inference_sample.ipynb | 24 +++++++++++
 .../laion_clip_trtrtx_inference_sample.ipynb  | 25 +++++++++++
 .../aitk/laion_clip_vision_qnn.json           |  1 -
 .../aitk/winml.py                             | 21 ++++++++++
 .../aitk/_copy.json.config                    | 31 ++++++++++++++
 .../aitk/inference_sample.ipynb               | 23 ++++++++++
 .../aitk/info.yml                             |  3 ++
 .../aitk/llama3_2_trtrtx_config.json          | 38 +++++++++++++++++
 .../aitk/llama3_2_trtrtx_config.json.config   | 42 +++++++++++++++++++
 .../aitk/model_project.config                 |  4 ++
 .../aitk/requirements.txt                     |  2 +-
 .../aitk/winml.py                             | 21 ++++++++++
 .../aitk/_copy.json.config                    | 31 ++++++++++++++
 .../aitk/inference_sample.ipynb               | 23 ++++++++++
 microsoft-Phi-3.5-mini-instruct/aitk/info.yml |  3 ++
 .../aitk/model_project.config                 |  4 ++
 .../aitk/phi3_5_trtrtx_config.json            | 38 +++++++++++++++++
 .../aitk/phi3_5_trtrtx_config.json.config     | 42 +++++++++++++++++++
 .../aitk/requirements.txt                     |  2 +-
 microsoft-Phi-3.5-mini-instruct/aitk/winml.py | 21 ++++++++++
 .../aitk/inference_sample.ipynb               | 23 ++++++++++
 .../aitk/inference_sample.ipynb               | 23 ++++++++++
 .../aitk/inference_sample.ipynb               | 23 ++++++++++
 .../aitk/inference_sample.ipynb               | 24 +++++++++++
 .../aitk/resnet_dml_inference_sample.ipynb    | 23 ++++++++++
 .../aitk/resnet_trtrtx_inference_sample.ipynb | 23 ++++++++++
 .../aitk/_copy.json.config                    |  4 ++
 .../openai_clip_dml_inference_sample.ipynb    | 25 +++++++++++
 .../openai_clip_ov_inference_sample.ipynb     | 28 +++++++++++++
 ...openai_clip_qdq_amd_inference_sample.ipynb | 28 +++++++++++++
 ...penai_clip_text_qnn_inference_sample.ipynb | 24 +++++++++++
 .../openai_clip_trtrtx_inference_sample.ipynb | 25 +++++++++++
 .../aitk/openai_clip_vision_qnn.json          |  1 -
 openai-clip-vit-base-patch16/aitk/winml.py    | 21 ++++++++++
 .../aitk/_copy.json.config                    |  4 ++
 .../openai_clip_dml_inference_sample.ipynb    | 25 +++++++++++
 .../openai_clip_ov_inference_sample.ipynb     | 28 +++++++++++++
 ...openai_clip_qdq_amd_inference_sample.ipynb | 28 +++++++++++++
 ...penai_clip_text_qnn_inference_sample.ipynb | 24 +++++++++++
 .../openai_clip_trtrtx_inference_sample.ipynb | 25 +++++++++++
 .../aitk/openai_clip_vision_qnn.json          |  1 -
 openai-clip-vit-base-patch32/aitk/winml.py    | 21 ++++++++++
 76 files changed, 1433 insertions(+), 12 deletions(-)
 create mode 100644 Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_trtrtx_config.json
 create mode 100644 Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_trtrtx_config.json.config
 create mode 100644 Qwen-Qwen2.5-1.5B-Instruct/aitk/winml.py
 create mode 100644 deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/_copy.json.config
 create mode 100644 deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_trtrtx_config.json
 create mode 100644 deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_trtrtx_config.json.config
 create mode 100644 deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/winml.py
 create mode 100644 google-bert-bert-base-multilingual-cased/aitk/winml.py
 create mode 100644 google-vit-base-patch16-224/aitk/winml.py
 create mode 100644 intel-bert-base-uncased-mrpc/aitk/winml.py
 create mode 100644 laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/winml.py
 create mode 100644 meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_trtrtx_config.json
 create mode 100644 meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_trtrtx_config.json.config
 create mode 100644 meta-llama-Llama-3.2-1B-Instruct/aitk/winml.py
 create mode 100644 microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_trtrtx_config.json
 create mode 100644 microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_trtrtx_config.json.config
 create mode 100644 microsoft-Phi-3.5-mini-instruct/aitk/winml.py
 create mode 100644 openai-clip-vit-base-patch16/aitk/winml.py
 create mode 100644 openai-clip-vit-base-patch32/aitk/winml.py

diff --git a/.aitk/configs/checks.json b/.aitk/configs/checks.json
index ed09ed73..5f84ecc8 100644
--- a/.aitk/configs/checks.json
+++ b/.aitk/configs/checks.json
@@ -1,12 +1,12 @@
 {
-  "configCheck": 75,
+  "configCheck": 79,
   "extensionCheck": 1,
   "gitignoreCheck": 31,
   "inferenceModelCheck": 22,
   "ipynbCheck": 50,
   "modelProjectCheck": 32,
   "oliveCheck": 0,
-  "oliveJsonCheck": 75,
-  "pathCheck": 744,
+  "oliveJsonCheck": 79,
+  "pathCheck": 752,
   "requirementsCheck": 31
 }
diff --git a/.aitk/configs/model_list.json b/.aitk/configs/model_list.json
index db801dfc..0edc00bd 100644
--- a/.aitk/configs/model_list.json
+++ b/.aitk/configs/model_list.json
@@ -23,6 +23,7 @@
       "runtimes": [
         "QNN",
         "AMDNPU",
+        "NvidiaTRTRTX",
         "DML",
         "IntelCPU",
         "IntelGPU",
@@ -147,6 +148,7 @@
       "runtimes": [
         "QNN",
         "AMDNPU",
+        "NvidiaTRTRTX",
         "DML",
         "IntelCPU",
         "IntelGPU",
@@ -195,6 +197,7 @@
       "runtimes": [
         "QNN",
         "AMDNPU",
+        "NvidiaTRTRTX",
         "DML",
         "IntelCPU",
         "IntelGPU",
@@ -358,6 +361,7 @@
       "runtimes": [
         "QNN",
         "AMDNPU",
+        "NvidiaTRTRTX",
         "DML",
         "IntelCPU",
         "IntelGPU",
diff --git a/Qwen-Qwen2.5-1.5B-Instruct/aitk/_copy.json.config b/Qwen-Qwen2.5-1.5B-Instruct/aitk/_copy.json.config
index a2d6f70d..999a03d6 100644
--- a/Qwen-Qwen2.5-1.5B-Instruct/aitk/_copy.json.config
+++ b/Qwen-Qwen2.5-1.5B-Instruct/aitk/_copy.json.config
@@ -16,6 +16,14 @@
           "find": "deepseek_ov_config",
           "replace": "qwen2_5_ov_config"
         },
+        {
+          "find": "deepseek_trtrtx_config",
+          "replace": "qwen2_5_trtrtx_config"
+        },
+        {
+          "find": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
+          "replace": "Qwen/Qwen2.5-1.5B-Instruct"
+        },
         {
           "find": "deepseek_dml_config",
           "replace": "qwen2_5_dml_config"
@@ -84,6 +92,20 @@
         }
       ]
     },
+    {
+      "src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_trtrtx_config.json",
+      "dst": "qwen2_5_trtrtx_config.json",
+      "replacements": [
+        {
+          "find": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
+          "replace": "Qwen/Qwen2.5-1.5B-Instruct"
+        },
+        {
+          "find": "model/deepseek",
+          "replace": "model/qwen2_5"
+        }
+      ]
+    },
     {
       "src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_dml_config.json",
       "dst": "qwen2_5_dml_config.json",
@@ -98,6 +120,11 @@
         }
       ]
     },
+    {
+      "src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_trtrtx_config.json.config",
+      "dst": "qwen2_5_trtrtx_config.json.config",
+      "replacements": []
+    },
     {
       "src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_dml_config.json.config",
       "dst": "qwen2_5_dml_config.json.config",
@@ -135,6 +162,10 @@
           "replace": "<|im_start|>user\\\\n{input}<|im_end|>\\\\n<|im_start|>assistant\\\\n"
         }
       ]
+    },
+    {
+      "src": "../../intel-bert-base-uncased-mrpc/aitk/winml.py",
+      "dst": "winml.py"
     }
   ]
 }
diff --git a/Qwen-Qwen2.5-1.5B-Instruct/aitk/inference_sample.ipynb b/Qwen-Qwen2.5-1.5B-Instruct/aitk/inference_sample.ipynb
index 7757249e..c61db2d9 100644
--- a/Qwen-Qwen2.5-1.5B-Instruct/aitk/inference_sample.ipynb
+++ b/Qwen-Qwen2.5-1.5B-Instruct/aitk/inference_sample.ipynb
@@ -11,6 +11,29 @@
     "model_folder = \"./model\""
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# reference: https://learn.microsoft.com/en-us/windows/ai/new-windows-ml/tutorial?tabs=python#acquiring-the-model-and-preprocessing\n",
+    "import subprocess\n",
+    "import json\n",
+    "import sys\n",
+    "import os\n",
+    "import onnxruntime_genai as og\n",
+    "\n",
+    "def register_execution_providers():\n",
+    "    worker_script = os.path.abspath('winml.py')\n",
+    "    result = subprocess.check_output([sys.executable, worker_script], text=True)\n",
+    "    paths = json.loads(result)\n",
+    "    for item in paths.items():\n",
+    "        og.register_execution_provider_library(item[0], item[1])\n",
+    "\n",
+    "register_execution_providers()"
+   ]
+  },
  {
   "cell_type": "code",
   "execution_count": null,
diff --git a/Qwen-Qwen2.5-1.5B-Instruct/aitk/info.yml b/Qwen-Qwen2.5-1.5B-Instruct/aitk/info.yml
index 81cd2194..e7bacd4c 100644
--- a/Qwen-Qwen2.5-1.5B-Instruct/aitk/info.yml
+++ b/Qwen-Qwen2.5-1.5B-Instruct/aitk/info.yml
@@ -14,6 +14,9 @@ recipes:
       - cpu
       - gpu
     ep: OpenVINOExecutionProvider
+  - file: "qwen2_5_trtrtx_config.json"
+    device: gpu
+    ep: NvTensorRTRTXExecutionProvider
   - file: "qwen2_5_dml_config.json"
     device: gpu
     ep: DmlExecutionProvider
diff --git a/Qwen-Qwen2.5-1.5B-Instruct/aitk/model_project.config b/Qwen-Qwen2.5-1.5B-Instruct/aitk/model_project.config
index 68672843..61cb1603 100644
--- a/Qwen-Qwen2.5-1.5B-Instruct/aitk/model_project.config
+++ b/Qwen-Qwen2.5-1.5B-Instruct/aitk/model_project.config
@@ -12,6 +12,10 @@
       "file": "qwen2_5_ov_config.json",
       "templateName": "qwen2_5_ov_config"
     },
+    {
+      "file": "qwen2_5_trtrtx_config.json",
+      "templateName": "qwen2_5_trtrtx_config"
+    },
     {
       "file": "qwen2_5_dml_config.json",
       "templateName": "qwen2_5_dml_config"
diff --git a/Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_trtrtx_config.json b/Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_trtrtx_config.json
new file mode 100644
index 00000000..331d96fb
--- /dev/null
+++ b/Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_trtrtx_config.json
@@ -0,0 +1,38 @@
+{
+  "input_model": {
+    "type": "HfModel",
+    "model_path": "Qwen/Qwen2.5-1.5B-Instruct"
+  },
+  "systems": {
+    "local_system": {
"type": "LocalSystem", + "accelerators": [ { "device": "gpu", "execution_providers": [ "NvTensorRTRTXExecutionProvider" ] } ] + } + }, + "data_configs": [ + { + "name": "wikitext2_train", + "type": "HuggingfaceContainer", + "load_dataset_config": { + "data_name": "wikitext", + "subset": "wikitext-2-raw-v1", + "split": "train" + }, + "pre_process_data_config": { + "strategy": "line-by-line", + "add_special_tokens": false, + "max_samples": 128, + "max_seq_len": 512 + } + } + ], + "passes": { + "builder": { "type": "ModelBuilder", "precision": "fp16" } + }, + "target": "local_system", + "log_severity_level": 1, + "output_dir": "model/qwen2_5", + "cache_dir": "cache", + "no_artifacts": true, + "evaluate_input_model": false +} \ No newline at end of file diff --git a/Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_trtrtx_config.json.config b/Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_trtrtx_config.json.config new file mode 100644 index 00000000..cb987c65 --- /dev/null +++ b/Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_trtrtx_config.json.config @@ -0,0 +1,42 @@ +{ + "name": "Convert to NVIDIA TRT for RTX", + "oliveFile": "", + "isLLM": true, + "debugInfo": { + "autoGenerated": true, + "useModelBuilder": "builder" + }, + "addCpu": false, + "runtime": { + "autoGenerated": true, + "name": "Evaluate on", + "type": "enum", + "displayNames": [ + "NVIDIA TensorRT for RTX" + ], + "path": "systems.local_system.accelerators.0.execution_providers.0", + "values": [ + "NvTensorRTRTXExecutionProvider" + ], + "readOnly": false + }, + "sections": [ + { + "autoGenerated": true, + "name": "Convert", + "phase": "Conversion", + "parameters": [], + "toggle": { + "autoGenerated": true, + "name": "Convert to ONNX format", + "type": "bool", + "path": "passes.builder", + "actions": [ + [], + [] + ], + "readOnly": true + } + } + ] +} diff --git a/Qwen-Qwen2.5-1.5B-Instruct/aitk/requirements.txt b/Qwen-Qwen2.5-1.5B-Instruct/aitk/requirements.txt index 7af84714..e3c23f80 100644 --- a/Qwen-Qwen2.5-1.5B-Instruct/aitk/requirements.txt +++ b/Qwen-Qwen2.5-1.5B-Instruct/aitk/requirements.txt @@ -1,4 +1,4 @@ # This file will be installed together with AITK runtime requirements # For the full requirements, see AITK datasets -optimum +optimum==1.26.0 diff --git a/Qwen-Qwen2.5-1.5B-Instruct/aitk/winml.py b/Qwen-Qwen2.5-1.5B-Instruct/aitk/winml.py new file mode 100644 index 00000000..74a12c53 --- /dev/null +++ b/Qwen-Qwen2.5-1.5B-Instruct/aitk/winml.py @@ -0,0 +1,21 @@ +import json + +def _get_ep_paths() -> dict[str, str]: + from winui3.microsoft.windows.applicationmodel.dynamicdependency.bootstrap import ( + InitializeOptions, + initialize + ) + import winui3.microsoft.windows.ai.machinelearning as winml + eps = {} + with initialize(options = InitializeOptions.ON_NO_MATCH_SHOW_UI): + catalog = winml.ExecutionProviderCatalog.get_default() + providers = catalog.find_all_providers() + for provider in providers: + provider.ensure_ready_async().get() + eps[provider.name] = provider.library_path + # DO NOT call provider.try_register in python. That will register to the native env. 
+    return eps
+
+if __name__ == "__main__":
+    eps = _get_ep_paths()
+    print(json.dumps(eps))
diff --git a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/_copy.json.config b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/_copy.json.config
new file mode 100644
index 00000000..6fd21fb1
--- /dev/null
+++ b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/_copy.json.config
@@ -0,0 +1,8 @@
+{
+  "copies": [
+    {
+      "src": "../../intel-bert-base-uncased-mrpc/aitk/winml.py",
+      "dst": "winml.py"
+    }
+  ]
+}
diff --git a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_trtrtx_config.json b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_trtrtx_config.json
new file mode 100644
index 00000000..cff36b19
--- /dev/null
+++ b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_trtrtx_config.json
@@ -0,0 +1,38 @@
+{
+  "input_model": {
+    "type": "HfModel",
+    "model_path": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
+  },
+  "systems": {
+    "local_system": {
+      "type": "LocalSystem",
+      "accelerators": [ { "device": "gpu", "execution_providers": [ "NvTensorRTRTXExecutionProvider" ] } ]
+    }
+  },
+  "data_configs": [
+    {
+      "name": "wikitext2_train",
+      "type": "HuggingfaceContainer",
+      "load_dataset_config": {
+        "data_name": "wikitext",
+        "subset": "wikitext-2-raw-v1",
+        "split": "train"
+      },
+      "pre_process_data_config": {
+        "strategy": "line-by-line",
+        "add_special_tokens": false,
+        "max_samples": 128,
+        "max_seq_len": 512
+      }
+    }
+  ],
+  "passes": {
+    "builder": { "type": "ModelBuilder", "precision": "fp16" }
+  },
+  "target": "local_system",
+  "log_severity_level": 1,
+  "output_dir": "model/deepseek",
+  "cache_dir": "cache",
+  "no_artifacts": true,
+  "evaluate_input_model": false
+}
\ No newline at end of file
diff --git a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_trtrtx_config.json.config b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_trtrtx_config.json.config
new file mode 100644
index 00000000..cb987c65
--- /dev/null
+++ b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_trtrtx_config.json.config
@@ -0,0 +1,42 @@
+{
+  "name": "Convert to NVIDIA TRT for RTX",
+  "oliveFile": "",
+  "isLLM": true,
+  "debugInfo": {
+    "autoGenerated": true,
+    "useModelBuilder": "builder"
+  },
+  "addCpu": false,
+  "runtime": {
+    "autoGenerated": true,
+    "name": "Evaluate on",
+    "type": "enum",
+    "displayNames": [
+      "NVIDIA TensorRT for RTX"
+    ],
+    "path": "systems.local_system.accelerators.0.execution_providers.0",
+    "values": [
+      "NvTensorRTRTXExecutionProvider"
+    ],
+    "readOnly": false
+  },
+  "sections": [
+    {
+      "autoGenerated": true,
+      "name": "Convert",
+      "phase": "Conversion",
+      "parameters": [],
+      "toggle": {
+        "autoGenerated": true,
+        "name": "Convert to ONNX format",
+        "type": "bool",
+        "path": "passes.builder",
+        "actions": [
+          [],
+          []
+        ],
+        "readOnly": true
+      }
+    }
+  ]
+}
diff --git a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/inference_sample.ipynb b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/inference_sample.ipynb
index 67a72436..10076709 100644
--- a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/inference_sample.ipynb
+++ b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/inference_sample.ipynb
@@ -11,6 +11,29 @@
     "model_folder = \"./model\""
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# reference: https://learn.microsoft.com/en-us/windows/ai/new-windows-ml/tutorial?tabs=python#acquiring-the-model-and-preprocessing\n",
+    "import subprocess\n",
+    "import json\n",
+    "import sys\n",
+    "import os\n",
+    "import onnxruntime_genai as og\n",
+    "\n",
+    "def register_execution_providers():\n",
+    "    worker_script = os.path.abspath('winml.py')\n",
+    "    result = subprocess.check_output([sys.executable, worker_script], text=True)\n",
+    "    paths = json.loads(result)\n",
+    "    for item in paths.items():\n",
+    "        og.register_execution_provider_library(item[0], item[1])\n",
+    "\n",
+    "register_execution_providers()"
+   ]
+  },
  {
   "cell_type": "code",
   "execution_count": null,
diff --git a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/info.yml b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/info.yml
index bcf926f3..0a43310f 100644
--- a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/info.yml
+++ b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/info.yml
@@ -16,6 +16,9 @@
     ep: OpenVINOExecutionProvider
     aitk:
       oliveFile: "deepseek/openvino/DeepSeek-R1-Distill-Qwen-1.5B_context_ov_dynamic_sym_gs128_bkp_int8_sym_r1.json"
+  - file: "deepseek_trtrtx_config.json"
+    device: gpu
+    ep: NvTensorRTRTXExecutionProvider
   - file: "deepseek_dml_config.json"
     device: gpu
     ep: DmlExecutionProvider
diff --git a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/model_project.config b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/model_project.config
index dab152a5..d78581fe 100644
--- a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/model_project.config
+++ b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/model_project.config
@@ -12,6 +12,10 @@
       "file": "deepseek_ov_config.json",
       "templateName": "deepseek_ov_config"
     },
+    {
+      "file": "deepseek_trtrtx_config.json",
+      "templateName": "deepseek_trtrtx_config"
+    },
     {
       "file": "deepseek_dml_config.json",
       "templateName": "deepseek_dml_config"
diff --git a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/requirements.txt b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/requirements.txt
index 7af84714..e3c23f80 100644
--- a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/requirements.txt
+++ b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/requirements.txt
@@ -1,4 +1,4 @@
 # This file will be installed together with AITK runtime requirements
 # For the full requirements, see AITK
 datasets
-optimum
+optimum==1.26.0
diff --git a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/winml.py b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/winml.py
new file mode 100644
index 00000000..74a12c53
--- /dev/null
+++ b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/winml.py
@@ -0,0 +1,21 @@
+import json
+
+def _get_ep_paths() -> dict[str, str]:
+    from winui3.microsoft.windows.applicationmodel.dynamicdependency.bootstrap import (
+        InitializeOptions,
+        initialize
+    )
+    import winui3.microsoft.windows.ai.machinelearning as winml
+    eps = {}
+    with initialize(options = InitializeOptions.ON_NO_MATCH_SHOW_UI):
+        catalog = winml.ExecutionProviderCatalog.get_default()
+        providers = catalog.find_all_providers()
+        for provider in providers:
+            provider.ensure_ready_async().get()
+            eps[provider.name] = provider.library_path
+            # DO NOT call provider.try_register in python. That will register to the native env.
+    return eps
+
+if __name__ == "__main__":
+    eps = _get_ep_paths()
+    print(json.dumps(eps))
diff --git a/google-bert-bert-base-multilingual-cased/aitk/_copy.json.config b/google-bert-bert-base-multilingual-cased/aitk/_copy.json.config
index ff27826d..acda97ab 100644
--- a/google-bert-bert-base-multilingual-cased/aitk/_copy.json.config
+++ b/google-bert-bert-base-multilingual-cased/aitk/_copy.json.config
@@ -13,6 +13,10 @@
           "replace": "Convert to Qualcomm NPU"
         }
       ]
+    },
+    {
+      "src": "../../intel-bert-base-uncased-mrpc/aitk/winml.py",
+      "dst": "winml.py"
     }
   ]
 }
diff --git a/google-bert-bert-base-multilingual-cased/aitk/inference_sample.ipynb b/google-bert-bert-base-multilingual-cased/aitk/inference_sample.ipynb
index b86fca63..7f92e5e8 100644
--- a/google-bert-bert-base-multilingual-cased/aitk/inference_sample.ipynb
+++ b/google-bert-bert-base-multilingual-cased/aitk/inference_sample.ipynb
@@ -13,6 +13,29 @@
     "    onnx_model_path = \"./model/openvino_model_st_quant.onnx\""
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# reference: https://learn.microsoft.com/en-us/windows/ai/new-windows-ml/tutorial?tabs=python#acquiring-the-model-and-preprocessing\n",
+    "import subprocess\n",
+    "import json\n",
+    "import sys\n",
+    "import os\n",
+    "import onnxruntime as ort\n",
+    "\n",
+    "def register_execution_providers():\n",
+    "    worker_script = os.path.abspath('winml.py')\n",
+    "    result = subprocess.check_output([sys.executable, worker_script], text=True)\n",
+    "    paths = json.loads(result)\n",
+    "    for item in paths.items():\n",
+    "        ort.register_execution_provider_library(item[0], item[1])\n",
+    "\n",
+    "register_execution_providers()"
+   ]
+  },
  {
   "cell_type": "code",
   "execution_count": null,
@@ -76,6 +99,7 @@
    "    if ep_device.ep_name == ep_name and ep_device.device.type == device_type:\n",
    "        print(f\"Adding {ep_name} for {device_type}\")\n",
    "        session_options.add_provider_for_devices([ep_device], {} if ep_options is None else ep_options)\n",
+   "        break\n",
    "\n",
    "\n",
    "session_options = ort.SessionOptions()\n",
diff --git a/google-bert-bert-base-multilingual-cased/aitk/requirements.txt b/google-bert-bert-base-multilingual-cased/aitk/requirements.txt
index b02be515..3fac61d6 100644
--- a/google-bert-bert-base-multilingual-cased/aitk/requirements.txt
+++ b/google-bert-bert-base-multilingual-cased/aitk/requirements.txt
@@ -2,4 +2,4 @@
 # For the full requirements, see AITK
 olive-ai
 datasets
-optimum
+optimum==1.26.0
diff --git a/google-bert-bert-base-multilingual-cased/aitk/winml.py b/google-bert-bert-base-multilingual-cased/aitk/winml.py
new file mode 100644
index 00000000..74a12c53
--- /dev/null
+++ b/google-bert-bert-base-multilingual-cased/aitk/winml.py
@@ -0,0 +1,21 @@
+import json
+
+def _get_ep_paths() -> dict[str, str]:
+    from winui3.microsoft.windows.applicationmodel.dynamicdependency.bootstrap import (
+        InitializeOptions,
+        initialize
+    )
+    import winui3.microsoft.windows.ai.machinelearning as winml
+    eps = {}
+    with initialize(options = InitializeOptions.ON_NO_MATCH_SHOW_UI):
+        catalog = winml.ExecutionProviderCatalog.get_default()
+        providers = catalog.find_all_providers()
+        for provider in providers:
+            provider.ensure_ready_async().get()
+            eps[provider.name] = provider.library_path
+            # DO NOT call provider.try_register in python. That will register to the native env.
+    return eps
+
+if __name__ == "__main__":
+    eps = _get_ep_paths()
+    print(json.dumps(eps))
diff --git a/google-vit-base-patch16-224/aitk/_copy.json.config b/google-vit-base-patch16-224/aitk/_copy.json.config
index 6a948f91..f924a60d 100644
--- a/google-vit-base-patch16-224/aitk/_copy.json.config
+++ b/google-vit-base-patch16-224/aitk/_copy.json.config
@@ -37,6 +37,10 @@
           "replace": "NvTensorRTRTXExecutionProvider"
         }
       ]
+    },
+    {
+      "src": "../../intel-bert-base-uncased-mrpc/aitk/winml.py",
+      "dst": "winml.py"
     }
   ]
 }
diff --git a/google-vit-base-patch16-224/aitk/inference_sample.ipynb b/google-vit-base-patch16-224/aitk/inference_sample.ipynb
index 345bbece..ddcf70b1 100644
--- a/google-vit-base-patch16-224/aitk/inference_sample.ipynb
+++ b/google-vit-base-patch16-224/aitk/inference_sample.ipynb
@@ -13,6 +13,29 @@
     "    onnx_model_path = \"./model/ov_model_st_quant.onnx\""
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# reference: https://learn.microsoft.com/en-us/windows/ai/new-windows-ml/tutorial?tabs=python#acquiring-the-model-and-preprocessing\n",
+    "import subprocess\n",
+    "import json\n",
+    "import sys\n",
+    "import os\n",
+    "import onnxruntime as ort\n",
+    "\n",
+    "def register_execution_providers():\n",
+    "    worker_script = os.path.abspath('winml.py')\n",
+    "    result = subprocess.check_output([sys.executable, worker_script], text=True)\n",
+    "    paths = json.loads(result)\n",
+    "    for item in paths.items():\n",
+    "        ort.register_execution_provider_library(item[0], item[1])\n",
+    "\n",
+    "register_execution_providers()"
+   ]
+  },
  {
   "cell_type": "code",
   "execution_count": null,
@@ -167,6 +190,7 @@
    "    if ep_device.ep_name == ep_name and ep_device.device.type == device_type:\n",
    "        print(f\"Adding {ep_name} for {device_type}\")\n",
    "        session_options.add_provider_for_devices([ep_device], {} if ep_options is None else ep_options)\n",
+   "        break\n",
    "\n",
    "\n",
    "session_options = ort.SessionOptions()\n",
diff --git a/google-vit-base-patch16-224/aitk/vit-base-patch16-224_dml_inference_sample.ipynb b/google-vit-base-patch16-224/aitk/vit-base-patch16-224_dml_inference_sample.ipynb
index d53b5df6..e7c6d91e 100644
--- a/google-vit-base-patch16-224/aitk/vit-base-patch16-224_dml_inference_sample.ipynb
+++ b/google-vit-base-patch16-224/aitk/vit-base-patch16-224_dml_inference_sample.ipynb
@@ -13,6 +13,29 @@
     "    onnx_model_path = \"./model/ov_model_st_quant.onnx\""
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# reference: https://learn.microsoft.com/en-us/windows/ai/new-windows-ml/tutorial?tabs=python#acquiring-the-model-and-preprocessing\n",
+    "import subprocess\n",
+    "import json\n",
+    "import sys\n",
+    "import os\n",
+    "import onnxruntime as ort\n",
+    "\n",
+    "def register_execution_providers():\n",
+    "    worker_script = os.path.abspath('winml.py')\n",
+    "    result = subprocess.check_output([sys.executable, worker_script], text=True)\n",
+    "    paths = json.loads(result)\n",
+    "    for item in paths.items():\n",
+    "        ort.register_execution_provider_library(item[0], item[1])\n",
+    "\n",
+    "register_execution_providers()"
+   ]
+  },
  {
   "cell_type": "code",
   "execution_count": null,
@@ -167,6 +190,7 @@
    "    if ep_device.ep_name == ep_name and ep_device.device.type == device_type:\n",
    "        print(f\"Adding {ep_name} for {device_type}\")\n",
    "        session_options.add_provider_for_devices([ep_device], {} if ep_options is None else ep_options)\n",
+   "        break\n",
    "\n",
    "\n",
    "session_options = ort.SessionOptions()\n",
diff --git a/google-vit-base-patch16-224/aitk/vit-base-patch16-224_trtrtx_inference_sample.ipynb b/google-vit-base-patch16-224/aitk/vit-base-patch16-224_trtrtx_inference_sample.ipynb
index 98fd8b81..c7bb23c5 100644
--- a/google-vit-base-patch16-224/aitk/vit-base-patch16-224_trtrtx_inference_sample.ipynb
+++ b/google-vit-base-patch16-224/aitk/vit-base-patch16-224_trtrtx_inference_sample.ipynb
@@ -13,6 +13,29 @@
     "    onnx_model_path = \"./model/ov_model_st_quant.onnx\""
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# reference: https://learn.microsoft.com/en-us/windows/ai/new-windows-ml/tutorial?tabs=python#acquiring-the-model-and-preprocessing\n",
+    "import subprocess\n",
+    "import json\n",
+    "import sys\n",
+    "import os\n",
+    "import onnxruntime as ort\n",
+    "\n",
+    "def register_execution_providers():\n",
+    "    worker_script = os.path.abspath('winml.py')\n",
+    "    result = subprocess.check_output([sys.executable, worker_script], text=True)\n",
+    "    paths = json.loads(result)\n",
+    "    for item in paths.items():\n",
+    "        ort.register_execution_provider_library(item[0], item[1])\n",
+    "\n",
+    "register_execution_providers()"
+   ]
+  },
  {
   "cell_type": "code",
   "execution_count": null,
@@ -167,6 +190,7 @@
    "    if ep_device.ep_name == ep_name and ep_device.device.type == device_type:\n",
    "        print(f\"Adding {ep_name} for {device_type}\")\n",
    "        session_options.add_provider_for_devices([ep_device], {} if ep_options is None else ep_options)\n",
+   "        break\n",
    "\n",
    "\n",
    "session_options = ort.SessionOptions()\n",
diff --git a/google-vit-base-patch16-224/aitk/winml.py b/google-vit-base-patch16-224/aitk/winml.py
new file mode 100644
index 00000000..74a12c53
--- /dev/null
+++ b/google-vit-base-patch16-224/aitk/winml.py
@@ -0,0 +1,21 @@
+import json
+
+def _get_ep_paths() -> dict[str, str]:
+    from winui3.microsoft.windows.applicationmodel.dynamicdependency.bootstrap import (
+        InitializeOptions,
+        initialize
+    )
+    import winui3.microsoft.windows.ai.machinelearning as winml
+    eps = {}
+    with initialize(options = InitializeOptions.ON_NO_MATCH_SHOW_UI):
+        catalog = winml.ExecutionProviderCatalog.get_default()
+        providers = catalog.find_all_providers()
+        for provider in providers:
+            provider.ensure_ready_async().get()
+            eps[provider.name] = provider.library_path
+            # DO NOT call provider.try_register in python. That will register to the native env.
+    return eps
+
+if __name__ == "__main__":
+    eps = _get_ep_paths()
+    print(json.dumps(eps))
diff --git a/intel-bert-base-uncased-mrpc/aitk/inference_sample.ipynb b/intel-bert-base-uncased-mrpc/aitk/inference_sample.ipynb
index 53782e1f..689134e2 100644
--- a/intel-bert-base-uncased-mrpc/aitk/inference_sample.ipynb
+++ b/intel-bert-base-uncased-mrpc/aitk/inference_sample.ipynb
@@ -13,6 +13,29 @@
     "    onnx_model_path = \"./model/ov_model_st_quant.onnx\""
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# reference: https://learn.microsoft.com/en-us/windows/ai/new-windows-ml/tutorial?tabs=python#acquiring-the-model-and-preprocessing\n",
+    "import subprocess\n",
+    "import json\n",
+    "import sys\n",
+    "import os\n",
+    "import onnxruntime as ort\n",
+    "\n",
+    "def register_execution_providers():\n",
+    "    worker_script = os.path.abspath('winml.py')\n",
+    "    result = subprocess.check_output([sys.executable, worker_script], text=True)\n",
+    "    paths = json.loads(result)\n",
+    "    for item in paths.items():\n",
+    "        ort.register_execution_provider_library(item[0], item[1])\n",
+    "\n",
+    "register_execution_providers()"
+   ]
+  },
  {
   "cell_type": "code",
   "execution_count": null,
@@ -33,6 +56,7 @@
    "    if ep_device.ep_name == ep_name and ep_device.device.type == device_type:\n",
    "        print(f\"Adding {ep_name} for {device_type}\")\n",
    "        session_options.add_provider_for_devices([ep_device], {} if ep_options is None else ep_options)\n",
+   "        break\n",
    "\n",
    "\n",
    "session_options = ort.SessionOptions()\n",
diff --git a/intel-bert-base-uncased-mrpc/aitk/requirements.txt b/intel-bert-base-uncased-mrpc/aitk/requirements.txt
index bad441ca..69515098 100644
--- a/intel-bert-base-uncased-mrpc/aitk/requirements.txt
+++ b/intel-bert-base-uncased-mrpc/aitk/requirements.txt
@@ -1,4 +1,4 @@
 # This file will be installed together with AITK runtime requirements
 # For the full requirements, see AITK
 olive-ai
-optimum
+optimum==1.26.0
diff --git a/intel-bert-base-uncased-mrpc/aitk/winml.py b/intel-bert-base-uncased-mrpc/aitk/winml.py
new file mode 100644
index 00000000..74a12c53
--- /dev/null
+++ b/intel-bert-base-uncased-mrpc/aitk/winml.py
@@ -0,0 +1,21 @@
+import json
+
+def _get_ep_paths() -> dict[str, str]:
+    from winui3.microsoft.windows.applicationmodel.dynamicdependency.bootstrap import (
+        InitializeOptions,
+        initialize
+    )
+    import winui3.microsoft.windows.ai.machinelearning as winml
+    eps = {}
+    with initialize(options = InitializeOptions.ON_NO_MATCH_SHOW_UI):
+        catalog = winml.ExecutionProviderCatalog.get_default()
+        providers = catalog.find_all_providers()
+        for provider in providers:
+            provider.ensure_ready_async().get()
+            eps[provider.name] = provider.library_path
+            # DO NOT call provider.try_register in python. That will register to the native env.
+    return eps
+
+if __name__ == "__main__":
+    eps = _get_ep_paths()
+    print(json.dumps(eps))
diff --git a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/_copy.json.config b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/_copy.json.config
index eb77a259..f99b7656 100644
--- a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/_copy.json.config
+++ b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/_copy.json.config
@@ -218,6 +218,10 @@
     {
       "src": "../../openai-clip-vit-base-patch16/aitk/requirements.txt",
      "dst": "requirements.txt"
+    },
+    {
+      "src": "../../intel-bert-base-uncased-mrpc/aitk/winml.py",
+      "dst": "winml.py"
     }
   ]
 }
diff --git a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_dml_inference_sample.ipynb b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_dml_inference_sample.ipynb
index c33db85d..e174c596 100644
--- a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_dml_inference_sample.ipynb
+++ b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_dml_inference_sample.ipynb
@@ -11,6 +11,30 @@
     "ExecutionProvider=\"DmlExecutionProvider\""
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "22477669",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# reference: https://learn.microsoft.com/en-us/windows/ai/new-windows-ml/tutorial?tabs=python#acquiring-the-model-and-preprocessing\n",
+    "import subprocess\n",
+    "import json\n",
+    "import sys\n",
+    "import os\n",
+    "import onnxruntime as ort\n",
+    "\n",
+    "def register_execution_providers():\n",
+    "    worker_script = os.path.abspath('winml.py')\n",
+    "    result = subprocess.check_output([sys.executable, worker_script], text=True)\n",
+    "    paths = json.loads(result)\n",
+    "    for item in paths.items():\n",
+    "        ort.register_execution_provider_library(item[0], item[1])\n",
+    "\n",
+    "register_execution_providers()"
+   ]
+  },
  {
   "cell_type": "code",
   "execution_count": null,
@@ -42,6 +66,7 @@
    "    if ep_device.ep_name == ep_name and ep_device.device.type == device_type:\n",
    "        print(f\"Adding {ep_name} for {device_type}\")\n",
    "        session_options.add_provider_for_devices([ep_device], {} if ep_options is None else ep_options)\n",
+   "        break\n",
    " \n",
    "opts = ort.SessionOptions()\n",
    " \n",
diff --git a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_ov_inference_sample.ipynb b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_ov_inference_sample.ipynb
index df300a10..91277a22 100644
--- a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_ov_inference_sample.ipynb
+++ b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_ov_inference_sample.ipynb
@@ -15,6 +15,34 @@
     "ExecutionProvider=\"OpenVINOExecutionProvider\""
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "bf6bb9b8",
+   "metadata": {
+    "vscode": {
+     "languageId": "plaintext"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "# reference: https://learn.microsoft.com/en-us/windows/ai/new-windows-ml/tutorial?tabs=python#acquiring-the-model-and-preprocessing\n",
+    "import subprocess\n",
+    "import json\n",
+    "import sys\n",
+    "import os\n",
+    "import onnxruntime as ort\n",
+    "\n",
+    "def register_execution_providers():\n",
+    "    worker_script = os.path.abspath('winml.py')\n",
+    "    result = subprocess.check_output([sys.executable, worker_script], text=True)\n",
+    "    paths = json.loads(result)\n",
+    "    for item in paths.items():\n",
+    "        ort.register_execution_provider_library(item[0], item[1])\n",
+    "\n",
+    "register_execution_providers()"
+   ]
+  },
  {
   "cell_type": "code",
   "execution_count": null,
diff --git a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_qdq_amd_inference_sample.ipynb b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_qdq_amd_inference_sample.ipynb
index b5dd1398..e3267d68 100644
--- a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_qdq_amd_inference_sample.ipynb
+++ b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_qdq_amd_inference_sample.ipynb
@@ -15,6 +15,34 @@
     "ExecutionProvider=\"VitisAIExecutionProvider\""
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "bf6bb9b8",
+   "metadata": {
+    "vscode": {
+     "languageId": "plaintext"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "# reference: https://learn.microsoft.com/en-us/windows/ai/new-windows-ml/tutorial?tabs=python#acquiring-the-model-and-preprocessing\n",
+    "import subprocess\n",
+    "import json\n",
+    "import sys\n",
+    "import os\n",
+    "import onnxruntime as ort\n",
+    "\n",
+    "def register_execution_providers():\n",
+    "    worker_script = os.path.abspath('winml.py')\n",
+    "    result = subprocess.check_output([sys.executable, worker_script], text=True)\n",
+    "    paths = json.loads(result)\n",
+    "    for item in paths.items():\n",
+    "        ort.register_execution_provider_library(item[0], item[1])\n",
+    "\n",
+    "register_execution_providers()"
+   ]
+  },
  {
   "cell_type": "code",
   "execution_count": null,
diff --git a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_text_qnn_inference_sample.ipynb b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_text_qnn_inference_sample.ipynb
index 293b9b1f..f78cd53e 100644
--- a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_text_qnn_inference_sample.ipynb
+++ b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_text_qnn_inference_sample.ipynb
@@ -11,6 +11,30 @@
     "ExecutionProvider=\"QNNExecutionProvider\""
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f0ea54b2",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# reference: https://learn.microsoft.com/en-us/windows/ai/new-windows-ml/tutorial?tabs=python#acquiring-the-model-and-preprocessing\n",
+    "import subprocess\n",
+    "import json\n",
+    "import sys\n",
+    "import os\n",
+    "import onnxruntime as ort\n",
+    "\n",
+    "def register_execution_providers():\n",
+    "    worker_script = os.path.abspath('winml.py')\n",
+    "    result = subprocess.check_output([sys.executable, worker_script], text=True)\n",
+    "    paths = json.loads(result)\n",
+    "    for item in paths.items():\n",
+    "        ort.register_execution_provider_library(item[0], item[1])\n",
+    "\n",
+    "register_execution_providers()"
+   ]
+  },
  {
   "cell_type": "markdown",
   "id": "897ffb42-3569-4d78-b99d-355a38fdce35",
diff --git a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_trtrtx_inference_sample.ipynb b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_trtrtx_inference_sample.ipynb
index c4c32324..fc5e4a5f 100644
--- a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_trtrtx_inference_sample.ipynb
+++ b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_trtrtx_inference_sample.ipynb
@@ -11,6 +11,30 @@
     "ExecutionProvider=\"NvTensorRTRTXExecutionProvider\""
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "22477669",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# reference: https://learn.microsoft.com/en-us/windows/ai/new-windows-ml/tutorial?tabs=python#acquiring-the-model-and-preprocessing\n",
+    "import subprocess\n",
+    "import json\n",
+    "import sys\n",
+    "import os\n",
+    "import onnxruntime as ort\n",
+    "\n",
+    "def register_execution_providers():\n",
+    "    worker_script = os.path.abspath('winml.py')\n",
+ " result = subprocess.check_output([sys.executable, worker_script], text=True)\n", + " paths = json.loads(result)\n", + " for item in paths.items():\n", + " ort.register_execution_provider_library(item[0], item[1])\n", + "\n", + "register_execution_providers()" + ] + }, { "cell_type": "code", "execution_count": null, @@ -42,6 +66,7 @@ " if ep_device.ep_name == ep_name and ep_device.device.type == device_type:\n", " print(f\"Adding {ep_name} for {device_type}\")\n", " session_options.add_provider_for_devices([ep_device], {} if ep_options is None else ep_options)\n", + " break\n", " \n", "opts = ort.SessionOptions()\n", " \n", diff --git a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_vision_qnn.json b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_vision_qnn.json index 20f32514..2f7d232f 100644 --- a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_vision_qnn.json +++ b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_vision_qnn.json @@ -2,7 +2,6 @@ "input_model": { "type": "PytorchModel", "model_path": "laion/CLIP-ViT-B-32-laion2B-s34B-b79K", - "generative": false, "io_config": { "input_names": [ "pixel_values" diff --git a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/winml.py b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/winml.py new file mode 100644 index 00000000..74a12c53 --- /dev/null +++ b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/winml.py @@ -0,0 +1,21 @@ +import json + +def _get_ep_paths() -> dict[str, str]: + from winui3.microsoft.windows.applicationmodel.dynamicdependency.bootstrap import ( + InitializeOptions, + initialize + ) + import winui3.microsoft.windows.ai.machinelearning as winml + eps = {} + with initialize(options = InitializeOptions.ON_NO_MATCH_SHOW_UI): + catalog = winml.ExecutionProviderCatalog.get_default() + providers = catalog.find_all_providers() + for provider in providers: + provider.ensure_ready_async().get() + eps[provider.name] = provider.library_path + # DO NOT call provider.try_register in python. That will register to the native env. 
+    return eps
+
+if __name__ == "__main__":
+    eps = _get_ep_paths()
+    print(json.dumps(eps))
diff --git a/meta-llama-Llama-3.2-1B-Instruct/aitk/_copy.json.config b/meta-llama-Llama-3.2-1B-Instruct/aitk/_copy.json.config
index 338ad7e5..87aead8c 100644
--- a/meta-llama-Llama-3.2-1B-Instruct/aitk/_copy.json.config
+++ b/meta-llama-Llama-3.2-1B-Instruct/aitk/_copy.json.config
@@ -16,6 +16,14 @@
           "find": "deepseek_ov_config",
           "replace": "llama3_2_ov_config"
         },
+        {
+          "find": "deepseek_trtrtx_config",
+          "replace": "llama3_2_trtrtx_config"
+        },
+        {
+          "find": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
+          "replace": "meta-llama/Llama-3.2-1B-Instruct"
+        },
         {
           "find": "deepseek_dml_config",
           "replace": "llama3_2_dml_config"
@@ -100,6 +108,20 @@
         }
       ]
     },
+    {
+      "src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_trtrtx_config.json",
+      "dst": "llama3_2_trtrtx_config.json",
+      "replacements": [
+        {
+          "find": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
+          "replace": "meta-llama/Llama-3.2-1B-Instruct"
+        },
+        {
+          "find": "model/deepseek",
+          "replace": "model/llama3_2"
+        }
+      ]
+    },
     {
       "src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_dml_config.json",
       "dst": "llama3_2_dml_config.json",
@@ -114,6 +136,11 @@
         }
       ]
     },
+    {
+      "src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_trtrtx_config.json.config",
+      "dst": "llama3_2_trtrtx_config.json.config",
+      "replacements": []
+    },
     {
       "src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_dml_config.json.config",
       "dst": "llama3_2_dml_config.json.config",
@@ -151,6 +178,10 @@
           "replace": "<|start_header_id|>user<|end_header_id|>\\\\n{input}<|start_header_id|>assistant<|end_header_id|>\\\\n"
         }
       ]
+    },
+    {
+      "src": "../../intel-bert-base-uncased-mrpc/aitk/winml.py",
+      "dst": "winml.py"
     }
   ]
 }
diff --git a/meta-llama-Llama-3.2-1B-Instruct/aitk/inference_sample.ipynb b/meta-llama-Llama-3.2-1B-Instruct/aitk/inference_sample.ipynb
index 77a3070b..65766934 100644
--- a/meta-llama-Llama-3.2-1B-Instruct/aitk/inference_sample.ipynb
+++ b/meta-llama-Llama-3.2-1B-Instruct/aitk/inference_sample.ipynb
@@ -11,6 +11,29 @@
     "model_folder = \"./model\""
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# reference: https://learn.microsoft.com/en-us/windows/ai/new-windows-ml/tutorial?tabs=python#acquiring-the-model-and-preprocessing\n",
+    "import subprocess\n",
+    "import json\n",
+    "import sys\n",
+    "import os\n",
+    "import onnxruntime_genai as og\n",
+    "\n",
+    "def register_execution_providers():\n",
+    "    worker_script = os.path.abspath('winml.py')\n",
+    "    result = subprocess.check_output([sys.executable, worker_script], text=True)\n",
+    "    paths = json.loads(result)\n",
+    "    for item in paths.items():\n",
+    "        og.register_execution_provider_library(item[0], item[1])\n",
+    "\n",
+    "register_execution_providers()"
+   ]
+  },
  {
   "cell_type": "code",
   "execution_count": null,
diff --git a/meta-llama-Llama-3.2-1B-Instruct/aitk/info.yml b/meta-llama-Llama-3.2-1B-Instruct/aitk/info.yml
index 16ebc30d..097226e1 100644
--- a/meta-llama-Llama-3.2-1B-Instruct/aitk/info.yml
+++ b/meta-llama-Llama-3.2-1B-Instruct/aitk/info.yml
@@ -14,6 +14,9 @@ recipes:
       - cpu
       - gpu
     ep: OpenVINOExecutionProvider
+  - file: "llama3_2_trtrtx_config.json"
+    device: gpu
+    ep: NvTensorRTRTXExecutionProvider
   - file: "llama3_2_dml_config.json"
     device: gpu
     ep: DmlExecutionProvider
diff --git a/meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_trtrtx_config.json b/meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_trtrtx_config.json
new file mode 100644
index 00000000..a135798d
--- /dev/null
+++ b/meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_trtrtx_config.json
@@ -0,0 +1,38 @@
+{
+  "input_model": {
+    "type": "HfModel",
+    "model_path": "meta-llama/Llama-3.2-1B-Instruct"
+  },
+  "systems": {
+    "local_system": {
+      "type": "LocalSystem",
+      "accelerators": [ { "device": "gpu", "execution_providers": [ "NvTensorRTRTXExecutionProvider" ] } ]
+    }
+  },
+  "data_configs": [
+    {
+      "name": "wikitext2_train",
+      "type": "HuggingfaceContainer",
+      "load_dataset_config": {
+        "data_name": "wikitext",
+        "subset": "wikitext-2-raw-v1",
+        "split": "train"
+      },
+      "pre_process_data_config": {
+        "strategy": "line-by-line",
+        "add_special_tokens": false,
+        "max_samples": 128,
+        "max_seq_len": 512
+      }
+    }
+  ],
+  "passes": {
+    "builder": { "type": "ModelBuilder", "precision": "fp16" }
+  },
+  "target": "local_system",
+  "log_severity_level": 1,
+  "output_dir": "model/llama3_2",
+  "cache_dir": "cache",
+  "no_artifacts": true,
+  "evaluate_input_model": false
+}
\ No newline at end of file
diff --git a/meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_trtrtx_config.json.config b/meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_trtrtx_config.json.config
new file mode 100644
index 00000000..cb987c65
--- /dev/null
+++ b/meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_trtrtx_config.json.config
@@ -0,0 +1,42 @@
+{
+  "name": "Convert to NVIDIA TRT for RTX",
+  "oliveFile": "",
+  "isLLM": true,
+  "debugInfo": {
+    "autoGenerated": true,
+    "useModelBuilder": "builder"
+  },
+  "addCpu": false,
+  "runtime": {
+    "autoGenerated": true,
+    "name": "Evaluate on",
+    "type": "enum",
+    "displayNames": [
+      "NVIDIA TensorRT for RTX"
+    ],
+    "path": "systems.local_system.accelerators.0.execution_providers.0",
+    "values": [
+      "NvTensorRTRTXExecutionProvider"
+    ],
+    "readOnly": false
+  },
+  "sections": [
+    {
+      "autoGenerated": true,
+      "name": "Convert",
+      "phase": "Conversion",
+      "parameters": [],
+      "toggle": {
+        "autoGenerated": true,
+        "name": "Convert to ONNX format",
+        "type": "bool",
+        "path": "passes.builder",
+        "actions": [
+          [],
+          []
+        ],
+        "readOnly": true
+      }
+    }
+  ]
+}
diff --git a/meta-llama-Llama-3.2-1B-Instruct/aitk/model_project.config b/meta-llama-Llama-3.2-1B-Instruct/aitk/model_project.config
index f5a73299..e800ea2d 100644
--- a/meta-llama-Llama-3.2-1B-Instruct/aitk/model_project.config
+++ b/meta-llama-Llama-3.2-1B-Instruct/aitk/model_project.config
@@ -12,6 +12,10 @@
       "file": "llama3_2_ov_config.json",
       "templateName": "llama3_2_ov_config"
     },
+    {
+      "file": "llama3_2_trtrtx_config.json",
+      "templateName": "llama3_2_trtrtx_config"
+    },
     {
       "file": "llama3_2_dml_config.json",
       "templateName": "llama3_2_dml_config"
diff --git a/meta-llama-Llama-3.2-1B-Instruct/aitk/requirements.txt b/meta-llama-Llama-3.2-1B-Instruct/aitk/requirements.txt
index 7af84714..e3c23f80 100644
--- a/meta-llama-Llama-3.2-1B-Instruct/aitk/requirements.txt
+++ b/meta-llama-Llama-3.2-1B-Instruct/aitk/requirements.txt
@@ -1,4 +1,4 @@
 # This file will be installed together with AITK runtime requirements
 # For the full requirements, see AITK
 datasets
-optimum
+optimum==1.26.0
diff --git a/meta-llama-Llama-3.2-1B-Instruct/aitk/winml.py b/meta-llama-Llama-3.2-1B-Instruct/aitk/winml.py
new file mode 100644
index 00000000..74a12c53
--- /dev/null
+++ b/meta-llama-Llama-3.2-1B-Instruct/aitk/winml.py
@@ -0,0 +1,21 @@
+import json
+
+def _get_ep_paths() -> dict[str, str]:
+    from winui3.microsoft.windows.applicationmodel.dynamicdependency.bootstrap import (
+        InitializeOptions,
+        initialize
+    )
+    import winui3.microsoft.windows.ai.machinelearning as winml
+    eps = {}
+    with initialize(options = InitializeOptions.ON_NO_MATCH_SHOW_UI):
+        catalog = winml.ExecutionProviderCatalog.get_default()
+        providers = catalog.find_all_providers()
+        for provider in providers:
+            provider.ensure_ready_async().get()
+            eps[provider.name] = provider.library_path
+            # DO NOT call provider.try_register in python. That will register to the native env.
+    return eps
+
+if __name__ == "__main__":
+    eps = _get_ep_paths()
+    print(json.dumps(eps))
diff --git a/microsoft-Phi-3.5-mini-instruct/aitk/_copy.json.config b/microsoft-Phi-3.5-mini-instruct/aitk/_copy.json.config
index b4f7cbdb..e78f3b56 100644
--- a/microsoft-Phi-3.5-mini-instruct/aitk/_copy.json.config
+++ b/microsoft-Phi-3.5-mini-instruct/aitk/_copy.json.config
@@ -16,6 +16,14 @@
           "find": "deepseek_ov_config",
           "replace": "phi3_5_ov_config"
         },
+        {
+          "find": "deepseek_trtrtx_config",
+          "replace": "phi3_5_trtrtx_config"
+        },
+        {
+          "find": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
+          "replace": "microsoft/Phi-3.5-mini-instruct"
+        },
         {
           "find": "deepseek_dml_config",
           "replace": "phi3_5_dml_config"
@@ -84,6 +92,20 @@
         }
       ]
     },
+    {
+      "src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_trtrtx_config.json",
+      "dst": "phi3_5_trtrtx_config.json",
+      "replacements": [
+        {
+          "find": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
+          "replace": "microsoft/Phi-3.5-mini-instruct"
+        },
+        {
+          "find": "model/deepseek",
+          "replace": "model/phi3_5"
+        }
+      ]
+    },
     {
       "src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_dml_config.json",
       "dst": "phi3_5_dml_config.json",
@@ -98,6 +120,11 @@
         }
       ]
     },
+    {
+      "src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_trtrtx_config.json.config",
+      "dst": "phi3_5_trtrtx_config.json.config",
+      "replacements": []
+    },
     {
       "src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_dml_config.json.config",
       "dst": "phi3_5_dml_config.json.config",
@@ -131,6 +158,10 @@
           "replace": "<|user|>\\\\n{input} <|end|>\\\\n<|assistant|>"
         }
       ]
+    },
+    {
+      "src": "../../intel-bert-base-uncased-mrpc/aitk/winml.py",
+      "dst": "winml.py"
     }
   ]
 }
diff --git a/microsoft-Phi-3.5-mini-instruct/aitk/inference_sample.ipynb b/microsoft-Phi-3.5-mini-instruct/aitk/inference_sample.ipynb
index a47cdc58..71492571 100644
--- a/microsoft-Phi-3.5-mini-instruct/aitk/inference_sample.ipynb
+++ b/microsoft-Phi-3.5-mini-instruct/aitk/inference_sample.ipynb
@@ -11,6 +11,29 @@
     "model_folder = \"./model\""
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# reference: https://learn.microsoft.com/en-us/windows/ai/new-windows-ml/tutorial?tabs=python#acquiring-the-model-and-preprocessing\n",
+    "import subprocess\n",
+    "import json\n",
+    "import sys\n",
+    "import os\n",
+    "import onnxruntime_genai as og\n",
+    "\n",
+    "def register_execution_providers():\n",
+    "    worker_script = os.path.abspath('winml.py')\n",
+    "    result = subprocess.check_output([sys.executable, worker_script], text=True)\n",
+    "    paths = json.loads(result)\n",
+    "    for item in paths.items():\n",
+    "        og.register_execution_provider_library(item[0], item[1])\n",
+    "\n",
+    "register_execution_providers()"
+   ]
+  },
  {
   "cell_type": "code",
   "execution_count": null,
diff --git a/microsoft-Phi-3.5-mini-instruct/aitk/info.yml b/microsoft-Phi-3.5-mini-instruct/aitk/info.yml
index 2801977a..d1e2d6ea 100644
--- a/microsoft-Phi-3.5-mini-instruct/aitk/info.yml
+++ b/microsoft-Phi-3.5-mini-instruct/aitk/info.yml
@@ -14,6 +14,9 @@ recipes:
       - cpu
       - gpu
     ep: OpenVINOExecutionProvider
+  - file: "phi3_5_trtrtx_config.json"
+    device: gpu
+    ep: NvTensorRTRTXExecutionProvider
   - file: "phi3_5_dml_config.json"
     device: gpu
     ep: DmlExecutionProvider
diff --git a/microsoft-Phi-3.5-mini-instruct/aitk/model_project.config b/microsoft-Phi-3.5-mini-instruct/aitk/model_project.config
index a5f764fe..358e2c6c 100644
--- a/microsoft-Phi-3.5-mini-instruct/aitk/model_project.config
+++ b/microsoft-Phi-3.5-mini-instruct/aitk/model_project.config
@@ -12,6 +12,10 @@
       "file": "phi3_5_ov_config.json",
       "templateName": "phi3_5_ov_config"
     },
+    {
+      "file": "phi3_5_trtrtx_config.json",
+      "templateName": "phi3_5_trtrtx_config"
+    },
     {
       "file": "phi3_5_dml_config.json",
       "templateName": "phi3_5_dml_config"
diff --git a/microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_trtrtx_config.json b/microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_trtrtx_config.json
new file mode 100644
index 00000000..aa9eaf9c
--- /dev/null
+++ b/microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_trtrtx_config.json
@@ -0,0 +1,38 @@
+{
+  "input_model": {
+    "type": "HfModel",
+    "model_path": "microsoft/Phi-3.5-mini-instruct"
+  },
+  "systems": {
+    "local_system": {
+      "type": "LocalSystem",
+      "accelerators": [ { "device": "gpu", "execution_providers": [ "NvTensorRTRTXExecutionProvider" ] } ]
+    }
+  },
+  "data_configs": [
+    {
+      "name": "wikitext2_train",
+      "type": "HuggingfaceContainer",
+      "load_dataset_config": {
+        "data_name": "wikitext",
+        "subset": "wikitext-2-raw-v1",
+        "split": "train"
+      },
+      "pre_process_data_config": {
+        "strategy": "line-by-line",
+        "add_special_tokens": false,
+        "max_samples": 128,
+        "max_seq_len": 512
+      }
+    }
+  ],
+  "passes": {
+    "builder": { "type": "ModelBuilder", "precision": "fp16" }
+  },
+  "target": "local_system",
+  "log_severity_level": 1,
+  "output_dir": "model/phi3_5",
+  "cache_dir": "cache",
+  "no_artifacts": true,
+  "evaluate_input_model": false
+}
\ No newline at end of file
diff --git a/microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_trtrtx_config.json.config b/microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_trtrtx_config.json.config
new file mode 100644
index 00000000..cb987c65
--- /dev/null
+++ b/microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_trtrtx_config.json.config
@@ -0,0 +1,42 @@
+{
+  "name": "Convert to NVIDIA TRT for RTX",
+  "oliveFile": "",
+  "isLLM": true,
+  "debugInfo": {
+    "autoGenerated": true,
+    "useModelBuilder": "builder"
+  },
+  "addCpu": false,
+  "runtime": {
+    "autoGenerated": true,
+    "name": "Evaluate on",
+    "type": "enum",
+    "displayNames": [
+      "NVIDIA TensorRT for RTX"
+    ],
+    "path": "systems.local_system.accelerators.0.execution_providers.0",
+    "values": [
+      "NvTensorRTRTXExecutionProvider"
+    ],
+    "readOnly": false
+  },
+  "sections": [
+    {
+      "autoGenerated": true,
+      "name": "Convert",
+      "phase": "Conversion",
+      "parameters": [],
+      "toggle": {
+        "autoGenerated": true,
+        "name": "Convert to ONNX format",
+        "type": "bool",
+        "path": "passes.builder",
+        "actions": [
+          [],
+          []
+        ],
+        "readOnly": true
+      }
+    }
+  ]
+}
diff --git a/microsoft-Phi-3.5-mini-instruct/aitk/requirements.txt b/microsoft-Phi-3.5-mini-instruct/aitk/requirements.txt
index 7af84714..e3c23f80 100644
--- a/microsoft-Phi-3.5-mini-instruct/aitk/requirements.txt
+++ b/microsoft-Phi-3.5-mini-instruct/aitk/requirements.txt
@@ -1,4 +1,4 @@
 # This file will be installed together with AITK runtime requirements
 # For the full requirements, see AITK
 datasets
-optimum
+optimum==1.26.0
diff --git a/microsoft-Phi-3.5-mini-instruct/aitk/winml.py b/microsoft-Phi-3.5-mini-instruct/aitk/winml.py
new file mode 100644
index 00000000..74a12c53
--- /dev/null
+++ b/microsoft-Phi-3.5-mini-instruct/aitk/winml.py
@@ -0,0 +1,21 @@
+import json
+
+def _get_ep_paths() -> dict[str, str]:
+    from winui3.microsoft.windows.applicationmodel.dynamicdependency.bootstrap import (
+        InitializeOptions,
+        initialize
+    )
+    import winui3.microsoft.windows.ai.machinelearning as winml
+    eps = {}
+    with initialize(options = InitializeOptions.ON_NO_MATCH_SHOW_UI):
+        catalog = winml.ExecutionProviderCatalog.get_default()
+        providers = catalog.find_all_providers()
+        for provider in providers:
+            provider.ensure_ready_async().get()
+            eps[provider.name] = provider.library_path
+            # DO NOT call provider.try_register in python. That will register to the native env.
+    return eps
+
+if __name__ == "__main__":
+    eps = _get_ep_paths()
+    print(json.dumps(eps))
diff --git a/microsoft-Phi-4-mini-reasoning/aitk/inference_sample.ipynb b/microsoft-Phi-4-mini-reasoning/aitk/inference_sample.ipynb
index 70e1b959..d62d3d12 100644
--- a/microsoft-Phi-4-mini-reasoning/aitk/inference_sample.ipynb
+++ b/microsoft-Phi-4-mini-reasoning/aitk/inference_sample.ipynb
@@ -11,6 +11,29 @@
     "model_folder = \"./model\""
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# reference: https://learn.microsoft.com/en-us/windows/ai/new-windows-ml/tutorial?tabs=python#acquiring-the-model-and-preprocessing\n",
+    "import subprocess\n",
+    "import json\n",
+    "import sys\n",
+    "import os\n",
+    "import onnxruntime_genai as og\n",
+    "\n",
+    "def register_execution_providers():\n",
+    "    worker_script = os.path.abspath('winml.py')\n",
+    "    result = subprocess.check_output([sys.executable, worker_script], text=True)\n",
+    "    paths = json.loads(result)\n",
+    "    for item in paths.items():\n",
+    "        og.register_execution_provider_library(item[0], item[1])\n",
+    "\n",
+    "register_execution_providers()"
+   ]
+  },
  {
   "cell_type": "code",
   "execution_count": null,
diff --git a/microsoft-Phi-4-reasoning-plus/aitk/inference_sample.ipynb b/microsoft-Phi-4-reasoning-plus/aitk/inference_sample.ipynb
index 70e1b959..d62d3d12 100644
--- a/microsoft-Phi-4-reasoning-plus/aitk/inference_sample.ipynb
+++ b/microsoft-Phi-4-reasoning-plus/aitk/inference_sample.ipynb
@@ -11,6 +11,29 @@
     "model_folder = \"./model\""
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# reference: https://learn.microsoft.com/en-us/windows/ai/new-windows-ml/tutorial?tabs=python#acquiring-the-model-and-preprocessing\n",
+    "import subprocess\n",
+    "import json\n",
+    "import sys\n",
+    "import os\n",
+    "import onnxruntime_genai as og\n",
+    "\n",
+    "def register_execution_providers():\n",
+    "    worker_script = os.path.abspath('winml.py')\n",
+    "    result = subprocess.check_output([sys.executable, worker_script], text=True)\n",
+    "    paths = json.loads(result)\n",
+    "    for item in paths.items():\n",
+    "        og.register_execution_provider_library(item[0], item[1])\n",
+    "\n",
+    "register_execution_providers()"
+   ]
+  },
  {
   "cell_type": "code",
   "execution_count": null,
diff --git a/microsoft-Phi-4-reasoning/aitk/inference_sample.ipynb b/microsoft-Phi-4-reasoning/aitk/inference_sample.ipynb
index 70e1b959..d62d3d12 100644
--- a/microsoft-Phi-4-reasoning/aitk/inference_sample.ipynb
+++ b/microsoft-Phi-4-reasoning/aitk/inference_sample.ipynb
@@ -11,6 +11,29 @@
     "model_folder = \"./model\""
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# reference: https://learn.microsoft.com/en-us/windows/ai/new-windows-ml/tutorial?tabs=python#acquiring-the-model-and-preprocessing\n",
+    "import subprocess\n",
+    "import json\n",
+    "import sys\n",
+    "import os\n",
+    "import onnxruntime_genai as og\n",
+    "\n",
+    "def register_execution_providers():\n",
+    "    worker_script = os.path.abspath('winml.py')\n",
+    "    result = subprocess.check_output([sys.executable, worker_script], text=True)\n",
+    "    paths = json.loads(result)\n",
+    "    for item in paths.items():\n",
+    "        og.register_execution_provider_library(item[0], item[1])\n",
+    "\n",
+    "register_execution_providers()"
+   ]
+  },
  {
   "cell_type": "code",
   "execution_count": null,
diff --git a/microsoft-resnet-50/aitk/inference_sample.ipynb b/microsoft-resnet-50/aitk/inference_sample.ipynb
index c167ae59..e5e3c336 100644
--- a/microsoft-resnet-50/aitk/inference_sample.ipynb
+++ b/microsoft-resnet-50/aitk/inference_sample.ipynb
@@ -16,6 +16,29 @@
     "    transpose = True"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# reference: https://learn.microsoft.com/en-us/windows/ai/new-windows-ml/tutorial?tabs=python#acquiring-the-model-and-preprocessing\n",
+    "import subprocess\n",
+    "import json\n",
+    "import sys\n",
+    "import os\n",
+    "import onnxruntime as ort\n",
+    "\n",
+    "def register_execution_providers():\n",
+    "    worker_script = os.path.abspath('winml.py')\n",
+    "    result = subprocess.check_output([sys.executable, worker_script], text=True)\n",
+    "    paths = json.loads(result)\n",
+    "    for item in paths.items():\n",
+    "        ort.register_execution_provider_library(item[0], item[1])\n",
+    "\n",
+    "register_execution_providers()"
+   ]
+  },
  {
   "cell_type": "code",
   "execution_count": null,
@@ -54,6 +77,7 @@
    "    if ep_device.ep_name == ep_name and ep_device.device.type == device_type:\n",
    "        print(f\"Adding {ep_name} for {device_type}\")\n",
    "        session_options.add_provider_for_devices([ep_device], {} if ep_options is None else ep_options)\n",
+   "        break\n",
    "\n",
    "\n",
    "session_options = ort.SessionOptions()\n",
diff --git a/microsoft-resnet-50/aitk/resnet_dml_inference_sample.ipynb b/microsoft-resnet-50/aitk/resnet_dml_inference_sample.ipynb
index 489618e6..5e70d5d2 100644
--- a/microsoft-resnet-50/aitk/resnet_dml_inference_sample.ipynb
+++ b/microsoft-resnet-50/aitk/resnet_dml_inference_sample.ipynb
@@ -10,6 +10,29 @@
     "ExecutionProvider=\"DmlExecutionProvider\""
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# reference: https://learn.microsoft.com/en-us/windows/ai/new-windows-ml/tutorial?tabs=python#acquiring-the-model-and-preprocessing\n",
+    "import subprocess\n",
+    "import json\n",
+    "import sys\n",
+    "import os\n",
+    "import onnxruntime as ort\n",
+    "\n",
+    "def register_execution_providers():\n",
+    "    worker_script = os.path.abspath('winml.py')\n",
+    "    result = subprocess.check_output([sys.executable, worker_script], text=True)\n",
+    "    paths = json.loads(result)\n",
+    "    for item in paths.items():\n",
+    "        ort.register_execution_provider_library(item[0], item[1])\n",
+    "\n",
+    "register_execution_providers()"
+   ]
+  },
  {
   "cell_type": "code",
   "execution_count": null,
diff --git a/microsoft-resnet-50/aitk/resnet_trtrtx_inference_sample.ipynb b/microsoft-resnet-50/aitk/resnet_trtrtx_inference_sample.ipynb
index 25eebee1..2ec7d55a 100644
--- a/microsoft-resnet-50/aitk/resnet_trtrtx_inference_sample.ipynb
+++ b/microsoft-resnet-50/aitk/resnet_trtrtx_inference_sample.ipynb
@@ -10,6 +10,29 @@
     "ExecutionProvider=\"NvTensorRTRTXExecutionProvider\""
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# reference: https://learn.microsoft.com/en-us/windows/ai/new-windows-ml/tutorial?tabs=python#acquiring-the-model-and-preprocessing\n",
+    "import subprocess\n",
+    "import json\n",
+    "import sys\n",
+    "import os\n",
+    "import onnxruntime as ort\n",
+    "\n",
+    "def register_execution_providers():\n",
+    "    worker_script = os.path.abspath('winml.py')\n",
+    "    result = subprocess.check_output([sys.executable, worker_script], text=True)\n",
+    "    paths = json.loads(result)\n",
+    "    for item in paths.items():\n",
+    "        ort.register_execution_provider_library(item[0], item[1])\n",
+    "\n",
+    "register_execution_providers()"
+   ]
+  },
  {
   "cell_type": "code",
   "execution_count": null,
diff --git a/openai-clip-vit-base-patch16/aitk/_copy.json.config b/openai-clip-vit-base-patch16/aitk/_copy.json.config
index abd20714..2f6d2216 100644
--- a/openai-clip-vit-base-patch16/aitk/_copy.json.config
+++ b/openai-clip-vit-base-patch16/aitk/_copy.json.config
@@ -23,6 +23,10 @@
           "replace": "DmlExecutionProvider"
         }
       ]
+    },
+    {
+      "src": "../../intel-bert-base-uncased-mrpc/aitk/winml.py",
+      "dst": "winml.py"
     }
   ]
 }
diff --git a/openai-clip-vit-base-patch16/aitk/openai_clip_dml_inference_sample.ipynb b/openai-clip-vit-base-patch16/aitk/openai_clip_dml_inference_sample.ipynb
index 19f4bc70..83c5e565 100644
--- a/openai-clip-vit-base-patch16/aitk/openai_clip_dml_inference_sample.ipynb
+++ b/openai-clip-vit-base-patch16/aitk/openai_clip_dml_inference_sample.ipynb
@@ -11,6 +11,30 @@
     "ExecutionProvider=\"DmlExecutionProvider\""
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "22477669",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# reference: https://learn.microsoft.com/en-us/windows/ai/new-windows-ml/tutorial?tabs=python#acquiring-the-model-and-preprocessing\n",
+    "import subprocess\n",
+    "import json\n",
+    "import sys\n",
+    "import os\n",
+    "import onnxruntime as ort\n",
+    "\n",
+    "def register_execution_providers():\n",
+    "    worker_script = os.path.abspath('winml.py')\n",
+    "    result = subprocess.check_output([sys.executable, worker_script], text=True)\n",
+    "    paths = json.loads(result)\n",
+    "    for item in paths.items():\n",
+    "        ort.register_execution_provider_library(item[0], item[1])\n",
+    "\n",
+    "register_execution_providers()"
+   ]
+  },
  {
   "cell_type": "code",
   "execution_count": null,
@@ -42,6 +66,7 @@
    "    if ep_device.ep_name == ep_name and ep_device.device.type == device_type:\n",
    "        print(f\"Adding {ep_name} for {device_type}\")\n",
    "        session_options.add_provider_for_devices([ep_device], {} if ep_options is None else ep_options)\n",
+   "        break\n",
    " \n",
    "opts = ort.SessionOptions()\n",
    " \n",
diff --git a/openai-clip-vit-base-patch16/aitk/openai_clip_ov_inference_sample.ipynb b/openai-clip-vit-base-patch16/aitk/openai_clip_ov_inference_sample.ipynb
index 18a7aa58..b133b368 100644
--- a/openai-clip-vit-base-patch16/aitk/openai_clip_ov_inference_sample.ipynb
+++ b/openai-clip-vit-base-patch16/aitk/openai_clip_ov_inference_sample.ipynb
@@ -15,6 +15,34 @@
     "ExecutionProvider=\"OpenVINOExecutionProvider\""
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "bf6bb9b8",
+   "metadata": {
+    "vscode": {
+     "languageId": "plaintext"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "# reference: 
https://learn.microsoft.com/en-us/windows/ai/new-windows-ml/tutorial?tabs=python#acquiring-the-model-and-preprocessing\n", + "import subprocess\n", + "import json\n", + "import sys\n", + "import os\n", + "import onnxruntime as ort\n", + "\n", + "def register_execution_providers():\n", + " worker_script = os.path.abspath('winml.py')\n", + " result = subprocess.check_output([sys.executable, worker_script], text=True)\n", + " paths = json.loads(result)\n", + " for item in paths.items():\n", + " ort.register_execution_provider_library(item[0], item[1])\n", + "\n", + "register_execution_providers()" + ] + }, { "cell_type": "code", "execution_count": null, diff --git a/openai-clip-vit-base-patch16/aitk/openai_clip_qdq_amd_inference_sample.ipynb b/openai-clip-vit-base-patch16/aitk/openai_clip_qdq_amd_inference_sample.ipynb index a4cb3eb3..86363274 100644 --- a/openai-clip-vit-base-patch16/aitk/openai_clip_qdq_amd_inference_sample.ipynb +++ b/openai-clip-vit-base-patch16/aitk/openai_clip_qdq_amd_inference_sample.ipynb @@ -15,6 +15,34 @@ "ExecutionProvider=\"VitisAIExecutionProvider\"" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "bf6bb9b8", + "metadata": { + "vscode": { + "languageId": "plaintext" + } + }, + "outputs": [], + "source": [ + "# reference: https://learn.microsoft.com/en-us/windows/ai/new-windows-ml/tutorial?tabs=python#acquiring-the-model-and-preprocessing\n", + "import subprocess\n", + "import json\n", + "import sys\n", + "import os\n", + "import onnxruntime as ort\n", + "\n", + "def register_execution_providers():\n", + " worker_script = os.path.abspath('winml.py')\n", + " result = subprocess.check_output([sys.executable, worker_script], text=True)\n", + " paths = json.loads(result)\n", + " for item in paths.items():\n", + " ort.register_execution_provider_library(item[0], item[1])\n", + "\n", + "register_execution_providers()" + ] + }, { "cell_type": "code", "execution_count": null, diff --git a/openai-clip-vit-base-patch16/aitk/openai_clip_text_qnn_inference_sample.ipynb b/openai-clip-vit-base-patch16/aitk/openai_clip_text_qnn_inference_sample.ipynb index 9f0a36b2..f00e5fef 100644 --- a/openai-clip-vit-base-patch16/aitk/openai_clip_text_qnn_inference_sample.ipynb +++ b/openai-clip-vit-base-patch16/aitk/openai_clip_text_qnn_inference_sample.ipynb @@ -11,6 +11,30 @@ "ExecutionProvider=\"QNNExecutionProvider\"" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "f0ea54b2", + "metadata": {}, + "outputs": [], + "source": [ + "# reference: https://learn.microsoft.com/en-us/windows/ai/new-windows-ml/tutorial?tabs=python#acquiring-the-model-and-preprocessing\n", + "import subprocess\n", + "import json\n", + "import sys\n", + "import os\n", + "import onnxruntime as ort\n", + "\n", + "def register_execution_providers():\n", + " worker_script = os.path.abspath('winml.py')\n", + " result = subprocess.check_output([sys.executable, worker_script], text=True)\n", + " paths = json.loads(result)\n", + " for item in paths.items():\n", + " ort.register_execution_provider_library(item[0], item[1])\n", + "\n", + "register_execution_providers()" + ] + }, { "cell_type": "markdown", "id": "897ffb42-3569-4d78-b99d-355a38fdce35", diff --git a/openai-clip-vit-base-patch16/aitk/openai_clip_trtrtx_inference_sample.ipynb b/openai-clip-vit-base-patch16/aitk/openai_clip_trtrtx_inference_sample.ipynb index a3c6f084..2343edf0 100644 --- a/openai-clip-vit-base-patch16/aitk/openai_clip_trtrtx_inference_sample.ipynb +++ 
b/openai-clip-vit-base-patch16/aitk/openai_clip_trtrtx_inference_sample.ipynb @@ -11,6 +11,30 @@ "ExecutionProvider=\"NvTensorRTRTXExecutionProvider\"" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "22477669", + "metadata": {}, + "outputs": [], + "source": [ + "# reference: https://learn.microsoft.com/en-us/windows/ai/new-windows-ml/tutorial?tabs=python#acquiring-the-model-and-preprocessing\n", + "import subprocess\n", + "import json\n", + "import sys\n", + "import os\n", + "import onnxruntime as ort\n", + "\n", + "def register_execution_providers():\n", + " worker_script = os.path.abspath('winml.py')\n", + " result = subprocess.check_output([sys.executable, worker_script], text=True)\n", + " paths = json.loads(result)\n", + " for item in paths.items():\n", + " ort.register_execution_provider_library(item[0], item[1])\n", + "\n", + "register_execution_providers()" + ] + }, { "cell_type": "code", "execution_count": null, @@ -42,6 +66,7 @@ " if ep_device.ep_name == ep_name and ep_device.device.type == device_type:\n", " print(f\"Adding {ep_name} for {device_type}\")\n", " session_options.add_provider_for_devices([ep_device], {} if ep_options is None else ep_options)\n", + " break\n", " \n", "opts = ort.SessionOptions()\n", " \n", diff --git a/openai-clip-vit-base-patch16/aitk/openai_clip_vision_qnn.json b/openai-clip-vit-base-patch16/aitk/openai_clip_vision_qnn.json index b58a975f..cf98fb1b 100644 --- a/openai-clip-vit-base-patch16/aitk/openai_clip_vision_qnn.json +++ b/openai-clip-vit-base-patch16/aitk/openai_clip_vision_qnn.json @@ -2,7 +2,6 @@ "input_model": { "type": "PytorchModel", "model_path": "openai/clip-vit-base-patch16", - "generative": false, "io_config": { "input_names": [ "pixel_values" diff --git a/openai-clip-vit-base-patch16/aitk/winml.py b/openai-clip-vit-base-patch16/aitk/winml.py new file mode 100644 index 00000000..74a12c53 --- /dev/null +++ b/openai-clip-vit-base-patch16/aitk/winml.py @@ -0,0 +1,21 @@ +import json + +def _get_ep_paths() -> dict[str, str]: + from winui3.microsoft.windows.applicationmodel.dynamicdependency.bootstrap import ( + InitializeOptions, + initialize + ) + import winui3.microsoft.windows.ai.machinelearning as winml + eps = {} + with initialize(options = InitializeOptions.ON_NO_MATCH_SHOW_UI): + catalog = winml.ExecutionProviderCatalog.get_default() + providers = catalog.find_all_providers() + for provider in providers: + provider.ensure_ready_async().get() + eps[provider.name] = provider.library_path + # DO NOT call provider.try_register in python. That will register to the native env. 
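+    # The name -> library-path map collected above is printed as JSON by the
+    # __main__ block below. The inference notebooks run this script as a
+    # subprocess, parse that output, and call register_execution_provider_library
+    # themselves, so execution-provider discovery (and the Windows App SDK
+    # bootstrap) stays out of the notebook's own process.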
+ return eps + +if __name__ == "__main__": + eps = _get_ep_paths() + print(json.dumps(eps)) diff --git a/openai-clip-vit-base-patch32/aitk/_copy.json.config b/openai-clip-vit-base-patch32/aitk/_copy.json.config index 0a9e193a..a771d852 100644 --- a/openai-clip-vit-base-patch32/aitk/_copy.json.config +++ b/openai-clip-vit-base-patch32/aitk/_copy.json.config @@ -199,6 +199,10 @@ { "src": "../../openai-clip-vit-base-patch16/aitk/requirements.txt", "dst": "requirements.txt" + }, + { + "src": "../../intel-bert-base-uncased-mrpc/aitk/winml.py", + "dst": "winml.py" } ] } diff --git a/openai-clip-vit-base-patch32/aitk/openai_clip_dml_inference_sample.ipynb b/openai-clip-vit-base-patch32/aitk/openai_clip_dml_inference_sample.ipynb index db21746c..7e2f5cc6 100644 --- a/openai-clip-vit-base-patch32/aitk/openai_clip_dml_inference_sample.ipynb +++ b/openai-clip-vit-base-patch32/aitk/openai_clip_dml_inference_sample.ipynb @@ -11,6 +11,30 @@ "ExecutionProvider=\"DmlExecutionProvider\"" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "22477669", + "metadata": {}, + "outputs": [], + "source": [ + "# reference: https://learn.microsoft.com/en-us/windows/ai/new-windows-ml/tutorial?tabs=python#acquiring-the-model-and-preprocessing\n", + "import subprocess\n", + "import json\n", + "import sys\n", + "import os\n", + "import onnxruntime as ort\n", + "\n", + "def register_execution_providers():\n", + " worker_script = os.path.abspath('winml.py')\n", + " result = subprocess.check_output([sys.executable, worker_script], text=True)\n", + " paths = json.loads(result)\n", + " for item in paths.items():\n", + " ort.register_execution_provider_library(item[0], item[1])\n", + "\n", + "register_execution_providers()" + ] + }, { "cell_type": "code", "execution_count": null, @@ -42,6 +66,7 @@ " if ep_device.ep_name == ep_name and ep_device.device.type == device_type:\n", " print(f\"Adding {ep_name} for {device_type}\")\n", " session_options.add_provider_for_devices([ep_device], {} if ep_options is None else ep_options)\n", + " break\n", " \n", "opts = ort.SessionOptions()\n", " \n", diff --git a/openai-clip-vit-base-patch32/aitk/openai_clip_ov_inference_sample.ipynb b/openai-clip-vit-base-patch32/aitk/openai_clip_ov_inference_sample.ipynb index ef626f4c..0312f8d6 100644 --- a/openai-clip-vit-base-patch32/aitk/openai_clip_ov_inference_sample.ipynb +++ b/openai-clip-vit-base-patch32/aitk/openai_clip_ov_inference_sample.ipynb @@ -15,6 +15,34 @@ "ExecutionProvider=\"OpenVINOExecutionProvider\"" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "bf6bb9b8", + "metadata": { + "vscode": { + "languageId": "plaintext" + } + }, + "outputs": [], + "source": [ + "# reference: https://learn.microsoft.com/en-us/windows/ai/new-windows-ml/tutorial?tabs=python#acquiring-the-model-and-preprocessing\n", + "import subprocess\n", + "import json\n", + "import sys\n", + "import os\n", + "import onnxruntime as ort\n", + "\n", + "def register_execution_providers():\n", + " worker_script = os.path.abspath('winml.py')\n", + " result = subprocess.check_output([sys.executable, worker_script], text=True)\n", + " paths = json.loads(result)\n", + " for item in paths.items():\n", + " ort.register_execution_provider_library(item[0], item[1])\n", + "\n", + "register_execution_providers()" + ] + }, { "cell_type": "code", "execution_count": null, diff --git a/openai-clip-vit-base-patch32/aitk/openai_clip_qdq_amd_inference_sample.ipynb b/openai-clip-vit-base-patch32/aitk/openai_clip_qdq_amd_inference_sample.ipynb index 
95bfb0a4..fa35e6a2 100644 --- a/openai-clip-vit-base-patch32/aitk/openai_clip_qdq_amd_inference_sample.ipynb +++ b/openai-clip-vit-base-patch32/aitk/openai_clip_qdq_amd_inference_sample.ipynb @@ -15,6 +15,34 @@ "ExecutionProvider=\"VitisAIExecutionProvider\"" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "bf6bb9b8", + "metadata": { + "vscode": { + "languageId": "plaintext" + } + }, + "outputs": [], + "source": [ + "# reference: https://learn.microsoft.com/en-us/windows/ai/new-windows-ml/tutorial?tabs=python#acquiring-the-model-and-preprocessing\n", + "import subprocess\n", + "import json\n", + "import sys\n", + "import os\n", + "import onnxruntime as ort\n", + "\n", + "def register_execution_providers():\n", + " worker_script = os.path.abspath('winml.py')\n", + " result = subprocess.check_output([sys.executable, worker_script], text=True)\n", + " paths = json.loads(result)\n", + " for item in paths.items():\n", + " ort.register_execution_provider_library(item[0], item[1])\n", + "\n", + "register_execution_providers()" + ] + }, { "cell_type": "code", "execution_count": null, diff --git a/openai-clip-vit-base-patch32/aitk/openai_clip_text_qnn_inference_sample.ipynb b/openai-clip-vit-base-patch32/aitk/openai_clip_text_qnn_inference_sample.ipynb index 0a120030..3efc378d 100644 --- a/openai-clip-vit-base-patch32/aitk/openai_clip_text_qnn_inference_sample.ipynb +++ b/openai-clip-vit-base-patch32/aitk/openai_clip_text_qnn_inference_sample.ipynb @@ -11,6 +11,30 @@ "ExecutionProvider=\"QNNExecutionProvider\"" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "f0ea54b2", + "metadata": {}, + "outputs": [], + "source": [ + "# reference: https://learn.microsoft.com/en-us/windows/ai/new-windows-ml/tutorial?tabs=python#acquiring-the-model-and-preprocessing\n", + "import subprocess\n", + "import json\n", + "import sys\n", + "import os\n", + "import onnxruntime as ort\n", + "\n", + "def register_execution_providers():\n", + " worker_script = os.path.abspath('winml.py')\n", + " result = subprocess.check_output([sys.executable, worker_script], text=True)\n", + " paths = json.loads(result)\n", + " for item in paths.items():\n", + " ort.register_execution_provider_library(item[0], item[1])\n", + "\n", + "register_execution_providers()" + ] + }, { "cell_type": "markdown", "id": "897ffb42-3569-4d78-b99d-355a38fdce35", diff --git a/openai-clip-vit-base-patch32/aitk/openai_clip_trtrtx_inference_sample.ipynb b/openai-clip-vit-base-patch32/aitk/openai_clip_trtrtx_inference_sample.ipynb index ee2b42fd..4c1986a4 100644 --- a/openai-clip-vit-base-patch32/aitk/openai_clip_trtrtx_inference_sample.ipynb +++ b/openai-clip-vit-base-patch32/aitk/openai_clip_trtrtx_inference_sample.ipynb @@ -11,6 +11,30 @@ "ExecutionProvider=\"NvTensorRTRTXExecutionProvider\"" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "22477669", + "metadata": {}, + "outputs": [], + "source": [ + "# reference: https://learn.microsoft.com/en-us/windows/ai/new-windows-ml/tutorial?tabs=python#acquiring-the-model-and-preprocessing\n", + "import subprocess\n", + "import json\n", + "import sys\n", + "import os\n", + "import onnxruntime as ort\n", + "\n", + "def register_execution_providers():\n", + " worker_script = os.path.abspath('winml.py')\n", + " result = subprocess.check_output([sys.executable, worker_script], text=True)\n", + " paths = json.loads(result)\n", + " for item in paths.items():\n", + " ort.register_execution_provider_library(item[0], item[1])\n", + "\n", + 
"register_execution_providers()" + ] + }, { "cell_type": "code", "execution_count": null, @@ -42,6 +66,7 @@ " if ep_device.ep_name == ep_name and ep_device.device.type == device_type:\n", " print(f\"Adding {ep_name} for {device_type}\")\n", " session_options.add_provider_for_devices([ep_device], {} if ep_options is None else ep_options)\n", + " break\n", " \n", "opts = ort.SessionOptions()\n", " \n", diff --git a/openai-clip-vit-base-patch32/aitk/openai_clip_vision_qnn.json b/openai-clip-vit-base-patch32/aitk/openai_clip_vision_qnn.json index a12522a0..7c7ed386 100644 --- a/openai-clip-vit-base-patch32/aitk/openai_clip_vision_qnn.json +++ b/openai-clip-vit-base-patch32/aitk/openai_clip_vision_qnn.json @@ -2,7 +2,6 @@ "input_model": { "type": "PytorchModel", "model_path": "openai/clip-vit-base-patch32", - "generative": false, "io_config": { "input_names": [ "pixel_values" diff --git a/openai-clip-vit-base-patch32/aitk/winml.py b/openai-clip-vit-base-patch32/aitk/winml.py new file mode 100644 index 00000000..74a12c53 --- /dev/null +++ b/openai-clip-vit-base-patch32/aitk/winml.py @@ -0,0 +1,21 @@ +import json + +def _get_ep_paths() -> dict[str, str]: + from winui3.microsoft.windows.applicationmodel.dynamicdependency.bootstrap import ( + InitializeOptions, + initialize + ) + import winui3.microsoft.windows.ai.machinelearning as winml + eps = {} + with initialize(options = InitializeOptions.ON_NO_MATCH_SHOW_UI): + catalog = winml.ExecutionProviderCatalog.get_default() + providers = catalog.find_all_providers() + for provider in providers: + provider.ensure_ready_async().get() + eps[provider.name] = provider.library_path + # DO NOT call provider.try_register in python. That will register to the native env. + return eps + +if __name__ == "__main__": + eps = _get_ep_paths() + print(json.dumps(eps)) From a5cfa124a8355b43c17b3a535e50b6f640ba9ac5 Mon Sep 17 00:00:00 2001 From: Chao Zhang Date: Wed, 6 Aug 2025 17:58:50 +0800 Subject: [PATCH 02/14] add break --- .../aitk/laion_clip_ov_inference_sample.ipynb | 1 + .../laion_clip_qdq_amd_inference_sample.ipynb | 1 + ...laion_clip_text_qnn_inference_sample.ipynb | 1 + ...ion_clip_vision_qnn_inference_sample.ipynb | 25 +++++++++++++++++++ .../openai_clip_ov_inference_sample.ipynb | 1 + ...openai_clip_qdq_amd_inference_sample.ipynb | 1 + ...penai_clip_text_qnn_inference_sample.ipynb | 1 + ...nai_clip_vision_qnn_inference_sample.ipynb | 25 +++++++++++++++++++ .../openai_clip_ov_inference_sample.ipynb | 1 + ...openai_clip_qdq_amd_inference_sample.ipynb | 1 + ...penai_clip_text_qnn_inference_sample.ipynb | 1 + ...nai_clip_vision_qnn_inference_sample.ipynb | 25 +++++++++++++++++++ 12 files changed, 84 insertions(+) diff --git a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_ov_inference_sample.ipynb b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_ov_inference_sample.ipynb index 91277a22..e09f24a8 100644 --- a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_ov_inference_sample.ipynb +++ b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_ov_inference_sample.ipynb @@ -78,6 +78,7 @@ " if ep_device.ep_name == ep_name and ep_device.device.type == device_type:\n", " print(f\"Adding {ep_name} for {device_type}\")\n", " session_options.add_provider_for_devices([ep_device], {} if ep_options is None else ep_options)\n", + " break\n", " \n", "opts = ort.SessionOptions()\n", " \n", diff --git a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_qdq_amd_inference_sample.ipynb 
b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_qdq_amd_inference_sample.ipynb index e3267d68..36eaada7 100644 --- a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_qdq_amd_inference_sample.ipynb +++ b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_qdq_amd_inference_sample.ipynb @@ -78,6 +78,7 @@ " if ep_device.ep_name == ep_name and ep_device.device.type == device_type:\n", " print(f\"Adding {ep_name} for {device_type}\")\n", " session_options.add_provider_for_devices([ep_device], {} if ep_options is None else ep_options)\n", + " break\n", " \n", "opts = ort.SessionOptions()\n", " \n", diff --git a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_text_qnn_inference_sample.ipynb b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_text_qnn_inference_sample.ipynb index f78cd53e..0884b6ac 100644 --- a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_text_qnn_inference_sample.ipynb +++ b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_text_qnn_inference_sample.ipynb @@ -80,6 +80,7 @@ " if ep_device.ep_name == ep_name and ep_device.device.type == device_type:\n", " print(f\"Adding {ep_name} for {device_type}\")\n", " session_options.add_provider_for_devices([ep_device], {} if ep_options is None else ep_options)\n", + " break\n", "\n", "\n", "session_options = ort.SessionOptions()\n", diff --git a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_vision_qnn_inference_sample.ipynb b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_vision_qnn_inference_sample.ipynb index 02cfa10a..aa8a8757 100644 --- a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_vision_qnn_inference_sample.ipynb +++ b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_vision_qnn_inference_sample.ipynb @@ -12,6 +12,30 @@ "ExecutionProvider=\"QNNExecutionProvider\"" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "f82e3bca", + "metadata": {}, + "outputs": [], + "source": [ + "# reference: https://learn.microsoft.com/en-us/windows/ai/new-windows-ml/tutorial?tabs=python#acquiring-the-model-and-preprocessing\n", + "import subprocess\n", + "import json\n", + "import sys\n", + "import os\n", + "import onnxruntime as ort\n", + "\n", + "def register_execution_providers():\n", + " worker_script = os.path.abspath('winml.py')\n", + " result = subprocess.check_output([sys.executable, worker_script], text=True)\n", + " paths = json.loads(result)\n", + " for item in paths.items():\n", + " ort.register_execution_provider_library(item[0], item[1])\n", + "\n", + "register_execution_providers()" + ] + }, { "cell_type": "markdown", "id": "897ffb42-3569-4d78-b99d-355a38fdce35", @@ -57,6 +81,7 @@ " if ep_device.ep_name == ep_name and ep_device.device.type == device_type:\n", " print(f\"Adding {ep_name} for {device_type}\")\n", " session_options.add_provider_for_devices([ep_device], {} if ep_options is None else ep_options)\n", + " break\n", "\n", "\n", "session_options = ort.SessionOptions()\n", diff --git a/openai-clip-vit-base-patch16/aitk/openai_clip_ov_inference_sample.ipynb b/openai-clip-vit-base-patch16/aitk/openai_clip_ov_inference_sample.ipynb index b133b368..9ba20f48 100644 --- a/openai-clip-vit-base-patch16/aitk/openai_clip_ov_inference_sample.ipynb +++ b/openai-clip-vit-base-patch16/aitk/openai_clip_ov_inference_sample.ipynb @@ -78,6 +78,7 @@ " if ep_device.ep_name == ep_name and ep_device.device.type == device_type:\n", " print(f\"Adding {ep_name} for {device_type}\")\n", " session_options.add_provider_for_devices([ep_device], {} if ep_options is None else 
ep_options)\n", + " break\n", " \n", "opts = ort.SessionOptions()\n", " \n", diff --git a/openai-clip-vit-base-patch16/aitk/openai_clip_qdq_amd_inference_sample.ipynb b/openai-clip-vit-base-patch16/aitk/openai_clip_qdq_amd_inference_sample.ipynb index 86363274..4c288088 100644 --- a/openai-clip-vit-base-patch16/aitk/openai_clip_qdq_amd_inference_sample.ipynb +++ b/openai-clip-vit-base-patch16/aitk/openai_clip_qdq_amd_inference_sample.ipynb @@ -78,6 +78,7 @@ " if ep_device.ep_name == ep_name and ep_device.device.type == device_type:\n", " print(f\"Adding {ep_name} for {device_type}\")\n", " session_options.add_provider_for_devices([ep_device], {} if ep_options is None else ep_options)\n", + " break\n", " \n", "opts = ort.SessionOptions()\n", " \n", diff --git a/openai-clip-vit-base-patch16/aitk/openai_clip_text_qnn_inference_sample.ipynb b/openai-clip-vit-base-patch16/aitk/openai_clip_text_qnn_inference_sample.ipynb index f00e5fef..46a0e8d6 100644 --- a/openai-clip-vit-base-patch16/aitk/openai_clip_text_qnn_inference_sample.ipynb +++ b/openai-clip-vit-base-patch16/aitk/openai_clip_text_qnn_inference_sample.ipynb @@ -80,6 +80,7 @@ " if ep_device.ep_name == ep_name and ep_device.device.type == device_type:\n", " print(f\"Adding {ep_name} for {device_type}\")\n", " session_options.add_provider_for_devices([ep_device], {} if ep_options is None else ep_options)\n", + " break\n", "\n", "\n", "session_options = ort.SessionOptions()\n", diff --git a/openai-clip-vit-base-patch16/aitk/openai_clip_vision_qnn_inference_sample.ipynb b/openai-clip-vit-base-patch16/aitk/openai_clip_vision_qnn_inference_sample.ipynb index f3609ed0..f90ea43a 100644 --- a/openai-clip-vit-base-patch16/aitk/openai_clip_vision_qnn_inference_sample.ipynb +++ b/openai-clip-vit-base-patch16/aitk/openai_clip_vision_qnn_inference_sample.ipynb @@ -12,6 +12,30 @@ "ExecutionProvider=\"QNNExecutionProvider\"" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "f82e3bca", + "metadata": {}, + "outputs": [], + "source": [ + "# reference: https://learn.microsoft.com/en-us/windows/ai/new-windows-ml/tutorial?tabs=python#acquiring-the-model-and-preprocessing\n", + "import subprocess\n", + "import json\n", + "import sys\n", + "import os\n", + "import onnxruntime as ort\n", + "\n", + "def register_execution_providers():\n", + " worker_script = os.path.abspath('winml.py')\n", + " result = subprocess.check_output([sys.executable, worker_script], text=True)\n", + " paths = json.loads(result)\n", + " for item in paths.items():\n", + " ort.register_execution_provider_library(item[0], item[1])\n", + "\n", + "register_execution_providers()" + ] + }, { "cell_type": "markdown", "id": "897ffb42-3569-4d78-b99d-355a38fdce35", @@ -57,6 +81,7 @@ " if ep_device.ep_name == ep_name and ep_device.device.type == device_type:\n", " print(f\"Adding {ep_name} for {device_type}\")\n", " session_options.add_provider_for_devices([ep_device], {} if ep_options is None else ep_options)\n", + " break\n", "\n", "\n", "session_options = ort.SessionOptions()\n", diff --git a/openai-clip-vit-base-patch32/aitk/openai_clip_ov_inference_sample.ipynb b/openai-clip-vit-base-patch32/aitk/openai_clip_ov_inference_sample.ipynb index 0312f8d6..03e0fc89 100644 --- a/openai-clip-vit-base-patch32/aitk/openai_clip_ov_inference_sample.ipynb +++ b/openai-clip-vit-base-patch32/aitk/openai_clip_ov_inference_sample.ipynb @@ -78,6 +78,7 @@ " if ep_device.ep_name == ep_name and ep_device.device.type == device_type:\n", " print(f\"Adding {ep_name} for {device_type}\")\n", " 
session_options.add_provider_for_devices([ep_device], {} if ep_options is None else ep_options)\n", + " break\n", " \n", "opts = ort.SessionOptions()\n", " \n", diff --git a/openai-clip-vit-base-patch32/aitk/openai_clip_qdq_amd_inference_sample.ipynb b/openai-clip-vit-base-patch32/aitk/openai_clip_qdq_amd_inference_sample.ipynb index fa35e6a2..658c7098 100644 --- a/openai-clip-vit-base-patch32/aitk/openai_clip_qdq_amd_inference_sample.ipynb +++ b/openai-clip-vit-base-patch32/aitk/openai_clip_qdq_amd_inference_sample.ipynb @@ -78,6 +78,7 @@ " if ep_device.ep_name == ep_name and ep_device.device.type == device_type:\n", " print(f\"Adding {ep_name} for {device_type}\")\n", " session_options.add_provider_for_devices([ep_device], {} if ep_options is None else ep_options)\n", + " break\n", " \n", "opts = ort.SessionOptions()\n", " \n", diff --git a/openai-clip-vit-base-patch32/aitk/openai_clip_text_qnn_inference_sample.ipynb b/openai-clip-vit-base-patch32/aitk/openai_clip_text_qnn_inference_sample.ipynb index 3efc378d..347c9d15 100644 --- a/openai-clip-vit-base-patch32/aitk/openai_clip_text_qnn_inference_sample.ipynb +++ b/openai-clip-vit-base-patch32/aitk/openai_clip_text_qnn_inference_sample.ipynb @@ -80,6 +80,7 @@ " if ep_device.ep_name == ep_name and ep_device.device.type == device_type:\n", " print(f\"Adding {ep_name} for {device_type}\")\n", " session_options.add_provider_for_devices([ep_device], {} if ep_options is None else ep_options)\n", + " break\n", "\n", "\n", "session_options = ort.SessionOptions()\n", diff --git a/openai-clip-vit-base-patch32/aitk/openai_clip_vision_qnn_inference_sample.ipynb b/openai-clip-vit-base-patch32/aitk/openai_clip_vision_qnn_inference_sample.ipynb index 518a97c7..0863f581 100644 --- a/openai-clip-vit-base-patch32/aitk/openai_clip_vision_qnn_inference_sample.ipynb +++ b/openai-clip-vit-base-patch32/aitk/openai_clip_vision_qnn_inference_sample.ipynb @@ -12,6 +12,30 @@ "ExecutionProvider=\"QNNExecutionProvider\"" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "f82e3bca", + "metadata": {}, + "outputs": [], + "source": [ + "# reference: https://learn.microsoft.com/en-us/windows/ai/new-windows-ml/tutorial?tabs=python#acquiring-the-model-and-preprocessing\n", + "import subprocess\n", + "import json\n", + "import sys\n", + "import os\n", + "import onnxruntime as ort\n", + "\n", + "def register_execution_providers():\n", + " worker_script = os.path.abspath('winml.py')\n", + " result = subprocess.check_output([sys.executable, worker_script], text=True)\n", + " paths = json.loads(result)\n", + " for item in paths.items():\n", + " ort.register_execution_provider_library(item[0], item[1])\n", + "\n", + "register_execution_providers()" + ] + }, { "cell_type": "markdown", "id": "897ffb42-3569-4d78-b99d-355a38fdce35", @@ -57,6 +81,7 @@ " if ep_device.ep_name == ep_name and ep_device.device.type == device_type:\n", " print(f\"Adding {ep_name} for {device_type}\")\n", " session_options.add_provider_for_devices([ep_device], {} if ep_options is None else ep_options)\n", + " break\n", "\n", "\n", "session_options = ort.SessionOptions()\n", From 193561525bbe889b2e752ebe5e91b379c284c213 Mon Sep 17 00:00:00 2001 From: Chao Zhang Date: Fri, 8 Aug 2025 11:55:17 +0800 Subject: [PATCH 03/14] change req --- .aitk/requirements/requirements-WCR.txt | 24 ++++++++++++++++---- .aitk/requirements/requirements-WCR_CUDA.txt | 23 +++++++++++++++---- .aitk/scripts/install_freeze.py | 19 ++++++++++++---- 3 files changed, 52 insertions(+), 14 deletions(-) diff --git 
a/.aitk/requirements/requirements-WCR.txt b/.aitk/requirements/requirements-WCR.txt index 8220bd4e..b112f003 100644 --- a/.aitk/requirements/requirements-WCR.txt +++ b/.aitk/requirements/requirements-WCR.txt @@ -2,9 +2,10 @@ onnx==1.17.0 numpy==2.2.4 protobuf==6.30.2 -# olive-ai==0.9.1 -olive-ai==0.9.1 -onnxscript==0.2.5 +# olive-ai@git+https://github.com/microsoft/Olive.git#egg=olive-ai +olive-ai@git+https://github.com/microsoft/Olive.git#egg=olive-ai +onnx-ir==0.1.4 +onnxscript==0.3.2 optuna==4.2.1 pandas==2.2.3 pydantic==2.11.1 @@ -47,11 +48,11 @@ widgetsnbextension==4.0.13 # torchvision==0.22.0 torchvision==0.22.0 pillow==11.2.1 -# uvpip:install onnxruntime-winml==1.22.0.post1 --extra-index-url https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/ORT-Nightly/pypi/simple --no-deps;post +# uvpip:install onnxruntime-winml==1.22.0.post2 --extra-index-url https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/ORT-Nightly/pypi/simple --no-deps;post coloredlogs==15.0.1 flatbuffers==25.2.10 sympy==1.14.0 -# uvpip:install onnxruntime-genai-winml==0.8.3 --extra-index-url https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/ORT-Nightly/pypi/simple --no-deps;post +# uvpip:install onnxruntime-genai-winml==0.9.0.dev0 --extra-index-url https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/ORT-Nightly/pypi/simple --no-deps;post # evaluate==0.4.3 evaluate==0.4.3 # scikit-learn==1.6.1 @@ -59,3 +60,16 @@ scikit-learn==1.6.1 joblib==1.5.0 scipy==1.15.3 threadpoolctl==3.6.0 +--index-url https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/ORT-Nightly/pypi/simple +--extra-index-url https://pypi.org/simple +# winrt-runtime==3.2.1 +winrt-runtime==3.2.1 +typing-extensions==4.14.1 +# winrt-Windows.Foundation==3.2.1 +winrt-Windows.Foundation==3.2.1 +# winrt-Windows.Foundation.Collections==3.2.1 +winrt-Windows.Foundation.Collections==3.2.1 +# winui3-Microsoft.Windows.AI.MachineLearning==1!1.8.250702007.dev4 +winui3-Microsoft.Windows.AI.MachineLearning==1!1.8.250702007.dev4 +# winui3-Microsoft.Windows.ApplicationModel.DynamicDependency.Bootstrap==1!1.8.250702007.dev4 +winui3-Microsoft.Windows.ApplicationModel.DynamicDependency.Bootstrap==1!1.8.250702007.dev4 diff --git a/.aitk/requirements/requirements-WCR_CUDA.txt b/.aitk/requirements/requirements-WCR_CUDA.txt index 1713433c..4b50fc63 100644 --- a/.aitk/requirements/requirements-WCR_CUDA.txt +++ b/.aitk/requirements/requirements-WCR_CUDA.txt @@ -12,9 +12,10 @@ typing-extensions==4.14.0 onnx==1.17.0 numpy==2.2.4 protobuf==6.30.2 -# olive-ai==0.9.1 -olive-ai==0.9.1 -onnxscript==0.2.5 +# olive-ai@git+https://github.com/microsoft/Olive.git#egg=olive-ai +olive-ai@git+https://github.com/microsoft/Olive.git#egg=olive-ai +onnx-ir==0.1.4 +onnxscript==0.3.2 optuna==4.2.1 pandas==2.2.3 pydantic==2.11.1 @@ -54,10 +55,10 @@ widgetsnbextension==4.0.13 # torchvision==0.22.0+cu128 torchvision==0.22.0+cu128 pillow==11.2.1 -# uvpip:install onnxruntime-winml==1.22.0.post1 --extra-index-url https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/ORT-Nightly/pypi/simple --no-deps;post +# uvpip:install onnxruntime-winml==1.22.0.post2 --extra-index-url https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/ORT-Nightly/pypi/simple --no-deps;post coloredlogs==15.0.1 flatbuffers==25.2.10 -# uvpip:install onnxruntime-genai-winml==0.8.3 --extra-index-url https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/ORT-Nightly/pypi/simple --no-deps;post +# uvpip:install onnxruntime-genai-winml==0.9.0.dev0 
--extra-index-url https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/ORT-Nightly/pypi/simple --no-deps;post # evaluate==0.4.3 evaluate==0.4.3 # scikit-learn==1.6.1 @@ -65,3 +66,15 @@ scikit-learn==1.6.1 joblib==1.5.0 scipy==1.15.3 threadpoolctl==3.6.0 +--index-url https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/ORT-Nightly/pypi/simple +--extra-index-url https://pypi.org/simple +# winrt-runtime==3.2.1 +winrt-runtime==3.2.1 +# winrt-Windows.Foundation==3.2.1 +winrt-Windows.Foundation==3.2.1 +# winrt-Windows.Foundation.Collections==3.2.1 +winrt-Windows.Foundation.Collections==3.2.1 +# winui3-Microsoft.Windows.AI.MachineLearning==1!1.8.250702007.dev4 +winui3-Microsoft.Windows.AI.MachineLearning==1!1.8.250702007.dev4 +# winui3-Microsoft.Windows.ApplicationModel.DynamicDependency.Bootstrap==1!1.8.250702007.dev4 +winui3-Microsoft.Windows.ApplicationModel.DynamicDependency.Bootstrap==1!1.8.250702007.dev4 diff --git a/.aitk/scripts/install_freeze.py b/.aitk/scripts/install_freeze.py index 02c2eecd..e4e5e5ad 100644 --- a/.aitk/scripts/install_freeze.py +++ b/.aitk/scripts/install_freeze.py @@ -15,10 +15,19 @@ depsPrefix = "# deps:" cudaExtraUrl = "--extra-index-url https://download.pytorch.org/whl/cu128" torchCudaVersion = "torch==2.7.0+cu128" -onnxruntimeWinmlVersion = f"{uvpipInstallPrefix} onnxruntime-winml==1.22.0.post1 --extra-index-url https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/ORT-Nightly/pypi/simple --no-deps;post" -onnxruntimeGenaiWinmlVersion = f"{uvpipInstallPrefix} onnxruntime-genai-winml==0.8.3 --extra-index-url https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/ORT-Nightly/pypi/simple --no-deps;post" +onnxruntimeWinmlVersion = f"{uvpipInstallPrefix} onnxruntime-winml==1.22.0.post2 --extra-index-url https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/ORT-Nightly/pypi/simple --no-deps;post" +onnxruntimeGenaiWinmlVersion = f"{uvpipInstallPrefix} onnxruntime-genai-winml==0.9.0.dev0 --extra-index-url https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/ORT-Nightly/pypi/simple --no-deps;post" evaluateVersion = "evaluate==0.4.3" scikitLearnVersion = "scikit-learn==1.6.1" +winrtPackage = [ + "--index-url https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/ORT-Nightly/pypi/simple", + "--extra-index-url https://pypi.org/simple", + "winrt-runtime==3.2.1", + "winrt-Windows.Foundation==3.2.1", + "winrt-Windows.Foundation.Collections==3.2.1", + "winui3-Microsoft.Windows.AI.MachineLearning==1!1.8.250702007.dev4", + "winui3-Microsoft.Windows.ApplicationModel.DynamicDependency.Bootstrap==1!1.8.250702007.dev4", +] def get_requires(name: str, args): @@ -50,8 +59,8 @@ def get_requires(name: str, args): def main(): # Constants - # if from git: "git+https://github.com/microsoft/Olive.git@COMMIT_ID#egg=olive_ai - oliveAi = "olive-ai==0.9.1" + # if from git: "git+https://github.com/microsoft/Olive.git@COMMIT_ID#egg=olive-ai + oliveAi = "olive-ai@git+https://github.com/microsoft/Olive.git#egg=olive-ai" torchVision = "torchvision==0.22.0" pre = { RuntimeEnum.NvidiaGPU: [ @@ -107,6 +116,7 @@ def main(): onnxruntimeGenaiWinmlVersion, evaluateVersion, scikitLearnVersion, + *winrtPackage, ], RuntimeEnum.WCR_CUDA: [ "torchvision==0.22.0+cu128", @@ -114,6 +124,7 @@ def main(): onnxruntimeGenaiWinmlVersion, evaluateVersion, scikitLearnVersion, + *winrtPackage, ], RuntimeEnum.QNN_LLLM: [ "ipykernel==6.29.5", From 72a4bf40532c7c2de243cae3a266571f7fd1c161 Mon Sep 17 00:00:00 2001 From: Chao Zhang Date: Tue, 19 Aug 2025 
15:28:52 +0800 Subject: [PATCH 04/14] add webgpu recipe --- .aitk/configs/checks.json | 6 +- .aitk/configs/model_list.json | 60 ++++---- .aitk/requirements/requirements-WCR.txt | 14 +- .aitk/requirements/requirements-WCR_CUDA.txt | 14 +- .aitk/scripts/model_lab/__init__.py | 1 + .aitk/scripts/sanitize/constants.py | 1 + .aitk/scripts/sanitize/main.py | 3 +- .aitk/scripts/sanitize/utils.py | 3 + .../aitk/_copy.json.config | 31 ++++- .../aitk/model_project.config | 4 + .../aitk/qwen2_5_webgpu_config.json | 38 +++++ .../aitk/qwen2_5_webgpu_config.json.config | 42 ++++++ .../aitk/deepseek_webgpu_config.json | 38 +++++ .../aitk/deepseek_webgpu_config.json.config | 42 ++++++ .../aitk/info.yml | 3 + .../aitk/model_project.config | 4 + .../aitk/bert_webgpu.json | 130 ++++++++++++++++++ .../aitk/bert_webgpu.json.config | 102 ++++++++++++++ intel-bert-base-uncased-mrpc/aitk/info.yml | 4 + .../aitk/model_project.config | 4 + .../aitk/_copy.json.config | 31 ++++- .../aitk/llama3_2_webgpu_config.json | 38 +++++ .../aitk/llama3_2_webgpu_config.json.config | 42 ++++++ .../aitk/model_project.config | 4 + .../aitk/_copy.json.config | 31 ++++- .../aitk/model_project.config | 4 + .../aitk/phi3_5_webgpu_config.json | 38 +++++ .../aitk/phi3_5_webgpu_config.json.config | 42 ++++++ 28 files changed, 716 insertions(+), 58 deletions(-) create mode 100644 Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_webgpu_config.json create mode 100644 Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_webgpu_config.json.config create mode 100644 deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_webgpu_config.json create mode 100644 deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_webgpu_config.json.config create mode 100644 intel-bert-base-uncased-mrpc/aitk/bert_webgpu.json create mode 100644 intel-bert-base-uncased-mrpc/aitk/bert_webgpu.json.config create mode 100644 meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_webgpu_config.json create mode 100644 meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_webgpu_config.json.config create mode 100644 microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_webgpu_config.json create mode 100644 microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_webgpu_config.json.config diff --git a/.aitk/configs/checks.json b/.aitk/configs/checks.json index 5904d6a2..532ef433 100644 --- a/.aitk/configs/checks.json +++ b/.aitk/configs/checks.json @@ -1,13 +1,13 @@ { - "configCheck": 80, + "configCheck": 85, "extensionCheck": 1, "gitignoreCheck": 32, "inferenceModelCheck": 22, "ipynbCheck": 51, "modelProjectCheck": 33, "oliveCheck": 0, - "oliveJsonCheck": 80, - "pathCheck": 756, + "oliveJsonCheck": 85, + "pathCheck": 772, "requirementsCheck": 32, "venvRequirementsCheck": 12 } diff --git a/.aitk/configs/model_list.json b/.aitk/configs/model_list.json index 82540e55..2acec4fe 100644 --- a/.aitk/configs/model_list.json +++ b/.aitk/configs/model_list.json @@ -24,10 +24,11 @@ "QNN", "AMDNPU", "NvidiaTRTRTX", - "DML", "IntelCPU", "IntelGPU", - "IntelNPU" + "IntelNPU", + "DML", + "WebGpu" ], "architecture": "Transformer", "status": "Ready", @@ -73,10 +74,10 @@ "QNN", "AMDNPU", "NvidiaTRTRTX", - "DML", "IntelCPU", "IntelGPU", - "IntelNPU" + "IntelNPU", + "DML" ], "architecture": "Transformer", "status": "Ready", @@ -92,10 +93,10 @@ "QNN", "AMDNPU", "NvidiaTRTRTX", - "DML", "IntelCPU", "IntelGPU", - "IntelNPU" + "IntelNPU", + "DML" ], "architecture": "Transformer", "status": "Ready", @@ -111,10 +112,11 @@ "QNN", "AMDNPU", "NvidiaTRTRTX", - "DML", "IntelCPU", "IntelGPU", - "IntelNPU" + "IntelNPU", + "DML", + "WebGpu" ], "architecture": 
"Transformer", "status": "Ready", @@ -130,10 +132,10 @@ "QNN", "AMDNPU", "NvidiaTRTRTX", - "DML", "IntelCPU", "IntelGPU", - "IntelNPU" + "IntelNPU", + "DML" ], "architecture": "Transformer", "status": "Ready", @@ -149,10 +151,10 @@ "QNN", "AMDNPU", "NvidiaTRTRTX", - "DML", "IntelCPU", "IntelGPU", - "IntelNPU" + "IntelNPU", + "DML" ], "architecture": "Transformer", "status": "Ready", @@ -198,10 +200,10 @@ "QNN", "AMDNPU", "NvidiaTRTRTX", - "DML", "IntelCPU", "IntelGPU", - "IntelNPU" + "IntelNPU", + "DML" ], "architecture": "Transformer", "status": "Ready", @@ -242,9 +244,7 @@ "modelLink": "https://huggingface.co/microsoft/Phi-4-reasoning", "id": "huggingface/microsoft/Phi-4-reasoning", "runtimes": [ - "IntelCPU", - "IntelGPU", - "IntelNPU" + "IntelGPU" ], "architecture": "Transformer", "status": "Ready", @@ -257,9 +257,7 @@ "modelLink": "https://huggingface.co/microsoft/Phi-4-reasoning-plus", "id": "huggingface/microsoft/Phi-4-reasoning-plus", "runtimes": [ - "IntelCPU", - "IntelGPU", - "IntelNPU" + "IntelGPU" ], "architecture": "Transformer", "status": "Ready", @@ -275,10 +273,10 @@ "QNN", "AMDNPU", "NvidiaTRTRTX", - "DML", "IntelCPU", "IntelGPU", - "IntelNPU" + "IntelNPU", + "DML" ], "architecture": "CNN", "status": "Ready", @@ -307,10 +305,10 @@ "QNN", "AMDNPU", "NvidiaTRTRTX", - "DML", "IntelCPU", "IntelGPU", - "IntelNPU" + "IntelNPU", + "DML" ], "architecture": "Transformer", "status": "Ready", @@ -326,10 +324,10 @@ "QNN", "AMDNPU", "NvidiaTRTRTX", - "DML", "IntelCPU", "IntelGPU", - "IntelNPU" + "IntelNPU", + "DML" ], "architecture": "Transformer", "status": "Ready", @@ -375,10 +373,10 @@ "QNN", "AMDNPU", "NvidiaTRTRTX", - "DML", "IntelCPU", "IntelGPU", - "IntelNPU" + "IntelNPU", + "DML" ], "architecture": "Transformer", "status": "Ready", @@ -584,7 +582,8 @@ "AMD NPU": "AMDNPU", "NVIDIA GPU": "NvidiaGPU", "NVIDIA TensorRT for RTX": "NvidiaTRTRTX", - "DirectML": "DML" + "DirectML": "DML", + "WebGpu": "WebGpu" }, "RuntimeToDisplayName": { "CPU": "CPU", @@ -596,6 +595,7 @@ "AMDNPU": "AMD NPU", "NvidiaGPU": "NVIDIA GPU", "NvidiaTRTRTX": "NVIDIA TensorRT for RTX", - "DML": "DirectML" + "DML": "DirectML", + "WebGpu": "WebGpu" } } diff --git a/.aitk/requirements/requirements-WCR.txt b/.aitk/requirements/requirements-WCR.txt index 0fcd89e0..7e3a9ce9 100644 --- a/.aitk/requirements/requirements-WCR.txt +++ b/.aitk/requirements/requirements-WCR.txt @@ -48,11 +48,13 @@ widgetsnbextension==4.0.13 # torchvision==0.22.0 torchvision==0.22.0 pillow==11.2.1 -# uvpip:install onnxruntime-winml==1.22.0.post2 --extra-index-url https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/ORT-Nightly/pypi/simple --no-deps;post +./onnxruntime_winml-1.23.0.dev20250811-cp312-cp312-win_amd64.whl +# download:onnxruntime_winml-1.23.0.dev20250811-cp312-cp312-win_amd64.whl coloredlogs==15.0.1 flatbuffers==25.2.10 sympy==1.14.0 -# uvpip:install onnxruntime-genai-winml==0.9.0.dev0 --extra-index-url https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/ORT-Nightly/pypi/simple --no-deps;post +./onnxruntime_genai_winml-0.10.0.dev20250814-cp312-cp312-win_amd64.whl +# download:onnxruntime_genai_winml-0.10.0.dev20250814-cp312-cp312-win_amd64.whl # evaluate==0.4.3 evaluate==0.4.3 # scikit-learn==1.6.1 @@ -71,7 +73,7 @@ typing-extensions==4.14.1 winrt-Windows.Foundation==3.2.1 # winrt-Windows.Foundation.Collections==3.2.1 winrt-Windows.Foundation.Collections==3.2.1 -# winui3-Microsoft.Windows.AI.MachineLearning==1!1.8.250702007.dev4 -winui3-Microsoft.Windows.AI.MachineLearning==1!1.8.250702007.dev4 -# 
winui3-Microsoft.Windows.ApplicationModel.DynamicDependency.Bootstrap==1!1.8.250702007.dev4 -winui3-Microsoft.Windows.ApplicationModel.DynamicDependency.Bootstrap==1!1.8.250702007.dev4 +./wasdk_microsoft_windows_ai_machinelearning-1.8.250812004.dev0-cp312-cp312-win_amd64.whl +# download:wasdk_microsoft_windows_ai_machinelearning-1.8.250812004.dev0-cp312-cp312-win_amd64.whl +./wasdk_microsoft_windows_applicationmodel_dynamicdependency_bootstrap-1.8.250812004.dev0-cp312-cp312-win_amd64.whl +# download:wasdk_microsoft_windows_applicationmodel_dynamicdependency_bootstrap-1.8.250812004.dev0-cp312-cp312-win_amd64.whl diff --git a/.aitk/requirements/requirements-WCR_CUDA.txt b/.aitk/requirements/requirements-WCR_CUDA.txt index 048a8121..ceb0a03d 100644 --- a/.aitk/requirements/requirements-WCR_CUDA.txt +++ b/.aitk/requirements/requirements-WCR_CUDA.txt @@ -55,10 +55,12 @@ widgetsnbextension==4.0.13 # torchvision==0.22.0+cu128 torchvision==0.22.0+cu128 pillow==11.2.1 -# uvpip:install onnxruntime-winml==1.22.0.post2 --extra-index-url https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/ORT-Nightly/pypi/simple --no-deps;post +./onnxruntime_winml-1.23.0.dev20250811-cp312-cp312-win_amd64.whl +# download:onnxruntime_winml-1.23.0.dev20250811-cp312-cp312-win_amd64.whl coloredlogs==15.0.1 flatbuffers==25.2.10 -# uvpip:install onnxruntime-genai-winml==0.9.0.dev0 --extra-index-url https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/ORT-Nightly/pypi/simple --no-deps;post +./onnxruntime_genai_winml-0.10.0.dev20250814-cp312-cp312-win_amd64.whl +# download:onnxruntime_genai_winml-0.10.0.dev20250814-cp312-cp312-win_amd64.whl # evaluate==0.4.3 evaluate==0.4.3 # scikit-learn==1.6.1 @@ -76,7 +78,7 @@ winrt-runtime==3.2.1 winrt-Windows.Foundation==3.2.1 # winrt-Windows.Foundation.Collections==3.2.1 winrt-Windows.Foundation.Collections==3.2.1 -# winui3-Microsoft.Windows.AI.MachineLearning==1!1.8.250702007.dev4 -winui3-Microsoft.Windows.AI.MachineLearning==1!1.8.250702007.dev4 -# winui3-Microsoft.Windows.ApplicationModel.DynamicDependency.Bootstrap==1!1.8.250702007.dev4 -winui3-Microsoft.Windows.ApplicationModel.DynamicDependency.Bootstrap==1!1.8.250702007.dev4 +./wasdk_microsoft_windows_ai_machinelearning-1.8.250812004.dev0-cp312-cp312-win_amd64.whl +# download:wasdk_microsoft_windows_ai_machinelearning-1.8.250812004.dev0-cp312-cp312-win_amd64.whl +./wasdk_microsoft_windows_applicationmodel_dynamicdependency_bootstrap-1.8.250812004.dev0-cp312-cp312-win_amd64.whl +# download:wasdk_microsoft_windows_applicationmodel_dynamicdependency_bootstrap-1.8.250812004.dev0-cp312-cp312-win_amd64.whl diff --git a/.aitk/scripts/model_lab/__init__.py b/.aitk/scripts/model_lab/__init__.py index acf74087..7d9c062d 100644 --- a/.aitk/scripts/model_lab/__init__.py +++ b/.aitk/scripts/model_lab/__init__.py @@ -11,6 +11,7 @@ class RuntimeEnum(Enum): IntelGPU = "IntelGPU" IntelNPU = "IntelNPU" DML = "DML" + WebGpu = "WebGpu" NvidiaGPU = "NvidiaGPU" WCR = "WCR" WCR_CUDA = "WCR_CUDA" diff --git a/.aitk/scripts/sanitize/constants.py b/.aitk/scripts/sanitize/constants.py index 8c46194e..6f5a49ab 100644 --- a/.aitk/scripts/sanitize/constants.py +++ b/.aitk/scripts/sanitize/constants.py @@ -92,6 +92,7 @@ class EPNames(Enum): VitisAIExecutionProvider = "VitisAIExecutionProvider" NvTensorRTRTXExecutionProvider = "NvTensorRTRTXExecutionProvider" DmlExecutionProvider = "DmlExecutionProvider" + WebGpuExecutionProvider = "WebGpuExecutionProvider" class OliveDeviceTypes(Enum): diff --git a/.aitk/scripts/sanitize/main.py 
b/.aitk/scripts/sanitize/main.py index 90331ac4..da1f7b69 100644 --- a/.aitk/scripts/sanitize/main.py +++ b/.aitk/scripts/sanitize/main.py @@ -53,6 +53,8 @@ def main(): # check parameter template parameterTemplate = readCheckParameterTemplate(os.path.join(configDir, "parameter_template.json")) + modelList.Check() + # check each model for model in modelList.allModels(): modelDir = shouldCheckModel(str(rootDir), configDir, model) @@ -171,7 +173,6 @@ def main(): # Write back to file newContent = json.dumps(inferenceModelData, indent=4, ensure_ascii=False) BaseModelClass.writeJsonIfChanged(newContent, inferenceModelFile, fileContent) - modelList.Check() if GlobalVars.olivePath: printWarning(f"Total {GlobalVars.oliveCheck} config files checked against olive json files") diff --git a/.aitk/scripts/sanitize/utils.py b/.aitk/scripts/sanitize/utils.py index 633b1b87..8640b814 100644 --- a/.aitk/scripts/sanitize/utils.py +++ b/.aitk/scripts/sanitize/utils.py @@ -43,6 +43,7 @@ class GlobalVars: RuntimeEnum.NvidiaGPU: EPNames.CUDAExecutionProvider, RuntimeEnum.NvidiaTRTRTX: EPNames.NvTensorRTRTXExecutionProvider, RuntimeEnum.DML: EPNames.DmlExecutionProvider, + RuntimeEnum.WebGpu: EPNames.WebGpuExecutionProvider, } RuntimeToOliveDeviceType = { RuntimeEnum.CPU: OliveDeviceTypes.CPU, @@ -54,6 +55,7 @@ class GlobalVars: RuntimeEnum.AMDNPU: OliveDeviceTypes.NPU, RuntimeEnum.NvidiaGPU: OliveDeviceTypes.GPU, RuntimeEnum.DML: OliveDeviceTypes.GPU, + RuntimeEnum.WebGpu: OliveDeviceTypes.GPU, } RuntimeToDisplayName = { RuntimeEnum.CPU: "CPU", @@ -66,6 +68,7 @@ class GlobalVars: RuntimeEnum.NvidiaGPU: "NVIDIA GPU", RuntimeEnum.NvidiaTRTRTX: "NVIDIA TensorRT for RTX", RuntimeEnum.DML: "DirectML", + RuntimeEnum.WebGpu: "WebGpu", } @classmethod diff --git a/Qwen-Qwen2.5-1.5B-Instruct/aitk/_copy.json.config b/Qwen-Qwen2.5-1.5B-Instruct/aitk/_copy.json.config index 999a03d6..474439f6 100644 --- a/Qwen-Qwen2.5-1.5B-Instruct/aitk/_copy.json.config +++ b/Qwen-Qwen2.5-1.5B-Instruct/aitk/_copy.json.config @@ -27,6 +27,10 @@ { "find": "deepseek_dml_config", "replace": "qwen2_5_dml_config" + }, + { + "find": "deepseek_webgpu_config", + "replace": "qwen2_5_webgpu_config" } ] }, @@ -106,6 +110,11 @@ } ] }, + { + "src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_trtrtx_config.json.config", + "dst": "qwen2_5_trtrtx_config.json.config", + "replacements": [] + }, { "src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_dml_config.json", "dst": "qwen2_5_dml_config.json", @@ -121,13 +130,27 @@ ] }, { - "src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_trtrtx_config.json.config", - "dst": "qwen2_5_trtrtx_config.json.config", + "src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_dml_config.json.config", + "dst": "qwen2_5_dml_config.json.config", "replacements": [] }, { - "src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_dml_config.json.config", - "dst": "qwen2_5_dml_config.json.config", + "src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_webgpu_config.json", + "dst": "qwen2_5_webgpu_config.json", + "replacements": [ + { + "find": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", + "replace": "Qwen/Qwen2.5-1.5B-Instruct" + }, + { + "find": "model/deepseek", + "replace": "model/qwen2_5" + } + ] + }, + { + "src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_webgpu_config.json.config", + "dst": "qwen2_5_webgpu_config.json.config", "replacements": [] }, { diff --git 
a/Qwen-Qwen2.5-1.5B-Instruct/aitk/model_project.config b/Qwen-Qwen2.5-1.5B-Instruct/aitk/model_project.config index 61cb1603..8f5a8378 100644 --- a/Qwen-Qwen2.5-1.5B-Instruct/aitk/model_project.config +++ b/Qwen-Qwen2.5-1.5B-Instruct/aitk/model_project.config @@ -19,6 +19,10 @@ { "file": "qwen2_5_dml_config.json", "templateName": "qwen2_5_dml_config" + }, + { + "file": "qwen2_5_webgpu_config.json", + "templateName": "qwen2_5_webgpu_config" } ], "modelInfo": { diff --git a/Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_webgpu_config.json b/Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_webgpu_config.json new file mode 100644 index 00000000..1ed86300 --- /dev/null +++ b/Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_webgpu_config.json @@ -0,0 +1,38 @@ +{ + "input_model": { + "type": "HfModel", + "model_path": "Qwen/Qwen2.5-1.5B-Instruct" + }, + "systems": { + "local_system": { + "type": "LocalSystem", + "accelerators": [ { "device": "gpu", "execution_providers": [ "WebGpuExecutionProvider" ] } ] + } + }, + "data_configs": [ + { + "name": "wikitext2_train", + "type": "HuggingfaceContainer", + "load_dataset_config": { + "data_name": "wikitext", + "subset": "wikitext-2-raw-v1", + "split": "train" + }, + "pre_process_data_config": { + "strategy": "line-by-line", + "add_special_tokens": false, + "max_samples": 128, + "max_seq_len": 512 + } + } + ], + "passes": { + "builder": { "type": "ModelBuilder", "precision": "fp16" } + }, + "target": "local_system", + "log_severity_level": 1, + "output_dir": "model/qwen2_5", + "cache_dir": "cache", + "no_artifacts": true, + "evaluate_input_model": false +} \ No newline at end of file diff --git a/Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_webgpu_config.json.config b/Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_webgpu_config.json.config new file mode 100644 index 00000000..183017fd --- /dev/null +++ b/Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_webgpu_config.json.config @@ -0,0 +1,42 @@ +{ + "name": "Convert to WebGpu", + "oliveFile": "", + "isLLM": true, + "debugInfo": { + "autoGenerated": true, + "useModelBuilder": "builder" + }, + "addCpu": false, + "runtime": { + "autoGenerated": true, + "name": "Evaluate on", + "type": "enum", + "displayNames": [ + "WebGpu" + ], + "path": "systems.local_system.accelerators.0.execution_providers.0", + "values": [ + "WebGpuExecutionProvider" + ], + "readOnly": false + }, + "sections": [ + { + "autoGenerated": true, + "name": "Convert", + "phase": "Conversion", + "parameters": [], + "toggle": { + "autoGenerated": true, + "name": "Convert to ONNX format", + "type": "bool", + "path": "passes.builder", + "actions": [ + [], + [] + ], + "readOnly": true + } + } + ] +} diff --git a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_webgpu_config.json b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_webgpu_config.json new file mode 100644 index 00000000..c02096ff --- /dev/null +++ b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_webgpu_config.json @@ -0,0 +1,38 @@ +{ + "input_model": { + "type": "HfModel", + "model_path": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B" + }, + "systems": { + "local_system": { + "type": "LocalSystem", + "accelerators": [ { "device": "gpu", "execution_providers": [ "WebGpuExecutionProvider" ] } ] + } + }, + "data_configs": [ + { + "name": "wikitext2_train", + "type": "HuggingfaceContainer", + "load_dataset_config": { + "data_name": "wikitext", + "subset": "wikitext-2-raw-v1", + "split": "train" + }, + "pre_process_data_config": { + "strategy": "line-by-line", + "add_special_tokens": false, + "max_samples": 128, + 
"max_seq_len": 512 + } + } + ], + "passes": { + "builder": { "type": "ModelBuilder", "precision": "fp16" } + }, + "target": "local_system", + "log_severity_level": 1, + "output_dir": "model/deepseek", + "cache_dir": "cache", + "no_artifacts": true, + "evaluate_input_model": false +} \ No newline at end of file diff --git a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_webgpu_config.json.config b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_webgpu_config.json.config new file mode 100644 index 00000000..183017fd --- /dev/null +++ b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_webgpu_config.json.config @@ -0,0 +1,42 @@ +{ + "name": "Convert to WebGpu", + "oliveFile": "", + "isLLM": true, + "debugInfo": { + "autoGenerated": true, + "useModelBuilder": "builder" + }, + "addCpu": false, + "runtime": { + "autoGenerated": true, + "name": "Evaluate on", + "type": "enum", + "displayNames": [ + "WebGpu" + ], + "path": "systems.local_system.accelerators.0.execution_providers.0", + "values": [ + "WebGpuExecutionProvider" + ], + "readOnly": false + }, + "sections": [ + { + "autoGenerated": true, + "name": "Convert", + "phase": "Conversion", + "parameters": [], + "toggle": { + "autoGenerated": true, + "name": "Convert to ONNX format", + "type": "bool", + "path": "passes.builder", + "actions": [ + [], + [] + ], + "readOnly": true + } + } + ] +} diff --git a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/info.yml b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/info.yml index 0a43310f..e430a860 100644 --- a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/info.yml +++ b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/info.yml @@ -22,6 +22,9 @@ recipes: - file: "deepseek_dml_config.json" device: gpu ep: DmlExecutionProvider + - file: "deepseek_webgpu_config.json" + device: gpu + ep: WebGpuExecutionProvider aitk: modelInfo: id: "huggingface/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B" diff --git a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/model_project.config b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/model_project.config index d78581fe..b34c8b3a 100644 --- a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/model_project.config +++ b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/model_project.config @@ -19,6 +19,10 @@ { "file": "deepseek_dml_config.json", "templateName": "deepseek_dml_config" + }, + { + "file": "deepseek_webgpu_config.json", + "templateName": "deepseek_webgpu_config" } ], "modelInfo": { diff --git a/intel-bert-base-uncased-mrpc/aitk/bert_webgpu.json b/intel-bert-base-uncased-mrpc/aitk/bert_webgpu.json new file mode 100644 index 00000000..ebbca861 --- /dev/null +++ b/intel-bert-base-uncased-mrpc/aitk/bert_webgpu.json @@ -0,0 +1,130 @@ +{ + "input_model": { + "type": "HfModel", + "model_path": "Intel/bert-base-uncased-mrpc", + "task": "text-classification", + "load_kwargs": { + "attn_implementation": "eager" + } + }, + "systems": { + "host_system": { + "type": "LocalSystem", + "accelerators": [ + { + "device": "cpu", + "execution_providers": [ + "CPUExecutionProvider" + ] + } + ] + }, + "target_system": { + "type": "LocalSystem", + "accelerators": [ + { + "device": "gpu", + "execution_providers": [ + "WebGpuExecutionProvider" + ] + } + ] + } + }, + "data_configs": [ + { + "name": "glue_mrpc_eval", + "type": "HuggingfaceContainer", + "load_dataset_config": { + "data_name": "glue", + "subset": "mrpc", + "split": "validation" + }, + "pre_process_data_config": { + "max_length": 128, + "padding": "max_length", + "input_cols": [ + "sentence1", + "sentence2" + ], + 
"max_samples": 100 + }, + "dataloader_config": { + "batch_size": 1 + } + } + ], + "evaluators": { + "common_evaluator": { + "metrics": [ + { + "name": "accuracy", + "type": "accuracy", + "data_config": "glue_mrpc_eval", + "sub_types": [ + { + "name": "accuracy_score", + "priority": 1 + }, + { + "name": "f1_score" + } + ] + }, + { + "name": "latency", + "type": "latency", + "data_config": "glue_mrpc_eval", + "sub_types": [ + { + "name": "avg", + "priority": 2 + } + ] + } + ] + } + }, + "passes": { + "conversion": { + "type": "OnnxConversion", + "target_opset": 20, + "save_as_external_data": true + }, + "transformer_optimizer": { + "type": "OrtTransformersOptimization", + "model_type": "bert", + "opt_level": 0, + "float16": true, + "use_gpu": true, + "keep_io_types": false, + "optimization_options": { + "enable_gelu": true, + "enable_layer_norm": true, + "enable_attention": true, + "enable_skip_layer_norm": false, + "enable_embed_layer_norm": false, + "enable_bias_skip_layer_norm": false, + "enable_bias_gelu": false, + "enable_gelu_approximation": false, + "enable_qordered_matmul": false, + "enable_shape_inference": true, + "enable_gemm_fast_gelu": false, + "enable_nhwc_conv": false, + "enable_group_norm": false, + "enable_bias_splitgelu": false, + "enable_packed_qkv": true, + "enable_packed_kv": true, + "enable_bias_add": false, + "enable_rotary_embeddings": true + }, + "save_as_external_data": true + } + }, + "evaluator": "common_evaluator", + "evaluate_input_model": false, + "host": "host_system", + "target": "target_system", + "cache_dir": "cache", + "output_dir": "model/bert_webgpu" +} \ No newline at end of file diff --git a/intel-bert-base-uncased-mrpc/aitk/bert_webgpu.json.config b/intel-bert-base-uncased-mrpc/aitk/bert_webgpu.json.config new file mode 100644 index 00000000..eadfcfce --- /dev/null +++ b/intel-bert-base-uncased-mrpc/aitk/bert_webgpu.json.config @@ -0,0 +1,102 @@ +{ + "name": "Convert to WebGpu", + "addCpu": false, + "runtime": { + "autoGenerated": true, + "name": "Evaluate on", + "type": "enum", + "displayNames": [ + "WebGpu" + ], + "path": "systems.target_system.accelerators.0.execution_providers.0", + "values": [ + "WebGpuExecutionProvider" + ], + "readOnly": false + }, + "sections": [ + { + "autoGenerated": true, + "name": "Convert", + "phase": "Conversion", + "parameters": [], + "toggle": { + "autoGenerated": true, + "name": "Convert to ONNX format", + "type": "bool", + "path": "passes.conversion", + "actions": [ + [], + [] + ], + "readOnly": true + } + }, + { + "name": "Evaluate", + "phase": "Evaluation", + "parameters": [ + { + "name": "Evaluation Dataset", + "tags": [ + "EvaluationDataset" + ], + "type": "enum", + "path": "data_configs[0].load_dataset_config.data_name", + "values": [ + "glue" + ], + "template": { + "path": "data_configs[0].load_dataset_config.data_name", + "values": [ + "glue" + ], + "template": "EvaluationDataset" + } + }, + { + "name": "Evaluation Dataset Split", + "tags": [ + "EvaluationDatasetSplit", + "DependsOnDataset" + ], + "type": "enum", + "path": "data_configs[0].load_dataset_config.split", + "values": [ + "train", + "validation", + "test" + ], + "template": { + "path": "data_configs[0].load_dataset_config.split", + "template": "EvaluationDatasetSplit" + } + }, + { + "name": "Evaluation Dataset Size", + "type": "int", + "path": "data_configs[0].pre_process_data_config.max_samples", + "template": { + "path": "data_configs[0].pre_process_data_config.max_samples", + "template": "EvaluationDatasetSize" + } + } + ], + "toggle": { + 
"autoGenerated": true, + "name": "Evaluate model performance", + "type": "bool", + "path": "evaluator", + "actions": [ + [], + [ + { + "type": "delete", + "path": "evaluator" + } + ] + ] + } + } + ] +} diff --git a/intel-bert-base-uncased-mrpc/aitk/info.yml b/intel-bert-base-uncased-mrpc/aitk/info.yml index 6655a388..c6b22743 100644 --- a/intel-bert-base-uncased-mrpc/aitk/info.yml +++ b/intel-bert-base-uncased-mrpc/aitk/info.yml @@ -22,6 +22,9 @@ recipes: - file: "bert_dml.json" device: gpu ep: DmlExecutionProvider + - file: "bert_webgpu.json" + device: gpu + ep: WebGpuExecutionProvider aitk: modelInfo: id: "huggingface/Intel/bert-base-uncased-mrpc" @@ -32,3 +35,4 @@ aitk: - file: "bert_ov.json" - file: "bert_trtrtx.json" - file: "bert_dml.json" + - file: "bert_webgpu.json" diff --git a/intel-bert-base-uncased-mrpc/aitk/model_project.config b/intel-bert-base-uncased-mrpc/aitk/model_project.config index ca302634..42b55101 100644 --- a/intel-bert-base-uncased-mrpc/aitk/model_project.config +++ b/intel-bert-base-uncased-mrpc/aitk/model_project.config @@ -19,6 +19,10 @@ { "file": "bert_dml.json", "templateName": "bert_dml" + }, + { + "file": "bert_webgpu.json", + "templateName": "bert_webgpu" } ], "modelInfo": { diff --git a/meta-llama-Llama-3.2-1B-Instruct/aitk/_copy.json.config b/meta-llama-Llama-3.2-1B-Instruct/aitk/_copy.json.config index 87aead8c..0865b5d0 100644 --- a/meta-llama-Llama-3.2-1B-Instruct/aitk/_copy.json.config +++ b/meta-llama-Llama-3.2-1B-Instruct/aitk/_copy.json.config @@ -27,6 +27,10 @@ { "find": "deepseek_dml_config", "replace": "llama3_2_dml_config" + }, + { + "find": "deepseek_webgpu_config", + "replace": "llama3_2_webgpu_config" } ] }, @@ -122,6 +126,11 @@ } ] }, + { + "src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_trtrtx_config.json.config", + "dst": "llama3_2_trtrtx_config.json.config", + "replacements": [] + }, { "src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_dml_config.json", "dst": "llama3_2_dml_config.json", @@ -137,13 +146,27 @@ ] }, { - "src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_trtrtx_config.json.config", - "dst": "llama3_2_trtrtx_config.json.config", + "src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_dml_config.json.config", + "dst": "llama3_2_dml_config.json.config", "replacements": [] }, { - "src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_dml_config.json.config", - "dst": "llama3_2_dml_config.json.config", + "src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_webgpu_config.json", + "dst": "llama3_2_webgpu_config.json", + "replacements": [ + { + "find": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", + "replace": "meta-llama/Llama-3.2-1B-Instruct" + }, + { + "find": "model/deepseek", + "replace": "model/llama3_2" + } + ] + }, + { + "src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_webgpu_config.json.config", + "dst": "llama3_2_webgpu_config.json.config", "replacements": [] }, { diff --git a/meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_webgpu_config.json b/meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_webgpu_config.json new file mode 100644 index 00000000..a9466a7d --- /dev/null +++ b/meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_webgpu_config.json @@ -0,0 +1,38 @@ +{ + "input_model": { + "type": "HfModel", + "model_path": "meta-llama/Llama-3.2-1B-Instruct" + }, + "systems": { + "local_system": { + "type": "LocalSystem", + "accelerators": [ { "device": "gpu", "execution_providers": [ 
"WebGpuExecutionProvider" ] } ] + } + }, + "data_configs": [ + { + "name": "wikitext2_train", + "type": "HuggingfaceContainer", + "load_dataset_config": { + "data_name": "wikitext", + "subset": "wikitext-2-raw-v1", + "split": "train" + }, + "pre_process_data_config": { + "strategy": "line-by-line", + "add_special_tokens": false, + "max_samples": 128, + "max_seq_len": 512 + } + } + ], + "passes": { + "builder": { "type": "ModelBuilder", "precision": "fp16" } + }, + "target": "local_system", + "log_severity_level": 1, + "output_dir": "model/llama3_2", + "cache_dir": "cache", + "no_artifacts": true, + "evaluate_input_model": false +} \ No newline at end of file diff --git a/meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_webgpu_config.json.config b/meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_webgpu_config.json.config new file mode 100644 index 00000000..183017fd --- /dev/null +++ b/meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_webgpu_config.json.config @@ -0,0 +1,42 @@ +{ + "name": "Convert to WebGpu", + "oliveFile": "", + "isLLM": true, + "debugInfo": { + "autoGenerated": true, + "useModelBuilder": "builder" + }, + "addCpu": false, + "runtime": { + "autoGenerated": true, + "name": "Evaluate on", + "type": "enum", + "displayNames": [ + "WebGpu" + ], + "path": "systems.local_system.accelerators.0.execution_providers.0", + "values": [ + "WebGpuExecutionProvider" + ], + "readOnly": false + }, + "sections": [ + { + "autoGenerated": true, + "name": "Convert", + "phase": "Conversion", + "parameters": [], + "toggle": { + "autoGenerated": true, + "name": "Convert to ONNX format", + "type": "bool", + "path": "passes.builder", + "actions": [ + [], + [] + ], + "readOnly": true + } + } + ] +} diff --git a/meta-llama-Llama-3.2-1B-Instruct/aitk/model_project.config b/meta-llama-Llama-3.2-1B-Instruct/aitk/model_project.config index e800ea2d..eeb5c936 100644 --- a/meta-llama-Llama-3.2-1B-Instruct/aitk/model_project.config +++ b/meta-llama-Llama-3.2-1B-Instruct/aitk/model_project.config @@ -19,6 +19,10 @@ { "file": "llama3_2_dml_config.json", "templateName": "llama3_2_dml_config" + }, + { + "file": "llama3_2_webgpu_config.json", + "templateName": "llama3_2_webgpu_config" } ], "modelInfo": { diff --git a/microsoft-Phi-3.5-mini-instruct/aitk/_copy.json.config b/microsoft-Phi-3.5-mini-instruct/aitk/_copy.json.config index e78f3b56..49a12afa 100644 --- a/microsoft-Phi-3.5-mini-instruct/aitk/_copy.json.config +++ b/microsoft-Phi-3.5-mini-instruct/aitk/_copy.json.config @@ -27,6 +27,10 @@ { "find": "deepseek_dml_config", "replace": "phi3_5_dml_config" + }, + { + "find": "deepseek_webgpu_config", + "replace": "phi3_5_webgpu_config" } ] }, @@ -106,6 +110,11 @@ } ] }, + { + "src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_trtrtx_config.json.config", + "dst": "phi3_5_trtrtx_config.json.config", + "replacements": [] + }, { "src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_dml_config.json", "dst": "phi3_5_dml_config.json", @@ -121,13 +130,27 @@ ] }, { - "src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_trtrtx_config.json.config", - "dst": "phi3_5_trtrtx_config.json.config", + "src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_dml_config.json.config", + "dst": "phi3_5_dml_config.json.config", "replacements": [] }, { - "src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_dml_config.json.config", - "dst": "phi3_5_dml_config.json.config", + "src": 
"../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_webgpu_config.json", + "dst": "phi3_5_webgpu_config.json", + "replacements": [ + { + "find": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", + "replace": "microsoft/Phi-3.5-mini-instruct" + }, + { + "find": "model/deepseek", + "replace": "model/phi3_5" + } + ] + }, + { + "src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_webgpu_config.json.config", + "dst": "phi3_5_webgpu_config.json.config", "replacements": [] }, { diff --git a/microsoft-Phi-3.5-mini-instruct/aitk/model_project.config b/microsoft-Phi-3.5-mini-instruct/aitk/model_project.config index 358e2c6c..91eba57f 100644 --- a/microsoft-Phi-3.5-mini-instruct/aitk/model_project.config +++ b/microsoft-Phi-3.5-mini-instruct/aitk/model_project.config @@ -19,6 +19,10 @@ { "file": "phi3_5_dml_config.json", "templateName": "phi3_5_dml_config" + }, + { + "file": "phi3_5_webgpu_config.json", + "templateName": "phi3_5_webgpu_config" } ], "modelInfo": { diff --git a/microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_webgpu_config.json b/microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_webgpu_config.json new file mode 100644 index 00000000..9c15cd02 --- /dev/null +++ b/microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_webgpu_config.json @@ -0,0 +1,38 @@ +{ + "input_model": { + "type": "HfModel", + "model_path": "microsoft/Phi-3.5-mini-instruct" + }, + "systems": { + "local_system": { + "type": "LocalSystem", + "accelerators": [ { "device": "gpu", "execution_providers": [ "WebGpuExecutionProvider" ] } ] + } + }, + "data_configs": [ + { + "name": "wikitext2_train", + "type": "HuggingfaceContainer", + "load_dataset_config": { + "data_name": "wikitext", + "subset": "wikitext-2-raw-v1", + "split": "train" + }, + "pre_process_data_config": { + "strategy": "line-by-line", + "add_special_tokens": false, + "max_samples": 128, + "max_seq_len": 512 + } + } + ], + "passes": { + "builder": { "type": "ModelBuilder", "precision": "fp16" } + }, + "target": "local_system", + "log_severity_level": 1, + "output_dir": "model/phi3_5", + "cache_dir": "cache", + "no_artifacts": true, + "evaluate_input_model": false +} \ No newline at end of file diff --git a/microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_webgpu_config.json.config b/microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_webgpu_config.json.config new file mode 100644 index 00000000..183017fd --- /dev/null +++ b/microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_webgpu_config.json.config @@ -0,0 +1,42 @@ +{ + "name": "Convert to WebGpu", + "oliveFile": "", + "isLLM": true, + "debugInfo": { + "autoGenerated": true, + "useModelBuilder": "builder" + }, + "addCpu": false, + "runtime": { + "autoGenerated": true, + "name": "Evaluate on", + "type": "enum", + "displayNames": [ + "WebGpu" + ], + "path": "systems.local_system.accelerators.0.execution_providers.0", + "values": [ + "WebGpuExecutionProvider" + ], + "readOnly": false + }, + "sections": [ + { + "autoGenerated": true, + "name": "Convert", + "phase": "Conversion", + "parameters": [], + "toggle": { + "autoGenerated": true, + "name": "Convert to ONNX format", + "type": "bool", + "path": "passes.builder", + "actions": [ + [], + [] + ], + "readOnly": true + } + } + ] +} From 38d6ff4ee9d11be27e311b5639e73f5be7c6c444 Mon Sep 17 00:00:00 2001 From: Chao Zhang Date: Tue, 19 Aug 2025 16:29:22 +0800 Subject: [PATCH 05/14] align with AITK --- .aitk/configs/model_list.json | 8 ++++---- .aitk/scripts/model_lab/__init__.py | 2 +- .aitk/scripts/sanitize/utils.py | 6 +++--- .../aitk/qwen2_5_webgpu_config.json.config | 4 ++-- 
.../aitk/deepseek_webgpu_config.json.config | 4 ++-- intel-bert-base-uncased-mrpc/aitk/bert_webgpu.json.config | 4 ++-- .../aitk/llama3_2_webgpu_config.json.config | 4 ++-- .../aitk/phi3_5_webgpu_config.json.config | 4 ++-- 8 files changed, 18 insertions(+), 18 deletions(-) diff --git a/.aitk/configs/model_list.json b/.aitk/configs/model_list.json index 2acec4fe..c5c96f36 100644 --- a/.aitk/configs/model_list.json +++ b/.aitk/configs/model_list.json @@ -28,7 +28,7 @@ "IntelGPU", "IntelNPU", "DML", - "WebGpu" + "WEBGPU" ], "architecture": "Transformer", "status": "Ready", @@ -116,7 +116,7 @@ "IntelGPU", "IntelNPU", "DML", - "WebGpu" + "WEBGPU" ], "architecture": "Transformer", "status": "Ready", @@ -583,7 +583,7 @@ "NVIDIA GPU": "NvidiaGPU", "NVIDIA TensorRT for RTX": "NvidiaTRTRTX", "DirectML": "DML", - "WebGpu": "WebGpu" + "WebGPU": "WEBGPU" }, "RuntimeToDisplayName": { "CPU": "CPU", @@ -596,6 +596,6 @@ "NvidiaGPU": "NVIDIA GPU", "NvidiaTRTRTX": "NVIDIA TensorRT for RTX", "DML": "DirectML", - "WebGpu": "WebGpu" + "WEBGPU": "WebGPU" } } diff --git a/.aitk/scripts/model_lab/__init__.py b/.aitk/scripts/model_lab/__init__.py index 7d9c062d..7ff8cf8b 100644 --- a/.aitk/scripts/model_lab/__init__.py +++ b/.aitk/scripts/model_lab/__init__.py @@ -11,7 +11,7 @@ class RuntimeEnum(Enum): IntelGPU = "IntelGPU" IntelNPU = "IntelNPU" DML = "DML" - WebGpu = "WebGpu" + WEBGPU = "WEBGPU" NvidiaGPU = "NvidiaGPU" WCR = "WCR" WCR_CUDA = "WCR_CUDA" diff --git a/.aitk/scripts/sanitize/utils.py b/.aitk/scripts/sanitize/utils.py index 8640b814..97806f90 100644 --- a/.aitk/scripts/sanitize/utils.py +++ b/.aitk/scripts/sanitize/utils.py @@ -43,7 +43,7 @@ class GlobalVars: RuntimeEnum.NvidiaGPU: EPNames.CUDAExecutionProvider, RuntimeEnum.NvidiaTRTRTX: EPNames.NvTensorRTRTXExecutionProvider, RuntimeEnum.DML: EPNames.DmlExecutionProvider, - RuntimeEnum.WebGpu: EPNames.WebGpuExecutionProvider, + RuntimeEnum.WEBGPU: EPNames.WebGpuExecutionProvider, } RuntimeToOliveDeviceType = { RuntimeEnum.CPU: OliveDeviceTypes.CPU, @@ -55,7 +55,7 @@ class GlobalVars: RuntimeEnum.AMDNPU: OliveDeviceTypes.NPU, RuntimeEnum.NvidiaGPU: OliveDeviceTypes.GPU, RuntimeEnum.DML: OliveDeviceTypes.GPU, - RuntimeEnum.WebGpu: OliveDeviceTypes.GPU, + RuntimeEnum.WEBGPU: OliveDeviceTypes.GPU, } RuntimeToDisplayName = { RuntimeEnum.CPU: "CPU", @@ -68,7 +68,7 @@ class GlobalVars: RuntimeEnum.NvidiaGPU: "NVIDIA GPU", RuntimeEnum.NvidiaTRTRTX: "NVIDIA TensorRT for RTX", RuntimeEnum.DML: "DirectML", - RuntimeEnum.WebGpu: "WebGpu", + RuntimeEnum.WEBGPU: "WebGPU", } @classmethod diff --git a/Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_webgpu_config.json.config b/Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_webgpu_config.json.config index 183017fd..60a17277 100644 --- a/Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_webgpu_config.json.config +++ b/Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_webgpu_config.json.config @@ -1,5 +1,5 @@ { - "name": "Convert to WebGpu", + "name": "Convert to WebGPU", "oliveFile": "", "isLLM": true, "debugInfo": { @@ -12,7 +12,7 @@ "name": "Evaluate on", "type": "enum", "displayNames": [ - "WebGpu" + "WebGPU" ], "path": "systems.local_system.accelerators.0.execution_providers.0", "values": [ diff --git a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_webgpu_config.json.config b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_webgpu_config.json.config index 183017fd..60a17277 100644 --- a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_webgpu_config.json.config +++ 
b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_webgpu_config.json.config @@ -1,5 +1,5 @@ { - "name": "Convert to WebGpu", + "name": "Convert to WebGPU", "oliveFile": "", "isLLM": true, "debugInfo": { @@ -12,7 +12,7 @@ "name": "Evaluate on", "type": "enum", "displayNames": [ - "WebGpu" + "WebGPU" ], "path": "systems.local_system.accelerators.0.execution_providers.0", "values": [ diff --git a/intel-bert-base-uncased-mrpc/aitk/bert_webgpu.json.config b/intel-bert-base-uncased-mrpc/aitk/bert_webgpu.json.config index eadfcfce..b0f0ed25 100644 --- a/intel-bert-base-uncased-mrpc/aitk/bert_webgpu.json.config +++ b/intel-bert-base-uncased-mrpc/aitk/bert_webgpu.json.config @@ -1,12 +1,12 @@ { - "name": "Convert to WebGpu", + "name": "Convert to WebGPU", "addCpu": false, "runtime": { "autoGenerated": true, "name": "Evaluate on", "type": "enum", "displayNames": [ - "WebGpu" + "WebGPU" ], "path": "systems.target_system.accelerators.0.execution_providers.0", "values": [ diff --git a/meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_webgpu_config.json.config b/meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_webgpu_config.json.config index 183017fd..60a17277 100644 --- a/meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_webgpu_config.json.config +++ b/meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_webgpu_config.json.config @@ -1,5 +1,5 @@ { - "name": "Convert to WebGpu", + "name": "Convert to WebGPU", "oliveFile": "", "isLLM": true, "debugInfo": { @@ -12,7 +12,7 @@ "name": "Evaluate on", "type": "enum", "displayNames": [ - "WebGpu" + "WebGPU" ], "path": "systems.local_system.accelerators.0.execution_providers.0", "values": [ diff --git a/microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_webgpu_config.json.config b/microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_webgpu_config.json.config index 183017fd..60a17277 100644 --- a/microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_webgpu_config.json.config +++ b/microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_webgpu_config.json.config @@ -1,5 +1,5 @@ { - "name": "Convert to WebGpu", + "name": "Convert to WebGPU", "oliveFile": "", "isLLM": true, "debugInfo": { @@ -12,7 +12,7 @@ "name": "Evaluate on", "type": "enum", "displayNames": [ - "WebGpu" + "WebGPU" ], "path": "systems.local_system.accelerators.0.execution_providers.0", "values": [ From 60caa99312b8376acc201251f94b5e121f99bdcb Mon Sep 17 00:00:00 2001 From: Chao Zhang Date: Tue, 19 Aug 2025 17:31:59 +0800 Subject: [PATCH 06/14] fix llm webgpu precision --- .aitk/configs/checks.json | 6 +- .aitk/configs/model_list.json | 6 +- .../aitk/qwen2_5_webgpu_config.json | 2 +- .../aitk/deepseek_webgpu_config.json | 2 +- .../bert-base-multilingual-cased_webgpu.json | 138 +++++++++++++++++ ...base-multilingual-cased_webgpu.json.config | 123 +++++++++++++++ .../aitk/info.yml | 3 + .../aitk/model_project.config | 4 + google-vit-base-patch16-224/aitk/info.yml | 3 + .../aitk/model_project.config | 4 + .../aitk/vit-base-patch16-224_webgpu.json | 142 ++++++++++++++++++ .../vit-base-patch16-224_webgpu.json.config | 104 +++++++++++++ .../aitk/llama3_2_webgpu_config.json | 2 +- .../aitk/phi3_5_webgpu_config.json | 2 +- 14 files changed, 532 insertions(+), 9 deletions(-) create mode 100644 google-bert-bert-base-multilingual-cased/aitk/bert-base-multilingual-cased_webgpu.json create mode 100644 google-bert-bert-base-multilingual-cased/aitk/bert-base-multilingual-cased_webgpu.json.config create mode 100644 google-vit-base-patch16-224/aitk/vit-base-patch16-224_webgpu.json create mode 100644 
google-vit-base-patch16-224/aitk/vit-base-patch16-224_webgpu.json.config diff --git a/.aitk/configs/checks.json b/.aitk/configs/checks.json index 532ef433..e88bd74f 100644 --- a/.aitk/configs/checks.json +++ b/.aitk/configs/checks.json @@ -1,13 +1,13 @@ { - "configCheck": 85, + "configCheck": 87, "extensionCheck": 1, "gitignoreCheck": 32, "inferenceModelCheck": 22, "ipynbCheck": 51, "modelProjectCheck": 33, "oliveCheck": 0, - "oliveJsonCheck": 85, - "pathCheck": 772, + "oliveJsonCheck": 87, + "pathCheck": 789, "requirementsCheck": 32, "venvRequirementsCheck": 12 } diff --git a/.aitk/configs/model_list.json b/.aitk/configs/model_list.json index c5c96f36..724e1117 100644 --- a/.aitk/configs/model_list.json +++ b/.aitk/configs/model_list.json @@ -77,7 +77,8 @@ "IntelCPU", "IntelGPU", "IntelNPU", - "DML" + "DML", + "WEBGPU" ], "architecture": "Transformer", "status": "Ready", @@ -96,7 +97,8 @@ "IntelCPU", "IntelGPU", "IntelNPU", - "DML" + "DML", + "WEBGPU" ], "architecture": "Transformer", "status": "Ready", diff --git a/Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_webgpu_config.json b/Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_webgpu_config.json index 1ed86300..8c4c0f8c 100644 --- a/Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_webgpu_config.json +++ b/Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_webgpu_config.json @@ -27,7 +27,7 @@ } ], "passes": { - "builder": { "type": "ModelBuilder", "precision": "fp16" } + "builder": { "type": "ModelBuilder", "precision": "int4" } }, "target": "local_system", "log_severity_level": 1, diff --git a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_webgpu_config.json b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_webgpu_config.json index c02096ff..34cd324f 100644 --- a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_webgpu_config.json +++ b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_webgpu_config.json @@ -27,7 +27,7 @@ } ], "passes": { - "builder": { "type": "ModelBuilder", "precision": "fp16" } + "builder": { "type": "ModelBuilder", "precision": "int4" } }, "target": "local_system", "log_severity_level": 1, diff --git a/google-bert-bert-base-multilingual-cased/aitk/bert-base-multilingual-cased_webgpu.json b/google-bert-bert-base-multilingual-cased/aitk/bert-base-multilingual-cased_webgpu.json new file mode 100644 index 00000000..962aba68 --- /dev/null +++ b/google-bert-bert-base-multilingual-cased/aitk/bert-base-multilingual-cased_webgpu.json @@ -0,0 +1,138 @@ +{ + "input_model": { + "type": "HfModel", + "model_path": "google-bert/bert-base-multilingual-cased", + "task": "feature-extraction" + }, + "systems": { + "host_system": { + "type": "LocalSystem", + "accelerators": [ + { + "device": "cpu", + "execution_providers": [ + "CPUExecutionProvider" + ] + } + ] + }, + "target_system": { + "type": "LocalSystem", + "accelerators": [ + { + "device": "gpu", + "execution_providers": [ + "WebGpuExecutionProvider" + ] + } + ] + } + }, + "data_configs": [ + { + "name": "evaluation_data_config", + "type": "HuggingfaceContainer", + "load_dataset_config": { + "data_name": "facebook/xnli", + "subset": "en", + "split": "validation" + }, + "pre_process_data_config": { + "input_cols": [ + "premise" + ], + "padding": "max_length", + "max_length": 128, + "max_samples": 10 + }, + "dataloader_config": { + "batch_size": 1 + } + } + ], + "evaluators": { + "common_evaluator": { + "metrics": [ + { + "name": "latency", + "type": "latency", + "data_config": "evaluation_data_config", + "sub_types": [ + { + "name": "avg", + "priority": 1, + "goal": { + "type": 
"percent-min-improvement", + "value": 0.1 + } + }, + { + "name": "max" + }, + { + "name": "min" + } + ] + }, + { + "name": "throughput", + "type": "throughput", + "data_config": "evaluation_data_config", + "sub_types": [ + { + "name": "avg" + }, + { + "name": "max" + }, + { + "name": "min" + } + ] + } + ] + } + }, + "passes": { + "conversion": { + "type": "OnnxConversion", + "target_opset": 20, + "save_as_external_data": true + }, + "transformer_optimizer": { + "type": "OrtTransformersOptimization", + "model_type": "bert", + "opt_level": 0, + "float16": true, + "use_gpu": true, + "keep_io_types": false, + "optimization_options": { + "enable_gelu": true, + "enable_layer_norm": true, + "enable_attention": true, + "enable_skip_layer_norm": false, + "enable_embed_layer_norm": false, + "enable_bias_skip_layer_norm": false, + "enable_bias_gelu": false, + "enable_gelu_approximation": false, + "enable_qordered_matmul": false, + "enable_shape_inference": true, + "enable_gemm_fast_gelu": false, + "enable_nhwc_conv": false, + "enable_group_norm": false, + "enable_bias_splitgelu": false, + "enable_packed_qkv": true, + "enable_packed_kv": true, + "enable_bias_add": false, + "enable_rotary_embeddings": true + }, + "save_as_external_data": true + } + }, + "host": "host_system", + "target": "target_system", + "evaluator": "common_evaluator", + "cache_dir": "cache", + "output_dir": "model/google_bert", + "evaluate_input_model": false +} diff --git a/google-bert-bert-base-multilingual-cased/aitk/bert-base-multilingual-cased_webgpu.json.config b/google-bert-bert-base-multilingual-cased/aitk/bert-base-multilingual-cased_webgpu.json.config new file mode 100644 index 00000000..16d4d9bd --- /dev/null +++ b/google-bert-bert-base-multilingual-cased/aitk/bert-base-multilingual-cased_webgpu.json.config @@ -0,0 +1,123 @@ +{ + "name": "Convert to WebGPU", + "addCpu": false, + "runtime": { + "autoGenerated": true, + "name": "Evaluate on", + "type": "enum", + "displayNames": [ + "WebGPU" + ], + "path": "systems.target_system.accelerators.0.execution_providers.0", + "values": [ + "WebGpuExecutionProvider" + ], + "readOnly": false + }, + "sections": [ + { + "autoGenerated": true, + "name": "Convert", + "phase": "Conversion", + "parameters": [], + "toggle": { + "autoGenerated": true, + "name": "Convert to ONNX format", + "type": "bool", + "path": "passes.conversion", + "actions": [ + [], + [] + ], + "readOnly": true + } + }, + { + "name": "Evaluate", + "phase": "Evaluation", + "parameters": [ + { + "name": "Evaluation Dataset", + "tags": [ + "EvaluationDataset" + ], + "type": "enum", + "path": "data_configs[0].load_dataset_config.data_name", + "values": [ + "facebook/xnli" + ], + "template": { + "path": "data_configs[0].load_dataset_config.data_name", + "values": [ + "facebook/xnli" + ], + "template": "EvaluationDataset" + } + }, + { + "name": "Evaluation Dataset Subset", + "tags": [ + "EvaluationDatasetSubset", + "DependsOnDataset" + ], + "type": "enum", + "path": "data_configs[0].load_dataset_config.subset", + "values": [ + "en", + "all_languages" + ], + "template": { + "path": "data_configs[0].load_dataset_config.subset", + "values": [ + "en", + "all_languages" + ], + "template": "EvaluationDatasetSubset" + } + }, + { + "name": "Evaluation Dataset Split", + "tags": [ + "EvaluationDatasetSplit", + "DependsOnDataset" + ], + "type": "enum", + "path": "data_configs[0].load_dataset_config.split", + "values": [ + "train", + "validation", + "test" + ], + "template": { + "path": "data_configs[0].load_dataset_config.split", + 
"template": "EvaluationDatasetSplit" + } + }, + { + "name": "Quantization Dataset Size", + "type": "int", + "path": "data_configs[0].pre_process_data_config.max_samples", + "template": { + "path": "data_configs[0].pre_process_data_config.max_samples", + "template": "QuantizationDatasetSize" + } + } + ], + "toggle": { + "autoGenerated": true, + "name": "Evaluate model performance", + "type": "bool", + "path": "evaluator", + "actions": [ + [], + [ + { + "type": "delete", + "path": "evaluator" + } + ] + ] + } + } + ] +} diff --git a/google-bert-bert-base-multilingual-cased/aitk/info.yml b/google-bert-bert-base-multilingual-cased/aitk/info.yml index b87cb62d..dad37666 100644 --- a/google-bert-bert-base-multilingual-cased/aitk/info.yml +++ b/google-bert-bert-base-multilingual-cased/aitk/info.yml @@ -20,6 +20,9 @@ recipes: - file: "bert-base-multilingual-cased_dml.json" device: gpu ep: DmlExecutionProvider + - file: "bert-base-multilingual-cased_webgpu.json" + device: gpu + ep: WebGpuExecutionProvider aitk: modelInfo: id: "huggingface/google-bert/bert-base-multilingual-cased" diff --git a/google-bert-bert-base-multilingual-cased/aitk/model_project.config b/google-bert-bert-base-multilingual-cased/aitk/model_project.config index 41846e12..1272f822 100644 --- a/google-bert-bert-base-multilingual-cased/aitk/model_project.config +++ b/google-bert-bert-base-multilingual-cased/aitk/model_project.config @@ -19,6 +19,10 @@ { "file": "bert-base-multilingual-cased_dml.json", "templateName": "bert-base-multilingual-cased_dml" + }, + { + "file": "bert-base-multilingual-cased_webgpu.json", + "templateName": "bert-base-multilingual-cased_webgpu" } ], "modelInfo": { diff --git a/google-vit-base-patch16-224/aitk/info.yml b/google-vit-base-patch16-224/aitk/info.yml index cdc2474e..2ca96f51 100644 --- a/google-vit-base-patch16-224/aitk/info.yml +++ b/google-vit-base-patch16-224/aitk/info.yml @@ -20,6 +20,9 @@ recipes: - file: "vit-base-patch16-224_dml.json" device: gpu ep: DmlExecutionProvider + - file: "vit-base-patch16-224_webgpu.json" + device: gpu + ep: WebGpuExecutionProvider aitk: modelInfo: id: "huggingface/google/vit-base-patch16-224" diff --git a/google-vit-base-patch16-224/aitk/model_project.config b/google-vit-base-patch16-224/aitk/model_project.config index 7ec62cd3..41f5cd4e 100644 --- a/google-vit-base-patch16-224/aitk/model_project.config +++ b/google-vit-base-patch16-224/aitk/model_project.config @@ -19,6 +19,10 @@ { "file": "vit-base-patch16-224_dml.json", "templateName": "vit-base-patch16-224_dml" + }, + { + "file": "vit-base-patch16-224_webgpu.json", + "templateName": "vit-base-patch16-224_webgpu" } ], "modelInfo": { diff --git a/google-vit-base-patch16-224/aitk/vit-base-patch16-224_webgpu.json b/google-vit-base-patch16-224/aitk/vit-base-patch16-224_webgpu.json new file mode 100644 index 00000000..c1b6866e --- /dev/null +++ b/google-vit-base-patch16-224/aitk/vit-base-patch16-224_webgpu.json @@ -0,0 +1,142 @@ +{ + "input_model": { + "type": "HfModel", + "model_path": "google/vit-base-patch16-224", + "task": "image-classification", + "io_config": { + "input_names": [ + "pixel_values" + ], + "input_shapes": [ + [ + 1, + 3, + 224, + 224 + ] + ], + "output_names": [ + "output" + ] + } + }, + "systems": { + "host_system": { + "type": "LocalSystem", + "accelerators": [ + { + "device": "cpu", + "execution_providers": [ + "CPUExecutionProvider" + ] + } + ] + }, + "target_system": { + "type": "LocalSystem", + "accelerators": [ + { + "device": "gpu", + "execution_providers": [ + "WebGpuExecutionProvider" 
+ ] + } + ] + } + }, + "data_configs": [ + { + "name": "evaluation_data_config", + "type": "HuggingfaceContainer", + "user_script": "vit-base-patch16-224.py", + "load_dataset_config": { + "data_name": "timm/mini-imagenet", + "split": "validation", + "streaming": true, + "trust_remote_code": true + }, + "pre_process_data_config": { + "type": "dataset_pre_process", + "size": 1000, + "cache_key": "imagedata_evaluation" + }, + "post_process_data_config": { + "type": "dataset_post_process" + } + } + ], + "evaluators": { + "common_evaluator": { + "metrics": [ + { + "name": "accuracy", + "type": "accuracy", + "data_config": "evaluation_data_config", + "sub_types": [ + { + "name": "accuracy_score", + "priority": 1, + "metric_config": { + "task": "multiclass", + "num_classes": 1000 + } + } + ] + }, + { + "name": "latency", + "type": "latency", + "data_config": "evaluation_data_config", + "sub_types": [ + { + "name": "avg", + "priority": 2 + } + ] + } + ] + } + }, + "passes": { + "conversion": { + "type": "OnnxConversion", + "target_opset": 20, + "save_as_external_data": true + }, + "transformer_optimizer": { + "type": "OrtTransformersOptimization", + "model_type": "vit", + "opt_level": 0, + "float16": true, + "use_gpu": true, + "keep_io_types": false, + "optimization_options": { + "enable_gelu": true, + "enable_layer_norm": true, + "enable_attention": true, + "enable_skip_layer_norm": false, + "enable_embed_layer_norm": false, + "enable_bias_skip_layer_norm": false, + "enable_bias_gelu": false, + "enable_gelu_approximation": false, + "enable_qordered_matmul": false, + "enable_shape_inference": true, + "enable_gemm_fast_gelu": false, + "enable_nhwc_conv": false, + "enable_group_norm": false, + "enable_bias_splitgelu": false, + "enable_packed_qkv": true, + "enable_packed_kv": true, + "enable_bias_add": false, + "enable_rotary_embeddings": true + }, + "save_as_external_data": true + } + }, + "evaluator": "common_evaluator", + "evaluate_input_model": false, + "host": "host_system", + "target": "target_system", + "cache_dir": "cache", + "output_dir": "model/vit" +} diff --git a/google-vit-base-patch16-224/aitk/vit-base-patch16-224_webgpu.json.config b/google-vit-base-patch16-224/aitk/vit-base-patch16-224_webgpu.json.config new file mode 100644 index 00000000..23c3f074 --- /dev/null +++ b/google-vit-base-patch16-224/aitk/vit-base-patch16-224_webgpu.json.config @@ -0,0 +1,104 @@ +{ + "name": "Convert to WebGPU", + "addCpu": false, + "runtime": { + "autoGenerated": true, + "name": "Evaluate on", + "type": "enum", + "displayNames": [ + "WebGPU" + ], + "path": "systems.target_system.accelerators.0.execution_providers.0", + "values": [ + "WebGpuExecutionProvider" + ], + "readOnly": false + }, + "sections": [ + { + "autoGenerated": true, + "name": "Convert", + "phase": "Conversion", + "parameters": [], + "toggle": { + "autoGenerated": true, + "name": "Convert to ONNX format", + "type": "bool", + "path": "passes.conversion", + "actions": [ + [], + [] + ], + "readOnly": true + } + }, + { + "name": "Evaluate", + "phase": "Evaluation", + "parameters": [ + { + "name": "Evaluation Dataset", + "tags": [ + "EvaluationDataset" + ], + "type": "enum", + "path": "data_configs[0].load_dataset_config.data_name", + "values": [ + "timm/mini-imagenet", + "imagenet-1k" + ], + "template": { + "path": "data_configs[0].load_dataset_config.data_name", + "values": [ + "timm/mini-imagenet", + "imagenet-1k" + ], + "template": "EvaluationDataset" + } + }, + { + "name": "Evaluation Dataset Split", + "tags": [ + "EvaluationDatasetSplit", 
+ "DependsOnDataset" + ], + "type": "enum", + "path": "data_configs[0].load_dataset_config.split", + "values": [ + "train", + "validation", + "test" + ], + "template": { + "path": "data_configs[0].load_dataset_config.split", + "template": "EvaluationDatasetSplit" + } + }, + { + "name": "Evaluation Dataset Size", + "type": "int", + "path": "data_configs[0].pre_process_data_config.size", + "template": { + "path": "data_configs[0].pre_process_data_config.size", + "template": "EvaluationDatasetSize" + } + } + ], + "toggle": { + "autoGenerated": true, + "name": "Evaluate model performance", + "type": "bool", + "path": "evaluator", + "actions": [ + [], + [ + { + "type": "delete", + "path": "evaluator" + } + ] + ] + } + } + ] +} diff --git a/meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_webgpu_config.json b/meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_webgpu_config.json index a9466a7d..c27b5153 100644 --- a/meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_webgpu_config.json +++ b/meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_webgpu_config.json @@ -27,7 +27,7 @@ } ], "passes": { - "builder": { "type": "ModelBuilder", "precision": "fp16" } + "builder": { "type": "ModelBuilder", "precision": "int4" } }, "target": "local_system", "log_severity_level": 1, diff --git a/microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_webgpu_config.json b/microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_webgpu_config.json index 9c15cd02..71037506 100644 --- a/microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_webgpu_config.json +++ b/microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_webgpu_config.json @@ -27,7 +27,7 @@ } ], "passes": { - "builder": { "type": "ModelBuilder", "precision": "fp16" } + "builder": { "type": "ModelBuilder", "precision": "int4" } }, "target": "local_system", "log_severity_level": 1, From ecc64cfaff9a864ee8dedb00a90ed086e5a09ae3 Mon Sep 17 00:00:00 2001 From: Chao Zhang Date: Wed, 20 Aug 2025 13:10:11 +0800 Subject: [PATCH 07/14] add all webgpu recipes --- .aitk/configs/checks.json | 8 +- .aitk/configs/model_list.json | 21 +- Qwen-Qwen2.5-1.5B-Instruct/aitk/info.yml | 3 + .../aitk/_copy.json.config | 10 + ...-patch16-224_webgpu_inference_sample.ipynb | 233 ++++++++++++++++++ intel-bert-base-uncased-mrpc/aitk/info.yml | 7 - .../aitk/_copy.json.config | 25 ++ .../aitk/info.yml | 3 + .../aitk/laion_clip_webgpu.json | 206 ++++++++++++++++ .../aitk/laion_clip_webgpu.json.config | 84 +++++++ .../laion_clip_webgpu_inference_sample.ipynb | 115 +++++++++ .../aitk/model_project.config | 4 + .../aitk/info.yml | 3 + microsoft-Phi-3.5-mini-instruct/aitk/info.yml | 3 + microsoft-resnet-50/aitk/_copy.json.config | 10 + microsoft-resnet-50/aitk/info.yml | 3 + microsoft-resnet-50/aitk/model_project.config | 4 + microsoft-resnet-50/aitk/resnet_webgpu.json | 121 +++++++++ .../aitk/resnet_webgpu.json.config | 104 ++++++++ .../aitk/resnet_webgpu_inference_sample.ipynb | 145 +++++++++++ microsoft-resnet-50/aitk/winml.py | 21 ++ .../aitk/_copy.json.config | 10 + openai-clip-vit-base-patch16/aitk/info.yml | 3 + .../aitk/model_project.config | 4 + .../aitk/openai_clip_webgpu.json | 206 ++++++++++++++++ .../aitk/openai_clip_webgpu.json.config | 84 +++++++ .../openai_clip_webgpu_inference_sample.ipynb | 115 +++++++++ .../aitk/_copy.json.config | 25 ++ openai-clip-vit-base-patch32/aitk/info.yml | 3 + .../aitk/model_project.config | 4 + .../aitk/openai_clip_webgpu.json | 206 ++++++++++++++++ .../aitk/openai_clip_webgpu.json.config | 84 +++++++ .../openai_clip_webgpu_inference_sample.ipynb | 115 +++++++++ 33 files changed, 1974 insertions(+), 18 
deletions(-) create mode 100644 google-vit-base-patch16-224/aitk/vit-base-patch16-224_webgpu_inference_sample.ipynb create mode 100644 laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_webgpu.json create mode 100644 laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_webgpu.json.config create mode 100644 laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_webgpu_inference_sample.ipynb create mode 100644 microsoft-resnet-50/aitk/resnet_webgpu.json create mode 100644 microsoft-resnet-50/aitk/resnet_webgpu.json.config create mode 100644 microsoft-resnet-50/aitk/resnet_webgpu_inference_sample.ipynb create mode 100644 microsoft-resnet-50/aitk/winml.py create mode 100644 openai-clip-vit-base-patch16/aitk/openai_clip_webgpu.json create mode 100644 openai-clip-vit-base-patch16/aitk/openai_clip_webgpu.json.config create mode 100644 openai-clip-vit-base-patch16/aitk/openai_clip_webgpu_inference_sample.ipynb create mode 100644 openai-clip-vit-base-patch32/aitk/openai_clip_webgpu.json create mode 100644 openai-clip-vit-base-patch32/aitk/openai_clip_webgpu.json.config create mode 100644 openai-clip-vit-base-patch32/aitk/openai_clip_webgpu_inference_sample.ipynb diff --git a/.aitk/configs/checks.json b/.aitk/configs/checks.json index e88bd74f..6fcea59f 100644 --- a/.aitk/configs/checks.json +++ b/.aitk/configs/checks.json @@ -1,13 +1,13 @@ { - "configCheck": 87, + "configCheck": 91, "extensionCheck": 1, "gitignoreCheck": 32, "inferenceModelCheck": 22, - "ipynbCheck": 51, + "ipynbCheck": 56, "modelProjectCheck": 33, "oliveCheck": 0, - "oliveJsonCheck": 87, - "pathCheck": 789, + "oliveJsonCheck": 91, + "pathCheck": 818, "requirementsCheck": 32, "venvRequirementsCheck": 12 } diff --git a/.aitk/configs/model_list.json b/.aitk/configs/model_list.json index 724e1117..eb795b84 100644 --- a/.aitk/configs/model_list.json +++ b/.aitk/configs/model_list.json @@ -137,7 +137,8 @@ "IntelCPU", "IntelGPU", "IntelNPU", - "DML" + "DML", + "WEBGPU" ], "architecture": "Transformer", "status": "Ready", @@ -156,7 +157,8 @@ "IntelCPU", "IntelGPU", "IntelNPU", - "DML" + "DML", + "WEBGPU" ], "architecture": "Transformer", "status": "Ready", @@ -205,7 +207,8 @@ "IntelCPU", "IntelGPU", "IntelNPU", - "DML" + "DML", + "WEBGPU" ], "architecture": "Transformer", "status": "Ready", @@ -278,7 +281,8 @@ "IntelCPU", "IntelGPU", "IntelNPU", - "DML" + "DML", + "WEBGPU" ], "architecture": "CNN", "status": "Ready", @@ -310,7 +314,8 @@ "IntelCPU", "IntelGPU", "IntelNPU", - "DML" + "DML", + "WEBGPU" ], "architecture": "Transformer", "status": "Ready", @@ -329,7 +334,8 @@ "IntelCPU", "IntelGPU", "IntelNPU", - "DML" + "DML", + "WEBGPU" ], "architecture": "Transformer", "status": "Ready", @@ -378,7 +384,8 @@ "IntelCPU", "IntelGPU", "IntelNPU", - "DML" + "DML", + "WEBGPU" ], "architecture": "Transformer", "status": "Ready", diff --git a/Qwen-Qwen2.5-1.5B-Instruct/aitk/info.yml b/Qwen-Qwen2.5-1.5B-Instruct/aitk/info.yml index e7bacd4c..efe47e27 100644 --- a/Qwen-Qwen2.5-1.5B-Instruct/aitk/info.yml +++ b/Qwen-Qwen2.5-1.5B-Instruct/aitk/info.yml @@ -20,6 +20,9 @@ recipes: - file: "qwen2_5_dml_config.json" device: gpu ep: DmlExecutionProvider + - file: "qwen2_5_webgpu_config.json" + device: gpu + ep: WebGpuExecutionProvider aitk: modelInfo: id: "huggingface/Qwen/Qwen2.5-1.5B-Instruct" diff --git a/google-vit-base-patch16-224/aitk/_copy.json.config b/google-vit-base-patch16-224/aitk/_copy.json.config index f924a60d..9f13da90 100644 --- a/google-vit-base-patch16-224/aitk/_copy.json.config +++ b/google-vit-base-patch16-224/aitk/_copy.json.config 
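The _copy.json.config hunks that follow describe how derived files are generated: a source file, a destination, and literal find/replace rules. Going by the "src"/"dst"/"replacements" schema visible in these diffs, the generation step amounts to something like the sketch below; the Toolkit's actual generator script is not included here, so treat this as an assumed illustration:

    from pathlib import Path

    def apply_copy(entry: dict, base_dir: Path) -> None:
        # Read the source, apply each literal replacement, write the target.
        text = (base_dir / entry["src"]).read_text(encoding="utf-8")
        for rule in entry.get("replacements", []):
            text = text.replace(rule["find"], rule["replace"])
        (base_dir / entry["dst"]).write_text(text, encoding="utf-8")

    # For example, regenerating the WebGPU notebook from the DML sample:
    apply_copy(
        {
            "src": "vit-base-patch16-224_dml_inference_sample.ipynb",
            "dst": "vit-base-patch16-224_webgpu_inference_sample.ipynb",
            "replacements": [
                {"find": "DmlExecutionProvider", "replace": "WebGpuExecutionProvider"}
            ],
        },
        Path("google-vit-base-patch16-224/aitk"),
    )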
@@ -38,6 +38,16 @@ } ] }, + { + "src": "vit-base-patch16-224_dml_inference_sample.ipynb", + "dst": "vit-base-patch16-224_webgpu_inference_sample.ipynb", + "replacements": [ + { + "find": "DmlExecutionProvider", + "replace": "WebGpuExecutionProvider" + } + ] + }, { "src": "../../intel-bert-base-uncased-mrpc/aitk/winml.py", "dst": "winml.py" diff --git a/google-vit-base-patch16-224/aitk/vit-base-patch16-224_webgpu_inference_sample.ipynb b/google-vit-base-patch16-224/aitk/vit-base-patch16-224_webgpu_inference_sample.ipynb new file mode 100644 index 00000000..fa55eb61 --- /dev/null +++ b/google-vit-base-patch16-224/aitk/vit-base-patch16-224_webgpu_inference_sample.ipynb @@ -0,0 +1,233 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "onnx_model_path = \"./model/model.onnx\"\n", + "\n", + "ExecutionProvider=\"WebGpuExecutionProvider\"\n", + "if ExecutionProvider == \"OpenVINOExecutionProvider\":\n", + " onnx_model_path = \"./model/ov_model_st_quant.onnx\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# reference: https://learn.microsoft.com/en-us/windows/ai/new-windows-ml/tutorial?tabs=python#acquiring-the-model-and-preprocessing\n", + "import subprocess\n", + "import json\n", + "import sys\n", + "import os\n", + "import onnxruntime as ort\n", + "\n", + "def register_execution_providers():\n", + " worker_script = os.path.abspath('winml.py')\n", + " result = subprocess.check_output([sys.executable, worker_script], text=True)\n", + " paths = json.loads(result)\n", + " for item in paths.items():\n", + " ort.register_execution_provider_library(item[0], item[1])\n", + "\n", + "register_execution_providers()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import onnxruntime as ort\n", + "import time\n", + "import torch\n", + "import torchvision.transforms as transforms\n", + "from datasets import load_dataset\n", + "from transformers import ViTFeatureExtractor, ViTForImageClassification" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "num_samples = 256" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Load datasets\n", + "\n", + "feature_extractor = ViTFeatureExtractor.from_pretrained(\"google/vit-base-patch16-224\")\n", + "preprocess = transforms.Compose([\n", + " transforms.Lambda(lambda img: img.convert(\"RGB\")),\n", + " transforms.Resize((224, 224)),\n", + " transforms.ToTensor(),\n", + " transforms.Normalize(mean=feature_extractor.image_mean, std=feature_extractor.image_std),\n", + "])\n", + "\n", + "def imageTransform(example):\n", + " example[\"image\"] = preprocess(example[\"image\"])\n", + " return example\n", + "datasetStream = load_dataset(\"timm/mini-imagenet\", split=\"validation\", streaming=True, trust_remote_code=True)\n", + "iterable_dataset = iter(datasetStream)\n", + "selected_samples = [next(iterable_dataset) for _ in range(num_samples)]\n", + "selected_samples = list(map(imageTransform, selected_samples))\n", + "\n", + "def get_imagenet_label_map():\n", + " import json\n", + " from pathlib import Path\n", + " cache_file = Path(f\"../../cache/data/imagenet_class_index.json\")\n", + " if not cache_file.exists():\n", + " import requests \n", + " imagenet_class_index_url = (\n", + " 
\"https://raw.githubusercontent.com/pytorch/vision/main/gallery/assets/imagenet_class_index.json\"\n", + " )\n", + " response = requests.get(imagenet_class_index_url)\n", + " response.raise_for_status() # Ensure the request was successful\n", + " content = response.json()\n", + " cache_file.parent.resolve().mkdir(parents=True, exist_ok=True)\n", + " with open(cache_file, \"w\") as f:\n", + " json.dump(content, f)\n", + " else:\n", + " with open(cache_file) as f:\n", + " content = json.loads(f.read())\n", + "\n", + " return {v[0]: int(k) for k, v in content.items()}\n", + "\n", + "label_map = get_imagenet_label_map()\n", + "label_names = datasetStream.features[\"label\"].names\n", + "\n", + "def mini_to_imagenet_label(mini_label):\n", + " class_name = label_names[mini_label]\n", + " return label_map[class_name]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Original model metrics\n", + "\n", + "def evaluate_torch(model, selected_samples, device):\n", + " model.eval()\n", + " correct, total = 0, 0\n", + " latencies = []\n", + " with torch.no_grad():\n", + " for example in selected_samples:\n", + " image = example[\"image\"].unsqueeze(0).to(device)\n", + " label = torch.tensor(example[\"label\"]).to(device)\n", + " label = mini_to_imagenet_label(label.item())\n", + " \n", + " start_time = time.time()\n", + " output = model(image)\n", + " end_time = time.time()\n", + " \n", + " latencies.append((end_time - start_time))\n", + " pred = torch.argmax(output.logits, dim=1)\n", + " correct += (pred == label).sum().item()\n", + " total += 1\n", + " \n", + " accuracy = correct / total\n", + " avg_latency = np.mean(latencies)\n", + " return accuracy, avg_latency\n", + "\n", + "device = torch.device(\"cpu\")\n", + "model = ViTForImageClassification.from_pretrained(\"google/vit-base-patch16-224\").to(device)\n", + "accuracy, avg_latency = evaluate_torch(model, selected_samples, device)\n", + "\n", + "print(f\"Original Model Accuracy: {accuracy * 100:.2f}%\")\n", + "print(f\"Original Model Average Latency Per Image: {avg_latency * 1000:.2f} ms\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Quantized model metrics\n", + "\n", + "def evaluate_onnx(session, selected_samples):\n", + " correct, total = 0, 0\n", + " latencies = []\n", + " input_name = session.get_inputs()[0].name\n", + " output_name = session.get_outputs()[0].name\n", + "\n", + " for example in selected_samples:\n", + " image = np.expand_dims(example[\"image\"], axis=0)\n", + " label = example[\"label\"]\n", + " label = mini_to_imagenet_label(label)\n", + " \n", + " start_time = time.time()\n", + " output = session.run([output_name], {input_name: image.astype(np.float16)})[0]\n", + " end_time = time.time()\n", + " \n", + " latencies.append((end_time - start_time))\n", + " pred = np.argmax(output, axis=1)[0]\n", + " correct += (pred == label)\n", + " total += 1\n", + " \n", + " accuracy = correct / total\n", + " avg_latency = np.mean(latencies)\n", + " return accuracy, avg_latency\n", + "\n", + "def add_ep_for_device(session_options, ep_name, device_type, ep_options=None):\n", + " ep_devices = ort.get_ep_devices()\n", + " for ep_device in ep_devices:\n", + " if ep_device.ep_name == ep_name and ep_device.device.type == device_type:\n", + " print(f\"Adding {ep_name} for {device_type}\")\n", + " session_options.add_provider_for_devices([ep_device], {} if ep_options is None else ep_options)\n", + " 
break\n", + "\n", + "\n", + "session_options = ort.SessionOptions()\n", + "\n", + "add_ep_for_device(session_options, ExecutionProvider, ort.OrtHardwareDeviceType.NPU)\n", + "\n", + "session = ort.InferenceSession(\n", + " onnx_model_path, # a model with QNN EPContext nodes\n", + " sess_options=session_options,\n", + ")\n", + "\n", + "accuracy, avg_latency = evaluate_onnx(session, selected_samples)\n", + "\n", + "print(f\"Quantized Model Accuracy: {accuracy * 100:.2f}%\")\n", + "print(f\"Quantized Model Average Latency Per Image: {avg_latency * 1000:.2f} ms\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python-WCR-win32-x64-3.12.9", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.9" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/intel-bert-base-uncased-mrpc/aitk/info.yml b/intel-bert-base-uncased-mrpc/aitk/info.yml index c6b22743..36c8d26b 100644 --- a/intel-bert-base-uncased-mrpc/aitk/info.yml +++ b/intel-bert-base-uncased-mrpc/aitk/info.yml @@ -29,10 +29,3 @@ aitk: modelInfo: id: "huggingface/Intel/bert-base-uncased-mrpc" version: 1 - workflows: - - file: "bert_qdq_qnn.json" - - file: "bert_qdq_amd.json" - - file: "bert_ov.json" - - file: "bert_trtrtx.json" - - file: "bert_dml.json" - - file: "bert_webgpu.json" diff --git a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/_copy.json.config b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/_copy.json.config index f99b7656..aabcb60f 100644 --- a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/_copy.json.config +++ b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/_copy.json.config @@ -193,6 +193,31 @@ } ] }, + { + "src": "../../openai-clip-vit-base-patch16/aitk/openai_clip_webgpu.json", + "dst": "laion_clip_webgpu.json", + "replacements": [ + { + "find": "openai/clip-vit-base-patch16", + "replace": "laion/CLIP-ViT-B-32-laion2B-s34B-b79K" + } + ] + }, + { + "src": "../../openai-clip-vit-base-patch16/aitk/openai_clip_webgpu.json.config", + "dst": "laion_clip_webgpu.json.config", + "replacements": [] + }, + { + "src": "../../openai-clip-vit-base-patch16/aitk/openai_clip_webgpu_inference_sample.ipynb", + "dst": "laion_clip_webgpu_inference_sample.ipynb", + "replacements": [ + { + "find": "openai/clip-vit-base-patch16", + "replace": "laion/CLIP-ViT-B-32-laion2B-s34B-b79K" + } + ] + }, { "src": "../../openai-clip-vit-base-patch16/aitk/clip_script.py", "dst": "clip_script.py" diff --git a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/info.yml b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/info.yml index e13c2a8d..22924ead 100644 --- a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/info.yml +++ b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/info.yml @@ -25,6 +25,9 @@ recipes: - file: "laion_clip_dml.json" device: gpu ep: DmlExecutionProvider + - file: "laion_clip_webgpu.json" + device: gpu + ep: WebGpuExecutionProvider aitk: modelInfo: id: "huggingface/laion/CLIP-ViT-B-32-laion2B-s34B-b79K" diff --git a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_webgpu.json b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_webgpu.json new file mode 100644 index 00000000..947df9d3 --- /dev/null +++ b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_webgpu.json @@ -0,0 +1,206 @@ +{ + "input_model": { + "type": "HfModel", + "model_path": "laion/CLIP-ViT-B-32-laion2B-s34B-b79K", + "task": 
"zero-shot-image-classification", + "load_kwargs": { + "attn_implementation": "eager" + }, + "io_config": { + "input_names": [ + "input_ids", + "pixel_values", + "attention_mask" + ], + "input_shapes": [ + [ + 10, + 77 + ], + [ + 1, + 3, + 224, + 224 + ], + [ + 10, + 77 + ] + ], + "input_types": [ + "int64", + "float32", + "int64" + ], + "output_names": [ + "logits_per_image", + "logits_per_text", + "text_embeds", + "image_embeds" + ], + "output_shapes": [ + [ + 1, + 10 + ], + [ + 10, + 1 + ], + [ + 10, + 512 + ], + [ + 1, + 512 + ] + ] + } + }, + "systems": { + "host_system": { + "type": "LocalSystem", + "accelerators": [ + { + "device": "cpu", + "execution_providers": [ + "CPUExecutionProvider" + ] + } + ] + }, + "target_system": { + "type": "LocalSystem", + "accelerators": [ + { + "device": "gpu", + "execution_providers": [ + "WebGpuExecutionProvider" + ] + } + ] + } + }, + "data_configs": [ + { + "name": "metric_data_config", + "user_script": "user_script.py", + "load_dataset_config": { + "type": "clip_dataset", + "model_name": "laion/CLIP-ViT-B-32-laion2B-s34B-b79K", + "dataset_name": "nlphuji/flickr30k", + "start": 0, + "end": 10 + }, + "dataloader_config": { + "type": "no_auto_batch_dataloader" + }, + "post_process_data_config": { + "type": "clip_post_process" + } + } + ], + "evaluators": { + "common_evaluator": { + "metrics": [ + { + "name": "accuracy", + "type": "accuracy", + "backend": "huggingface_metrics", + "data_config": "metric_data_config", + "sub_types": [ + { + "name": "accuracy", + "priority": 1, + "goal": { + "type": "max-degradation", + "value": 0.05 + } + } + ] + }, + { + "name": "latency", + "type": "latency", + "data_config": "metric_data_config", + "sub_types": [ + { + "name": "avg", + "goal": { + "type": "percent-min-improvement", + "value": 0.1 + } + }, + { + "name": "max" + }, + { + "name": "min" + } + ] + }, + { + "name": "throughput", + "type": "throughput", + "data_config": "metric_data_config", + "sub_types": [ + { + "name": "avg" + }, + { + "name": "max" + }, + { + "name": "min" + } + ] + } + ] + } + }, + "passes": { + "conversion": { + "type": "OnnxConversion", + "target_opset": 17, + "save_as_external_data": true + }, + "transformer_optimizer": { + "type": "orttransformersoptimization", + "model_type": "clip", + "opt_level": 0, + "float16": true, + "use_gpu": true, + "keep_io_types": false, + "optimization_options": { + "enable_gelu": true, + "enable_layer_norm": true, + "enable_attention": true, + "enable_skip_layer_norm": false, + "enable_embed_layer_norm": false, + "enable_bias_skip_layer_norm": false, + "enable_bias_gelu": false, + "enable_gelu_approximation": false, + "enable_qordered_matmul": false, + "enable_shape_inference": true, + "enable_gemm_fast_gelu": false, + "enable_nhwc_conv": false, + "enable_group_norm": false, + "enable_bias_splitgelu": false, + "enable_packed_qkv": true, + "enable_packed_kv": true, + "enable_bias_add": false, + "enable_rotary_embeddings": true + }, + "save_as_external_data": true + } + }, + "search_strategy": false, + "host": "host_system", + "target": "target_system", + "cache_dir": "cache", + "evaluator": "common_evaluator", + "evaluate_input_model": false, + "output_dir": "model/clip" +} diff --git a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_webgpu.json.config b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_webgpu.json.config new file mode 100644 index 00000000..d17c25fa --- /dev/null +++ b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_webgpu.json.config @@ -0,0 +1,84 @@ +{ + "name": 
"Convert to WebGPU", + "addCpu": false, + "runtime": { + "autoGenerated": true, + "name": "Evaluate on", + "type": "enum", + "displayNames": [ + "WebGPU" + ], + "path": "systems.target_system.accelerators.0.execution_providers.0", + "values": [ + "WebGpuExecutionProvider" + ], + "readOnly": false + }, + "sections": [ + { + "autoGenerated": true, + "name": "Convert", + "phase": "Conversion", + "parameters": [], + "toggle": { + "autoGenerated": true, + "name": "Convert to ONNX format", + "type": "bool", + "path": "passes.conversion", + "actions": [ + [], + [] + ], + "readOnly": true + } + }, + { + "name": "Evaluate", + "phase": "Evaluation", + "parameters": [ + { + "name": "Evaluation Dataset", + "tags": [ + "EvaluationDataset" + ], + "type": "enum", + "path": "data_configs[0].load_dataset_config.dataset_name", + "values": [ + "nlphuji/flickr30k" + ], + "template": { + "path": "data_configs[0].load_dataset_config.dataset_name", + "values": [ + "nlphuji/flickr30k" + ], + "template": "EvaluationDataset" + } + }, + { + "name": "Evaluation Dataset Size", + "type": "int", + "path": "data_configs[0].load_dataset_config.end", + "template": { + "path": "data_configs[0].load_dataset_config.end", + "template": "EvaluationDatasetSize" + } + } + ], + "toggle": { + "autoGenerated": true, + "name": "Evaluate model performance", + "type": "bool", + "path": "evaluator", + "actions": [ + [], + [ + { + "type": "delete", + "path": "evaluator" + } + ] + ] + } + } + ] +} diff --git a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_webgpu_inference_sample.ipynb b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_webgpu_inference_sample.ipynb new file mode 100644 index 00000000..6938c9bb --- /dev/null +++ b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_webgpu_inference_sample.ipynb @@ -0,0 +1,115 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "aeb33f1a", + "metadata": {}, + "outputs": [], + "source": [ + "onnx_model_path = \"./model/model.onnx\"\n", + "ExecutionProvider=\"WebGpuExecutionProvider\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "22477669", + "metadata": {}, + "outputs": [], + "source": [ + "# reference: https://learn.microsoft.com/en-us/windows/ai/new-windows-ml/tutorial?tabs=python#acquiring-the-model-and-preprocessing\n", + "import subprocess\n", + "import json\n", + "import sys\n", + "import os\n", + "import onnxruntime as ort\n", + "\n", + "def register_execution_providers():\n", + " worker_script = os.path.abspath('winml.py')\n", + " result = subprocess.check_output([sys.executable, worker_script], text=True)\n", + " paths = json.loads(result)\n", + " for item in paths.items():\n", + " ort.register_execution_provider_library(item[0], item[1])\n", + "\n", + "register_execution_providers()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "307fcca8", + "metadata": {}, + "outputs": [], + "source": [ + "from PIL import Image\n", + "import requests\n", + " \n", + "from transformers import CLIPProcessor\n", + "import onnxruntime as ort\n", + "import numpy as np\n", + "import torch\n", + " \n", + "processor = CLIPProcessor.from_pretrained(\"laion/CLIP-ViT-B-32-laion2B-s34B-b79K\", use_fast=False)\n", + " \n", + "url = \"http://images.cocodataset.org/val2017/000000039769.jpg\"\n", + "image = Image.open(requests.get(url, stream=True).raw)\n", + " \n", + "inputs = processor(text=[\"a photo of a cat\", \"a photo of a dog\", \"a photo of a cat\", \"a photo of a dog\", \"a photo of a cat\", \"a photo of a 
dog\", \"a photo of a cat\", \"a photo of a dog\", \"a photo of a cat\", \"a photo of a dog\"],\n", + " images=image, return_tensors=\"np\", padding=\"max_length\",\n", + " max_length= 77, truncation=True)\n", + " \n", + "\n", + "def add_ep_for_device(session_options, ep_name, device_type, ep_options=None):\n", + " ep_devices = ort.get_ep_devices()\n", + " for ep_device in ep_devices:\n", + " if ep_device.ep_name == ep_name and ep_device.device.type == device_type:\n", + " print(f\"Adding {ep_name} for {device_type}\")\n", + " session_options.add_provider_for_devices([ep_device], {} if ep_options is None else ep_options)\n", + " break\n", + " \n", + "opts = ort.SessionOptions()\n", + " \n", + "add_ep_for_device(opts, ExecutionProvider, ort.OrtHardwareDeviceType.GPU)\n", + "assert opts.has_providers()\n", + "\n", + "# options = ort.SessionOptions()\n", + "session = ort.InferenceSession(onnx_model_path,\n", + " sess_options=opts,\n", + " # providers=[ExecutionProvider],\n", + " # provider_options=[provider_options]\n", + ")\n", + "logits_per_image = session.run([\"logits_per_image\"],\n", + " {\n", + " \"input_ids\": inputs['input_ids'].astype(np.int64),\n", + " \"attention_mask\": inputs['attention_mask'].astype(np.int64),\n", + " \"pixel_values\": inputs['pixel_values'].astype(np.float16)\n", + " })\n", + " \n", + "probs = torch.tensor(logits_per_image[0]).softmax(dim=1)\n", + "print(\"Label probs:\", probs)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "winml", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/model_project.config b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/model_project.config index 311c217d..d429eafb 100644 --- a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/model_project.config +++ b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/model_project.config @@ -23,6 +23,10 @@ { "file": "laion_clip_dml.json", "templateName": "laion_clip_dml" + }, + { + "file": "laion_clip_webgpu.json", + "templateName": "laion_clip_webgpu" } ], "modelInfo": { diff --git a/meta-llama-Llama-3.2-1B-Instruct/aitk/info.yml b/meta-llama-Llama-3.2-1B-Instruct/aitk/info.yml index 46b40c82..635bc818 100644 --- a/meta-llama-Llama-3.2-1B-Instruct/aitk/info.yml +++ b/meta-llama-Llama-3.2-1B-Instruct/aitk/info.yml @@ -20,6 +20,9 @@ recipes: - file: "llama3_2_dml_config.json" device: gpu ep: DmlExecutionProvider + - file: "llama3_2_webgpu_config.json" + device: gpu + ep: WebGpuExecutionProvider aitk: modelInfo: id: "huggingface/meta-llama/Llama-3.2-1B-Instruct" diff --git a/microsoft-Phi-3.5-mini-instruct/aitk/info.yml b/microsoft-Phi-3.5-mini-instruct/aitk/info.yml index 57b16388..2ad80323 100644 --- a/microsoft-Phi-3.5-mini-instruct/aitk/info.yml +++ b/microsoft-Phi-3.5-mini-instruct/aitk/info.yml @@ -20,6 +20,9 @@ recipes: - file: "phi3_5_dml_config.json" device: gpu ep: DmlExecutionProvider + - file: "phi3_5_webgpu_config.json" + device: gpu + ep: WebGpuExecutionProvider aitk: modelInfo: id: "huggingface/microsoft/Phi-3.5-mini-instruct" diff --git a/microsoft-resnet-50/aitk/_copy.json.config b/microsoft-resnet-50/aitk/_copy.json.config index 959fd3ba..d94f9329 100644 --- a/microsoft-resnet-50/aitk/_copy.json.config +++ 
b/microsoft-resnet-50/aitk/_copy.json.config @@ -23,6 +23,16 @@ "replace": "DmlExecutionProvider" } ] + }, + { + "src": "resnet_trtrtx_inference_sample.ipynb", + "dst": "resnet_webgpu_inference_sample.ipynb", + "replacements": [ + { + "find": "NvTensorRTRTXExecutionProvider", + "replace": "WebGpuExecutionProvider" + } + ] } ] } diff --git a/microsoft-resnet-50/aitk/info.yml b/microsoft-resnet-50/aitk/info.yml index 038d7eb6..af6ef72f 100644 --- a/microsoft-resnet-50/aitk/info.yml +++ b/microsoft-resnet-50/aitk/info.yml @@ -20,6 +20,9 @@ recipes: - file: "resnet_dml.json" device: gpu ep: DmlExecutionProvider + - file: "resnet_webgpu.json" + device: gpu + ep: WebGpuExecutionProvider aitk: modelInfo: id: "huggingface/microsoft/resnet-50" diff --git a/microsoft-resnet-50/aitk/model_project.config b/microsoft-resnet-50/aitk/model_project.config index 2a944b44..554360ed 100644 --- a/microsoft-resnet-50/aitk/model_project.config +++ b/microsoft-resnet-50/aitk/model_project.config @@ -19,6 +19,10 @@ { "file": "resnet_dml.json", "templateName": "resnet_dml" + }, + { + "file": "resnet_webgpu.json", + "templateName": "resnet_webgpu" } ], "modelInfo": { diff --git a/microsoft-resnet-50/aitk/resnet_webgpu.json b/microsoft-resnet-50/aitk/resnet_webgpu.json new file mode 100644 index 00000000..e64119cf --- /dev/null +++ b/microsoft-resnet-50/aitk/resnet_webgpu.json @@ -0,0 +1,121 @@ +{ + "input_model": { + "type": "HfModel", + "model_path": "microsoft/resnet-50", + "task": "image-classification", + "io_config": { + "input_names": [ + "pixel_values" + ], + "input_shapes": [ + [ + 1, + 3, + 224, + 224 + ] + ], + "output_names": [ + "logits" + ] + } + }, + "systems": { + "host_system": { + "type": "LocalSystem", + "accelerators": [ + { + "device": "cpu", + "execution_providers": [ + "CPUExecutionProvider" + ] + } + ] + }, + "target_system": { + "type": "LocalSystem", + "accelerators": [ + { + "device": "gpu", + "execution_providers": [ + "WebGpuExecutionProvider" + ] + } + ] + } + }, + "data_configs": [ + { + "name": "evaluation_data_config", + "type": "HuggingfaceContainer", + "user_script": "imagenet.py", + "load_dataset_config": { + "data_name": "timm/mini-imagenet", + "split": "validation", + "streaming": true, + "trust_remote_code": true + }, + "pre_process_data_config": { + "type": "dataset_pre_process", + "size": 1000, + "cache_key": "imagedata_evaluation" + }, + "post_process_data_config": { + "type": "dataset_post_process" + } + } + ], + "evaluators": { + "common_evaluator": { + "metrics": [ + { + "name": "accuracy", + "type": "accuracy", + "data_config": "evaluation_data_config", + "sub_types": [ + { + "name": "accuracy_score", + "priority": 1, + "metric_config": { + "task": "multiclass", + "num_classes": 1001 + } + } + ] + }, + { + "name": "latency", + "type": "latency", + "data_config": "evaluation_data_config", + "sub_types": [ + { + "name": "avg", + "priority": 2 + } + ] + } + ] + } + }, + "passes": { + "conversion": { + "device": "cpu", + "type": "OnnxConversion", + "target_opset": 17, + "save_as_external_data": true, + "all_tensors_to_one_file": true, + "dynamic": false, + "use_dynamo_exporter": false + }, + "onnx_float_to_float16": { + "type": "OnnxFloatToFloat16", + "save_as_external_data": true + } + }, + "host": "host_system", + "target": "target_system", + "evaluator": "common_evaluator", + "cache_dir": "cache", + "output_dir": "model/resnet_webgpu", + "evaluate_input_model": false +} diff --git a/microsoft-resnet-50/aitk/resnet_webgpu.json.config 
b/microsoft-resnet-50/aitk/resnet_webgpu.json.config new file mode 100644 index 00000000..23c3f074 --- /dev/null +++ b/microsoft-resnet-50/aitk/resnet_webgpu.json.config @@ -0,0 +1,104 @@ +{ + "name": "Convert to WebGPU", + "addCpu": false, + "runtime": { + "autoGenerated": true, + "name": "Evaluate on", + "type": "enum", + "displayNames": [ + "WebGPU" + ], + "path": "systems.target_system.accelerators.0.execution_providers.0", + "values": [ + "WebGpuExecutionProvider" + ], + "readOnly": false + }, + "sections": [ + { + "autoGenerated": true, + "name": "Convert", + "phase": "Conversion", + "parameters": [], + "toggle": { + "autoGenerated": true, + "name": "Convert to ONNX format", + "type": "bool", + "path": "passes.conversion", + "actions": [ + [], + [] + ], + "readOnly": true + } + }, + { + "name": "Evaluate", + "phase": "Evaluation", + "parameters": [ + { + "name": "Evaluation Dataset", + "tags": [ + "EvaluationDataset" + ], + "type": "enum", + "path": "data_configs[0].load_dataset_config.data_name", + "values": [ + "timm/mini-imagenet", + "imagenet-1k" + ], + "template": { + "path": "data_configs[0].load_dataset_config.data_name", + "values": [ + "timm/mini-imagenet", + "imagenet-1k" + ], + "template": "EvaluationDataset" + } + }, + { + "name": "Evaluation Dataset Split", + "tags": [ + "EvaluationDatasetSplit", + "DependsOnDataset" + ], + "type": "enum", + "path": "data_configs[0].load_dataset_config.split", + "values": [ + "train", + "validation", + "test" + ], + "template": { + "path": "data_configs[0].load_dataset_config.split", + "template": "EvaluationDatasetSplit" + } + }, + { + "name": "Evaluation Dataset Size", + "type": "int", + "path": "data_configs[0].pre_process_data_config.size", + "template": { + "path": "data_configs[0].pre_process_data_config.size", + "template": "EvaluationDatasetSize" + } + } + ], + "toggle": { + "autoGenerated": true, + "name": "Evaluate model performance", + "type": "bool", + "path": "evaluator", + "actions": [ + [], + [ + { + "type": "delete", + "path": "evaluator" + } + ] + ] + } + } + ] +} diff --git a/microsoft-resnet-50/aitk/resnet_webgpu_inference_sample.ipynb b/microsoft-resnet-50/aitk/resnet_webgpu_inference_sample.ipynb new file mode 100644 index 00000000..7cc8ad1c --- /dev/null +++ b/microsoft-resnet-50/aitk/resnet_webgpu_inference_sample.ipynb @@ -0,0 +1,145 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "onnx_model_path = \"./model/model.onnx\"\n", + "ExecutionProvider=\"WebGpuExecutionProvider\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# reference: https://learn.microsoft.com/en-us/windows/ai/new-windows-ml/tutorial?tabs=python#acquiring-the-model-and-preprocessing\n", + "import subprocess\n", + "import json\n", + "import sys\n", + "import os\n", + "import onnxruntime as ort\n", + "\n", + "def register_execution_providers():\n", + " worker_script = os.path.abspath('winml.py')\n", + " result = subprocess.check_output([sys.executable, worker_script], text=True)\n", + " paths = json.loads(result)\n", + " for item in paths.items():\n", + " ort.register_execution_provider_library(item[0], item[1])\n", + "\n", + "register_execution_providers()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import requests\n", + "from PIL import Image\n", + "url = \"https://onnxruntime.ai/images/dog.jpeg\"\n", + "response = 
requests.get(url)\n", + "# Save the image to a file\n", + "with open(\"dog.jpeg\", \"wb\") as file:\n", + " file.write(response.content)\n", + "img = Image.open(\"dog.jpeg\")\n", + "img" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import onnxruntime as ort\n", + "from PIL import Image\n", + "import torch\n", + "import torchvision.transforms as transforms\n", + "from torchvision.models.resnet import ResNet50_Weights\n", + "import numpy as np\n", + "\n", + "image_file_path = \"dog.jpeg\"\n", + "\n", + "# Create ONNX Runtime session\n", + "def add_ep_for_device(session_options, ep_name, device_type, ep_options=None):\n", + " ep_devices = ort.get_ep_devices()\n", + " for ep_device in ep_devices:\n", + " if ep_device.ep_name == ep_name and ep_device.device.type == device_type:\n", + " print(f\"Adding {ep_name} for {device_type}\")\n", + " session_options.add_provider_for_devices([ep_device], {} if ep_options is None else ep_options)\n", + " break\n", + "\n", + "\n", + "session_options = ort.SessionOptions()\n", + "\n", + "add_ep_for_device(session_options, ExecutionProvider, ort.OrtHardwareDeviceType.GPU)\n", + "\n", + "session = ort.InferenceSession(\n", + " onnx_model_path, # the float16 model produced by the WebGPU workflow\n", + " sess_options=session_options,\n", + ")\n", + "\n", + "print(\"Available providers:\", session.get_providers())\n", + "print(\"Provider options:\", session.get_provider_options())\n", + "\n", + "# Read and preprocess image\n", + "image = Image.open(image_file_path)\n", + "preprocess = transforms.Compose([\n", + " transforms.Resize(256),\n", + " transforms.CenterCrop(224),\n", + " transforms.ToTensor(),\n", + " transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),\n", + "])\n", + "input_tensor = preprocess(image)\n", + "input_batch = input_tensor.unsqueeze(0)\n", + "\n", + "# Run inference\n", + "ort_inputs = {session.get_inputs()[0].name: input_batch.numpy().astype(np.float16)}\n", + "ort_outputs = session.run(None, ort_inputs)\n", + "\n", + "# Postprocess to get softmax vector\n", + "output = ort_outputs[0]\n", + "softmax = torch.nn.functional.softmax(torch.tensor(output), dim=1)\n", + "\n", + "# Extract top 10 predicted classes\n", + "top10 = torch.topk(softmax, 10)\n", + "\n", + "# Get label mapping\n", + "weights = ResNet50_Weights.DEFAULT\n", + "labels = weights.meta[\"categories\"]\n", + "\n", + "# Print results to console\n", + "print(\"Top 10 predictions for ResNet-50...\")\n", + "print(\"--------------------------------------------------------------\")\n", + "for i in range(10):\n", + " print(f\"Label: {labels[top10.indices[0][i]]}, Confidence: {top10.values[0][i].item():.4f}\")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "cpu", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.11" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/microsoft-resnet-50/aitk/winml.py b/microsoft-resnet-50/aitk/winml.py new file mode 100644 index 00000000..74a12c53 --- /dev/null +++ b/microsoft-resnet-50/aitk/winml.py @@ -0,0 +1,21 @@ +import json + +def _get_ep_paths() -> dict[str, str]: + from winui3.microsoft.windows.applicationmodel.dynamicdependency.bootstrap import ( + InitializeOptions, + initialize + ) + import
winui3.microsoft.windows.ai.machinelearning as winml + eps = {} + with initialize(options = InitializeOptions.ON_NO_MATCH_SHOW_UI): + catalog = winml.ExecutionProviderCatalog.get_default() + providers = catalog.find_all_providers() + for provider in providers: + provider.ensure_ready_async().get() + eps[provider.name] = provider.library_path + # DO NOT call provider.try_register in python. That will register to the native env. + return eps + +if __name__ == "__main__": + eps = _get_ep_paths() + print(json.dumps(eps)) diff --git a/openai-clip-vit-base-patch16/aitk/_copy.json.config b/openai-clip-vit-base-patch16/aitk/_copy.json.config index 2f6d2216..4d2ae5d3 100644 --- a/openai-clip-vit-base-patch16/aitk/_copy.json.config +++ b/openai-clip-vit-base-patch16/aitk/_copy.json.config @@ -24,6 +24,16 @@ } ] }, + { + "src": "openai_clip_trtrtx_inference_sample.ipynb", + "dst": "openai_clip_webgpu_inference_sample.ipynb", + "replacements": [ + { + "find": "NvTensorRTRTXExecutionProvider", + "replace": "WebGpuExecutionProvider" + } + ] + }, { "src": "../../intel-bert-base-uncased-mrpc/aitk/winml.py", "dst": "winml.py" diff --git a/openai-clip-vit-base-patch16/aitk/info.yml b/openai-clip-vit-base-patch16/aitk/info.yml index 15f8f493..46122f8a 100644 --- a/openai-clip-vit-base-patch16/aitk/info.yml +++ b/openai-clip-vit-base-patch16/aitk/info.yml @@ -25,6 +25,9 @@ recipes: - file: "openai_clip_dml.json" device: gpu ep: DmlExecutionProvider + - file: "openai_clip_webgpu.json" + device: gpu + ep: WebGpuExecutionProvider aitk: modelInfo: id: "huggingface/openai/clip-vit-base-patch16" diff --git a/openai-clip-vit-base-patch16/aitk/model_project.config b/openai-clip-vit-base-patch16/aitk/model_project.config index c89ea0ed..13e1c771 100644 --- a/openai-clip-vit-base-patch16/aitk/model_project.config +++ b/openai-clip-vit-base-patch16/aitk/model_project.config @@ -23,6 +23,10 @@ { "file": "openai_clip_dml.json", "templateName": "openai_clip_dml" + }, + { + "file": "openai_clip_webgpu.json", + "templateName": "openai_clip_webgpu" } ], "modelInfo": { diff --git a/openai-clip-vit-base-patch16/aitk/openai_clip_webgpu.json b/openai-clip-vit-base-patch16/aitk/openai_clip_webgpu.json new file mode 100644 index 00000000..97493fc9 --- /dev/null +++ b/openai-clip-vit-base-patch16/aitk/openai_clip_webgpu.json @@ -0,0 +1,206 @@ +{ + "input_model": { + "type": "HfModel", + "model_path": "openai/clip-vit-base-patch16", + "task": "zero-shot-image-classification", + "load_kwargs": { + "attn_implementation": "eager" + }, + "io_config": { + "input_names": [ + "input_ids", + "pixel_values", + "attention_mask" + ], + "input_shapes": [ + [ + 10, + 77 + ], + [ + 1, + 3, + 224, + 224 + ], + [ + 10, + 77 + ] + ], + "input_types": [ + "int64", + "float32", + "int64" + ], + "output_names": [ + "logits_per_image", + "logits_per_text", + "text_embeds", + "image_embeds" + ], + "output_shapes": [ + [ + 1, + 10 + ], + [ + 10, + 1 + ], + [ + 10, + 512 + ], + [ + 1, + 512 + ] + ] + } + }, + "systems": { + "host_system": { + "type": "LocalSystem", + "accelerators": [ + { + "device": "cpu", + "execution_providers": [ + "CPUExecutionProvider" + ] + } + ] + }, + "target_system": { + "type": "LocalSystem", + "accelerators": [ + { + "device": "gpu", + "execution_providers": [ + "WebGpuExecutionProvider" + ] + } + ] + } + }, + "data_configs": [ + { + "name": "metric_data_config", + "user_script": "user_script.py", + "load_dataset_config": { + "type": "clip_dataset", + "model_name": "openai/clip-vit-base-patch16", + "dataset_name": 
"nlphuji/flickr30k", + "start": 0, + "end": 10 + }, + "dataloader_config": { + "type": "no_auto_batch_dataloader" + }, + "post_process_data_config": { + "type": "clip_post_process" + } + } + ], + "evaluators": { + "common_evaluator": { + "metrics": [ + { + "name": "accuracy", + "type": "accuracy", + "backend": "huggingface_metrics", + "data_config": "metric_data_config", + "sub_types": [ + { + "name": "accuracy", + "priority": 1, + "goal": { + "type": "max-degradation", + "value": 0.05 + } + } + ] + }, + { + "name": "latency", + "type": "latency", + "data_config": "metric_data_config", + "sub_types": [ + { + "name": "avg", + "goal": { + "type": "percent-min-improvement", + "value": 0.1 + } + }, + { + "name": "max" + }, + { + "name": "min" + } + ] + }, + { + "name": "throughput", + "type": "throughput", + "data_config": "metric_data_config", + "sub_types": [ + { + "name": "avg" + }, + { + "name": "max" + }, + { + "name": "min" + } + ] + } + ] + } + }, + "passes": { + "conversion": { + "type": "OnnxConversion", + "target_opset": 17, + "save_as_external_data": true + }, + "transformer_optimizer": { + "type": "orttransformersoptimization", + "model_type": "clip", + "opt_level": 0, + "float16": true, + "use_gpu": true, + "keep_io_types": false, + "optimization_options": { + "enable_gelu": true, + "enable_layer_norm": true, + "enable_attention": true, + "enable_skip_layer_norm": false, + "enable_embed_layer_norm": false, + "enable_bias_skip_layer_norm": false, + "enable_bias_gelu": false, + "enable_gelu_approximation": false, + "enable_qordered_matmul": false, + "enable_shape_inference": true, + "enable_gemm_fast_gelu": false, + "enable_nhwc_conv": false, + "enable_group_norm": false, + "enable_bias_splitgelu": false, + "enable_packed_qkv": true, + "enable_packed_kv": true, + "enable_bias_add": false, + "enable_rotary_embeddings": true + }, + "save_as_external_data": true + } + }, + "search_strategy": false, + "host": "host_system", + "target": "target_system", + "cache_dir": "cache", + "evaluator": "common_evaluator", + "evaluate_input_model": false, + "output_dir": "model/clip" +} diff --git a/openai-clip-vit-base-patch16/aitk/openai_clip_webgpu.json.config b/openai-clip-vit-base-patch16/aitk/openai_clip_webgpu.json.config new file mode 100644 index 00000000..d17c25fa --- /dev/null +++ b/openai-clip-vit-base-patch16/aitk/openai_clip_webgpu.json.config @@ -0,0 +1,84 @@ +{ + "name": "Convert to WebGPU", + "addCpu": false, + "runtime": { + "autoGenerated": true, + "name": "Evaluate on", + "type": "enum", + "displayNames": [ + "WebGPU" + ], + "path": "systems.target_system.accelerators.0.execution_providers.0", + "values": [ + "WebGpuExecutionProvider" + ], + "readOnly": false + }, + "sections": [ + { + "autoGenerated": true, + "name": "Convert", + "phase": "Conversion", + "parameters": [], + "toggle": { + "autoGenerated": true, + "name": "Convert to ONNX format", + "type": "bool", + "path": "passes.conversion", + "actions": [ + [], + [] + ], + "readOnly": true + } + }, + { + "name": "Evaluate", + "phase": "Evaluation", + "parameters": [ + { + "name": "Evaluation Dataset", + "tags": [ + "EvaluationDataset" + ], + "type": "enum", + "path": "data_configs[0].load_dataset_config.dataset_name", + "values": [ + "nlphuji/flickr30k" + ], + "template": { + "path": "data_configs[0].load_dataset_config.dataset_name", + "values": [ + "nlphuji/flickr30k" + ], + "template": "EvaluationDataset" + } + }, + { + "name": "Evaluation Dataset Size", + "type": "int", + "path": "data_configs[0].load_dataset_config.end", 
+ "template": { + "path": "data_configs[0].load_dataset_config.end", + "template": "EvaluationDatasetSize" + } + } + ], + "toggle": { + "autoGenerated": true, + "name": "Evaluate model performance", + "type": "bool", + "path": "evaluator", + "actions": [ + [], + [ + { + "type": "delete", + "path": "evaluator" + } + ] + ] + } + } + ] +} diff --git a/openai-clip-vit-base-patch16/aitk/openai_clip_webgpu_inference_sample.ipynb b/openai-clip-vit-base-patch16/aitk/openai_clip_webgpu_inference_sample.ipynb new file mode 100644 index 00000000..9e2d7c1c --- /dev/null +++ b/openai-clip-vit-base-patch16/aitk/openai_clip_webgpu_inference_sample.ipynb @@ -0,0 +1,115 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "aeb33f1a", + "metadata": {}, + "outputs": [], + "source": [ + "onnx_model_path = \"./model/model.onnx\"\n", + "ExecutionProvider=\"WebGpuExecutionProvider\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "22477669", + "metadata": {}, + "outputs": [], + "source": [ + "# reference: https://learn.microsoft.com/en-us/windows/ai/new-windows-ml/tutorial?tabs=python#acquiring-the-model-and-preprocessing\n", + "import subprocess\n", + "import json\n", + "import sys\n", + "import os\n", + "import onnxruntime as ort\n", + "\n", + "def register_execution_providers():\n", + " worker_script = os.path.abspath('winml.py')\n", + " result = subprocess.check_output([sys.executable, worker_script], text=True)\n", + " paths = json.loads(result)\n", + " for item in paths.items():\n", + " ort.register_execution_provider_library(item[0], item[1])\n", + "\n", + "register_execution_providers()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "307fcca8", + "metadata": {}, + "outputs": [], + "source": [ + "from PIL import Image\n", + "import requests\n", + " \n", + "from transformers import CLIPProcessor\n", + "import onnxruntime as ort\n", + "import numpy as np\n", + "import torch\n", + " \n", + "processor = CLIPProcessor.from_pretrained(\"openai/clip-vit-base-patch16\", use_fast=False)\n", + " \n", + "url = \"http://images.cocodataset.org/val2017/000000039769.jpg\"\n", + "image = Image.open(requests.get(url, stream=True).raw)\n", + " \n", + "inputs = processor(text=[\"a photo of a cat\", \"a photo of a dog\", \"a photo of a cat\", \"a photo of a dog\", \"a photo of a cat\", \"a photo of a dog\", \"a photo of a cat\", \"a photo of a dog\", \"a photo of a cat\", \"a photo of a dog\"],\n", + " images=image, return_tensors=\"np\", padding=\"max_length\",\n", + " max_length= 77, truncation=True)\n", + " \n", + "\n", + "def add_ep_for_device(session_options, ep_name, device_type, ep_options=None):\n", + " ep_devices = ort.get_ep_devices()\n", + " for ep_device in ep_devices:\n", + " if ep_device.ep_name == ep_name and ep_device.device.type == device_type:\n", + " print(f\"Adding {ep_name} for {device_type}\")\n", + " session_options.add_provider_for_devices([ep_device], {} if ep_options is None else ep_options)\n", + " break\n", + " \n", + "opts = ort.SessionOptions()\n", + " \n", + "add_ep_for_device(opts, ExecutionProvider, ort.OrtHardwareDeviceType.GPU)\n", + "assert opts.has_providers()\n", + "\n", + "# options = ort.SessionOptions()\n", + "session = ort.InferenceSession(onnx_model_path,\n", + " sess_options=opts,\n", + " # providers=[ExecutionProvider],\n", + " # provider_options=[provider_options]\n", + ")\n", + "logits_per_image = session.run([\"logits_per_image\"],\n", + " {\n", + " \"input_ids\": 
inputs['input_ids'].astype(np.int64),\n", + " \"attention_mask\": inputs['attention_mask'].astype(np.int64),\n", + " \"pixel_values\": inputs['pixel_values'].astype(np.float16)\n", + " })\n", + " \n", + "probs = torch.tensor(logits_per_image[0]).softmax(dim=1)\n", + "print(\"Label probs:\", probs)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "winml", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/openai-clip-vit-base-patch32/aitk/_copy.json.config b/openai-clip-vit-base-patch32/aitk/_copy.json.config index a771d852..8c3918b3 100644 --- a/openai-clip-vit-base-patch32/aitk/_copy.json.config +++ b/openai-clip-vit-base-patch32/aitk/_copy.json.config @@ -180,6 +180,31 @@ } ] }, + { + "src": "../../openai-clip-vit-base-patch16/aitk/openai_clip_webgpu.json", + "dst": "openai_clip_webgpu.json", + "replacements": [ + { + "find": "openai/clip-vit-base-patch16", + "replace": "openai/clip-vit-base-patch32" + } + ] + }, + { + "src": "../../openai-clip-vit-base-patch16/aitk/openai_clip_webgpu.json.config", + "dst": "openai_clip_webgpu.json.config", + "replacements": [] + }, + { + "src": "../../openai-clip-vit-base-patch16/aitk/openai_clip_webgpu_inference_sample.ipynb", + "dst": "openai_clip_webgpu_inference_sample.ipynb", + "replacements": [ + { + "find": "openai/clip-vit-base-patch16", + "replace": "openai/clip-vit-base-patch32" + } + ] + }, { "src": "../../openai-clip-vit-base-patch16/aitk/clip_script.py", "dst": "clip_script.py" diff --git a/openai-clip-vit-base-patch32/aitk/info.yml b/openai-clip-vit-base-patch32/aitk/info.yml index 86f4fee9..4e8c7593 100644 --- a/openai-clip-vit-base-patch32/aitk/info.yml +++ b/openai-clip-vit-base-patch32/aitk/info.yml @@ -25,6 +25,9 @@ recipes: - file: "openai_clip_dml.json" device: gpu ep: DmlExecutionProvider + - file: "openai_clip_webgpu.json" + device: gpu + ep: WebGpuExecutionProvider aitk: modelInfo: id: "huggingface/openai/clip-vit-base-patch32" diff --git a/openai-clip-vit-base-patch32/aitk/model_project.config b/openai-clip-vit-base-patch32/aitk/model_project.config index f2bd423e..234e99a8 100644 --- a/openai-clip-vit-base-patch32/aitk/model_project.config +++ b/openai-clip-vit-base-patch32/aitk/model_project.config @@ -23,6 +23,10 @@ { "file": "openai_clip_dml.json", "templateName": "openai_clip_dml" + }, + { + "file": "openai_clip_webgpu.json", + "templateName": "openai_clip_webgpu" } ], "modelInfo": { diff --git a/openai-clip-vit-base-patch32/aitk/openai_clip_webgpu.json b/openai-clip-vit-base-patch32/aitk/openai_clip_webgpu.json new file mode 100644 index 00000000..205dd9fa --- /dev/null +++ b/openai-clip-vit-base-patch32/aitk/openai_clip_webgpu.json @@ -0,0 +1,206 @@ +{ + "input_model": { + "type": "HfModel", + "model_path": "openai/clip-vit-base-patch32", + "task": "zero-shot-image-classification", + "load_kwargs": { + "attn_implementation": "eager" + }, + "io_config": { + "input_names": [ + "input_ids", + "pixel_values", + "attention_mask" + ], + "input_shapes": [ + [ + 10, + 77 + ], + [ + 1, + 3, + 224, + 224 + ], + [ + 10, + 77 + ] + ], + "input_types": [ + "int64", + "float32", + "int64" + ], + "output_names": [ + "logits_per_image", + "logits_per_text", + "text_embeds", + "image_embeds" + ], + "output_shapes": 
[ + [ + 1, + 10 + ], + [ + 10, + 1 + ], + [ + 10, + 512 + ], + [ + 1, + 512 + ] + ] + } + }, + "systems": { + "host_system": { + "type": "LocalSystem", + "accelerators": [ + { + "device": "cpu", + "execution_providers": [ + "CPUExecutionProvider" + ] + } + ] + }, + "target_system": { + "type": "LocalSystem", + "accelerators": [ + { + "device": "gpu", + "execution_providers": [ + "WebGpuExecutionProvider" + ] + } + ] + } + }, + "data_configs": [ + { + "name": "metric_data_config", + "user_script": "user_script.py", + "load_dataset_config": { + "type": "clip_dataset", + "model_name": "openai/clip-vit-base-patch32", + "dataset_name": "nlphuji/flickr30k", + "start": 0, + "end": 10 + }, + "dataloader_config": { + "type": "no_auto_batch_dataloader" + }, + "post_process_data_config": { + "type": "clip_post_process" + } + } + ], + "evaluators": { + "common_evaluator": { + "metrics": [ + { + "name": "accuracy", + "type": "accuracy", + "backend": "huggingface_metrics", + "data_config": "metric_data_config", + "sub_types": [ + { + "name": "accuracy", + "priority": 1, + "goal": { + "type": "max-degradation", + "value": 0.05 + } + } + ] + }, + { + "name": "latency", + "type": "latency", + "data_config": "metric_data_config", + "sub_types": [ + { + "name": "avg", + "goal": { + "type": "percent-min-improvement", + "value": 0.1 + } + }, + { + "name": "max" + }, + { + "name": "min" + } + ] + }, + { + "name": "throughput", + "type": "throughput", + "data_config": "metric_data_config", + "sub_types": [ + { + "name": "avg" + }, + { + "name": "max" + }, + { + "name": "min" + } + ] + } + ] + } + }, + "passes": { + "conversion": { + "type": "OnnxConversion", + "target_opset": 17, + "save_as_external_data": true + }, + "transformer_optimizer": { + "type": "orttransformersoptimization", + "model_type": "clip", + "opt_level": 0, + "float16": true, + "use_gpu": true, + "keep_io_types": false, + "optimization_options": { + "enable_gelu": true, + "enable_layer_norm": true, + "enable_attention": true, + "enable_skip_layer_norm": false, + "enable_embed_layer_norm": false, + "enable_bias_skip_layer_norm": false, + "enable_bias_gelu": false, + "enable_gelu_approximation": false, + "enable_qordered_matmul": false, + "enable_shape_inference": true, + "enable_gemm_fast_gelu": false, + "enable_nhwc_conv": false, + "enable_group_norm": false, + "enable_bias_splitgelu": false, + "enable_packed_qkv": true, + "enable_packed_kv": true, + "enable_bias_add": false, + "enable_rotary_embeddings": true + }, + "save_as_external_data": true + } + }, + "search_strategy": false, + "host": "host_system", + "target": "target_system", + "cache_dir": "cache", + "evaluator": "common_evaluator", + "evaluate_input_model": false, + "output_dir": "model/clip" +} diff --git a/openai-clip-vit-base-patch32/aitk/openai_clip_webgpu.json.config b/openai-clip-vit-base-patch32/aitk/openai_clip_webgpu.json.config new file mode 100644 index 00000000..d17c25fa --- /dev/null +++ b/openai-clip-vit-base-patch32/aitk/openai_clip_webgpu.json.config @@ -0,0 +1,84 @@ +{ + "name": "Convert to WebGPU", + "addCpu": false, + "runtime": { + "autoGenerated": true, + "name": "Evaluate on", + "type": "enum", + "displayNames": [ + "WebGPU" + ], + "path": "systems.target_system.accelerators.0.execution_providers.0", + "values": [ + "WebGpuExecutionProvider" + ], + "readOnly": false + }, + "sections": [ + { + "autoGenerated": true, + "name": "Convert", + "phase": "Conversion", + "parameters": [], + "toggle": { + "autoGenerated": true, + "name": "Convert to ONNX format", + 
"type": "bool", + "path": "passes.conversion", + "actions": [ + [], + [] + ], + "readOnly": true + } + }, + { + "name": "Evaluate", + "phase": "Evaluation", + "parameters": [ + { + "name": "Evaluation Dataset", + "tags": [ + "EvaluationDataset" + ], + "type": "enum", + "path": "data_configs[0].load_dataset_config.dataset_name", + "values": [ + "nlphuji/flickr30k" + ], + "template": { + "path": "data_configs[0].load_dataset_config.dataset_name", + "values": [ + "nlphuji/flickr30k" + ], + "template": "EvaluationDataset" + } + }, + { + "name": "Evaluation Dataset Size", + "type": "int", + "path": "data_configs[0].load_dataset_config.end", + "template": { + "path": "data_configs[0].load_dataset_config.end", + "template": "EvaluationDatasetSize" + } + } + ], + "toggle": { + "autoGenerated": true, + "name": "Evaluate model performance", + "type": "bool", + "path": "evaluator", + "actions": [ + [], + [ + { + "type": "delete", + "path": "evaluator" + } + ] + ] + } + } + ] +} diff --git a/openai-clip-vit-base-patch32/aitk/openai_clip_webgpu_inference_sample.ipynb b/openai-clip-vit-base-patch32/aitk/openai_clip_webgpu_inference_sample.ipynb new file mode 100644 index 00000000..015cd8a1 --- /dev/null +++ b/openai-clip-vit-base-patch32/aitk/openai_clip_webgpu_inference_sample.ipynb @@ -0,0 +1,115 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "aeb33f1a", + "metadata": {}, + "outputs": [], + "source": [ + "onnx_model_path = \"./model/model.onnx\"\n", + "ExecutionProvider=\"WebGpuExecutionProvider\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "22477669", + "metadata": {}, + "outputs": [], + "source": [ + "# reference: https://learn.microsoft.com/en-us/windows/ai/new-windows-ml/tutorial?tabs=python#acquiring-the-model-and-preprocessing\n", + "import subprocess\n", + "import json\n", + "import sys\n", + "import os\n", + "import onnxruntime as ort\n", + "\n", + "def register_execution_providers():\n", + " worker_script = os.path.abspath('winml.py')\n", + " result = subprocess.check_output([sys.executable, worker_script], text=True)\n", + " paths = json.loads(result)\n", + " for item in paths.items():\n", + " ort.register_execution_provider_library(item[0], item[1])\n", + "\n", + "register_execution_providers()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "307fcca8", + "metadata": {}, + "outputs": [], + "source": [ + "from PIL import Image\n", + "import requests\n", + " \n", + "from transformers import CLIPProcessor\n", + "import onnxruntime as ort\n", + "import numpy as np\n", + "import torch\n", + " \n", + "processor = CLIPProcessor.from_pretrained(\"openai/clip-vit-base-patch32\", use_fast=False)\n", + " \n", + "url = \"http://images.cocodataset.org/val2017/000000039769.jpg\"\n", + "image = Image.open(requests.get(url, stream=True).raw)\n", + " \n", + "inputs = processor(text=[\"a photo of a cat\", \"a photo of a dog\", \"a photo of a cat\", \"a photo of a dog\", \"a photo of a cat\", \"a photo of a dog\", \"a photo of a cat\", \"a photo of a dog\", \"a photo of a cat\", \"a photo of a dog\"],\n", + " images=image, return_tensors=\"np\", padding=\"max_length\",\n", + " max_length= 77, truncation=True)\n", + " \n", + "\n", + "def add_ep_for_device(session_options, ep_name, device_type, ep_options=None):\n", + " ep_devices = ort.get_ep_devices()\n", + " for ep_device in ep_devices:\n", + " if ep_device.ep_name == ep_name and ep_device.device.type == device_type:\n", + " print(f\"Adding {ep_name} for 
{device_type}\")\n", + " session_options.add_provider_for_devices([ep_device], {} if ep_options is None else ep_options)\n", + " break\n", + " \n", + "opts = ort.SessionOptions()\n", + " \n", + "add_ep_for_device(opts, ExecutionProvider, ort.OrtHardwareDeviceType.GPU)\n", + "assert opts.has_providers()\n", + "\n", + "# options = ort.SessionOptions()\n", + "session = ort.InferenceSession(onnx_model_path,\n", + " sess_options=opts,\n", + " # providers=[ExecutionProvider],\n", + " # provider_options=[provider_options]\n", + ")\n", + "logits_per_image = session.run([\"logits_per_image\"],\n", + " {\n", + " \"input_ids\": inputs['input_ids'].astype(np.int64),\n", + " \"attention_mask\": inputs['attention_mask'].astype(np.int64),\n", + " \"pixel_values\": inputs['pixel_values'].astype(np.float16)\n", + " })\n", + " \n", + "probs = torch.tensor(logits_per_image[0]).softmax(dim=1)\n", + "print(\"Label probs:\", probs)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "winml", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From 355b2fe96b1a45b97948a3973dfbda2560179c08 Mon Sep 17 00:00:00 2001 From: Chao Zhang Date: Wed, 20 Aug 2025 14:54:21 +0800 Subject: [PATCH 08/14] update olive, fix clip --- .aitk/requirements/Intel/Test_py3.12.9.txt | 4 +-- .../requirements-IntelNPU-Nightly.txt | 2 +- .aitk/requirements/requirements-IntelNPU.txt | 4 +-- .aitk/requirements/requirements-NvidiaGPU.txt | 4 +-- .aitk/requirements/requirements-QNN.txt | 4 +-- .aitk/requirements/requirements-WCR.txt | 4 +-- .aitk/requirements/requirements-WCR_CUDA.txt | 4 +-- .aitk/scripts/install_freeze.py | 2 +- .../aitk/laion_clip_webgpu.json | 29 ++----------------- .../aitk/openai_clip_webgpu.json | 29 ++----------------- .../aitk/openai_clip_webgpu.json | 29 ++----------------- 11 files changed, 20 insertions(+), 95 deletions(-) diff --git a/.aitk/requirements/Intel/Test_py3.12.9.txt b/.aitk/requirements/Intel/Test_py3.12.9.txt index 8f922797..127a389c 100644 --- a/.aitk/requirements/Intel/Test_py3.12.9.txt +++ b/.aitk/requirements/Intel/Test_py3.12.9.txt @@ -11,8 +11,8 @@ typing-extensions==4.13.2 onnx==1.17.0 numpy==1.26.4 protobuf==6.31.1 -# olive-ai@git+https://github.com/microsoft/Olive.git@8ff071c0ae9b1c38c0619ee72e8cb031957c63c4#egg=olive-ai -olive-ai@git+https://github.com/microsoft/Olive.git@8ff071c0ae9b1c38c0619ee72e8cb031957c63c4#egg=olive-ai +# olive-ai@git+https://github.com/microsoft/Olive.git@19abbd99463db9f608e3124237c1ecc74ac6e92e#egg=olive-ai +olive-ai@git+https://github.com/microsoft/Olive.git@19abbd99463db9f608e3124237c1ecc74ac6e92e#egg=olive-ai numpy==1.26.4 onnx==1.17.0 onnx-ir==0.1.5 diff --git a/.aitk/requirements/requirements-IntelNPU-Nightly.txt b/.aitk/requirements/requirements-IntelNPU-Nightly.txt index e41a4bf5..a9cef2cc 100644 --- a/.aitk/requirements/requirements-IntelNPU-Nightly.txt +++ b/.aitk/requirements/requirements-IntelNPU-Nightly.txt @@ -1,4 +1,4 @@ -# olive-ai@git+https://github.com/microsoft/Olive.git@8ff071c0ae9b1c38c0619ee72e8cb031957c63c4#egg=olive-ai depends on optimum[openvino]<=1.24 +# olive-ai@git+https://github.com/microsoft/Olive.git@19abbd99463db9f608e3124237c1ecc74ac6e92e#egg=olive-ai depends on optimum[openvino]<=1.24 # optimum[openvino]==1.24.0 depends on 
transformers<4.49.0 # phi4 depends on transformers>=4.49.0 transformers==4.49.0 diff --git a/.aitk/requirements/requirements-IntelNPU.txt b/.aitk/requirements/requirements-IntelNPU.txt index 3276a394..b5ce8251 100644 --- a/.aitk/requirements/requirements-IntelNPU.txt +++ b/.aitk/requirements/requirements-IntelNPU.txt @@ -11,8 +11,8 @@ typing-extensions==4.13.2 onnx==1.17.0 numpy==1.26.4 protobuf==6.30.2 -# olive-ai@git+https://github.com/microsoft/Olive.git@8ff071c0ae9b1c38c0619ee72e8cb031957c63c4#egg=olive-ai -olive-ai@git+https://github.com/microsoft/Olive.git@8ff071c0ae9b1c38c0619ee72e8cb031957c63c4#egg=olive-ai +# olive-ai@git+https://github.com/microsoft/Olive.git@19abbd99463db9f608e3124237c1ecc74ac6e92e#egg=olive-ai +olive-ai@git+https://github.com/microsoft/Olive.git@19abbd99463db9f608e3124237c1ecc74ac6e92e#egg=olive-ai numpy==1.26.4 onnx==1.17.0 onnx-ir==0.1.5 diff --git a/.aitk/requirements/requirements-NvidiaGPU.txt b/.aitk/requirements/requirements-NvidiaGPU.txt index 74924d54..7e63e65f 100644 --- a/.aitk/requirements/requirements-NvidiaGPU.txt +++ b/.aitk/requirements/requirements-NvidiaGPU.txt @@ -11,8 +11,8 @@ typing-extensions==4.13.2 onnx==1.17.0 numpy==2.2.4 protobuf==3.20.3 -# olive-ai@git+https://github.com/microsoft/Olive.git@8ff071c0ae9b1c38c0619ee72e8cb031957c63c4#egg=olive-ai -olive-ai@git+https://github.com/microsoft/Olive.git@8ff071c0ae9b1c38c0619ee72e8cb031957c63c4#egg=olive-ai +# olive-ai@git+https://github.com/microsoft/Olive.git@19abbd99463db9f608e3124237c1ecc74ac6e92e#egg=olive-ai +olive-ai@git+https://github.com/microsoft/Olive.git@19abbd99463db9f608e3124237c1ecc74ac6e92e#egg=olive-ai onnx-ir==0.1.5 onnxscript==0.3.2 optuna==4.3.0 diff --git a/.aitk/requirements/requirements-QNN.txt b/.aitk/requirements/requirements-QNN.txt index 25c93b54..79d145c5 100644 --- a/.aitk/requirements/requirements-QNN.txt +++ b/.aitk/requirements/requirements-QNN.txt @@ -2,8 +2,8 @@ onnx==1.17.0 numpy==2.2.4 protobuf==3.20.3 -# olive-ai@git+https://github.com/microsoft/Olive.git@8ff071c0ae9b1c38c0619ee72e8cb031957c63c4#egg=olive-ai -olive-ai@git+https://github.com/microsoft/Olive.git@8ff071c0ae9b1c38c0619ee72e8cb031957c63c4#egg=olive-ai +# olive-ai@git+https://github.com/microsoft/Olive.git@19abbd99463db9f608e3124237c1ecc74ac6e92e#egg=olive-ai +olive-ai@git+https://github.com/microsoft/Olive.git@19abbd99463db9f608e3124237c1ecc74ac6e92e#egg=olive-ai numpy==2.2.4 onnx==1.17.0 onnx-ir==0.1.5 diff --git a/.aitk/requirements/requirements-WCR.txt b/.aitk/requirements/requirements-WCR.txt index 7e3a9ce9..8c5ef6af 100644 --- a/.aitk/requirements/requirements-WCR.txt +++ b/.aitk/requirements/requirements-WCR.txt @@ -2,8 +2,8 @@ onnx==1.17.0 numpy==2.2.4 protobuf==6.30.2 -# olive-ai@git+https://github.com/microsoft/Olive.git@8ff071c0ae9b1c38c0619ee72e8cb031957c63c4#egg=olive-ai -olive-ai@git+https://github.com/microsoft/Olive.git@8ff071c0ae9b1c38c0619ee72e8cb031957c63c4#egg=olive-ai +# olive-ai@git+https://github.com/microsoft/Olive.git@19abbd99463db9f608e3124237c1ecc74ac6e92e#egg=olive-ai +olive-ai@git+https://github.com/microsoft/Olive.git@19abbd99463db9f608e3124237c1ecc74ac6e92e#egg=olive-ai onnx-ir==0.1.5 onnxscript==0.3.2 optuna==4.2.1 diff --git a/.aitk/requirements/requirements-WCR_CUDA.txt b/.aitk/requirements/requirements-WCR_CUDA.txt index ceb0a03d..6564f7c3 100644 --- a/.aitk/requirements/requirements-WCR_CUDA.txt +++ b/.aitk/requirements/requirements-WCR_CUDA.txt @@ -12,8 +12,8 @@ typing-extensions==4.14.0 onnx==1.17.0 numpy==2.2.4 protobuf==6.30.2 -# 
olive-ai@git+https://github.com/microsoft/Olive.git@8ff071c0ae9b1c38c0619ee72e8cb031957c63c4#egg=olive-ai -olive-ai@git+https://github.com/microsoft/Olive.git@8ff071c0ae9b1c38c0619ee72e8cb031957c63c4#egg=olive-ai +# olive-ai@git+https://github.com/microsoft/Olive.git@19abbd99463db9f608e3124237c1ecc74ac6e92e#egg=olive-ai +olive-ai@git+https://github.com/microsoft/Olive.git@19abbd99463db9f608e3124237c1ecc74ac6e92e#egg=olive-ai onnx-ir==0.1.5 onnxscript==0.3.2 optuna==4.2.1 diff --git a/.aitk/scripts/install_freeze.py b/.aitk/scripts/install_freeze.py index 3f4bea20..b328824f 100644 --- a/.aitk/scripts/install_freeze.py +++ b/.aitk/scripts/install_freeze.py @@ -60,7 +60,7 @@ def get_requires(name: str, args): def main(): # Constants # if from git: "git+https://github.com/microsoft/Olive.git@COMMIT_ID#egg=olive_ai - oliveAi = "olive-ai@git+https://github.com/microsoft/Olive.git@8ff071c0ae9b1c38c0619ee72e8cb031957c63c4#egg=olive-ai" + oliveAi = "olive-ai@git+https://github.com/microsoft/Olive.git@19abbd99463db9f608e3124237c1ecc74ac6e92e#egg=olive-ai" torchVision = "torchvision==0.22.0" pre = { RuntimeEnum.NvidiaGPU: [ diff --git a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_webgpu.json b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_webgpu.json index 947df9d3..bb76bded 100644 --- a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_webgpu.json +++ b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_webgpu.json @@ -166,33 +166,8 @@ "target_opset": 17, "save_as_external_data": true }, - "transformer_optimizer": { - "type": "orttransformersoptimization", - "model_type": "clip", - "opt_level": 0, - "float16": true, - "use_gpu": true, - "keep_io_types": false, - "optimization_options": { - "enable_gelu": true, - "enable_layer_norm": true, - "enable_attention": true, - "enable_skip_layer_norm": false, - "enable_embed_layer_norm": false, - "enable_bias_skip_layer_norm": false, - "enable_bias_gelu": false, - "enable_gelu_approximation": false, - "enable_qordered_matmul": false, - "enable_shape_inference": true, - "enable_gemm_fast_gelu": false, - "enable_nhwc_conv": false, - "enable_group_norm": false, - "enable_bias_splitgelu": false, - "enable_packed_qkv": true, - "enable_packed_kv": true, - "enable_bias_add": false, - "enable_rotary_embeddings": true - }, + "onnx_float_to_float16": { + "type": "OnnxFloatToFloat16", "save_as_external_data": true } }, diff --git a/openai-clip-vit-base-patch16/aitk/openai_clip_webgpu.json b/openai-clip-vit-base-patch16/aitk/openai_clip_webgpu.json index 97493fc9..39b77871 100644 --- a/openai-clip-vit-base-patch16/aitk/openai_clip_webgpu.json +++ b/openai-clip-vit-base-patch16/aitk/openai_clip_webgpu.json @@ -166,33 +166,8 @@ "target_opset": 17, "save_as_external_data": true }, - "transformer_optimizer": { - "type": "orttransformersoptimization", - "model_type": "clip", - "opt_level": 0, - "float16": true, - "use_gpu": true, - "keep_io_types": false, - "optimization_options": { - "enable_gelu": true, - "enable_layer_norm": true, - "enable_attention": true, - "enable_skip_layer_norm": false, - "enable_embed_layer_norm": false, - "enable_bias_skip_layer_norm": false, - "enable_bias_gelu": false, - "enable_gelu_approximation": false, - "enable_qordered_matmul": false, - "enable_shape_inference": true, - "enable_gemm_fast_gelu": false, - "enable_nhwc_conv": false, - "enable_group_norm": false, - "enable_bias_splitgelu": false, - "enable_packed_qkv": true, - "enable_packed_kv": true, - "enable_bias_add": false, - "enable_rotary_embeddings": true - }, 
+ "onnx_float_to_float16": { + "type": "OnnxFloatToFloat16", "save_as_external_data": true } }, diff --git a/openai-clip-vit-base-patch32/aitk/openai_clip_webgpu.json b/openai-clip-vit-base-patch32/aitk/openai_clip_webgpu.json index 205dd9fa..6198b710 100644 --- a/openai-clip-vit-base-patch32/aitk/openai_clip_webgpu.json +++ b/openai-clip-vit-base-patch32/aitk/openai_clip_webgpu.json @@ -166,33 +166,8 @@ "target_opset": 17, "save_as_external_data": true }, - "transformer_optimizer": { - "type": "orttransformersoptimization", - "model_type": "clip", - "opt_level": 0, - "float16": true, - "use_gpu": true, - "keep_io_types": false, - "optimization_options": { - "enable_gelu": true, - "enable_layer_norm": true, - "enable_attention": true, - "enable_skip_layer_norm": false, - "enable_embed_layer_norm": false, - "enable_bias_skip_layer_norm": false, - "enable_bias_gelu": false, - "enable_gelu_approximation": false, - "enable_qordered_matmul": false, - "enable_shape_inference": true, - "enable_gemm_fast_gelu": false, - "enable_nhwc_conv": false, - "enable_group_norm": false, - "enable_bias_splitgelu": false, - "enable_packed_qkv": true, - "enable_packed_kv": true, - "enable_bias_add": false, - "enable_rotary_embeddings": true - }, + "onnx_float_to_float16": { + "type": "OnnxFloatToFloat16", "save_as_external_data": true } }, From 0a9345f0c50cdf6496bb0886b02aa7f6a95731ff Mon Sep 17 00:00:00 2001 From: Chao Zhang Date: Wed, 20 Aug 2025 15:58:25 +0800 Subject: [PATCH 09/14] fix comments --- .aitk/configs/model_list.json | 26 +++++++++---------- .aitk/scripts/model_lab/__init__.py | 2 +- .aitk/scripts/sanitize/utils.py | 6 ++--- .../aitk/qwen2_5_webgpu_config.json | 2 +- .../aitk/deepseek_webgpu_config.json | 2 +- .../aitk/inference_sample.ipynb | 20 +------------- ...ase-patch16-224_dml_inference_sample.ipynb | 20 +------------- ...-patch16-224_trtrtx_inference_sample.ipynb | 20 +------------- ...-patch16-224_webgpu_inference_sample.ipynb | 20 +------------- .../aitk/llama3_2_webgpu_config.json | 2 +- .../aitk/phi3_5_webgpu_config.json | 2 +- 11 files changed, 25 insertions(+), 97 deletions(-) diff --git a/.aitk/configs/model_list.json b/.aitk/configs/model_list.json index eb795b84..f46f7170 100644 --- a/.aitk/configs/model_list.json +++ b/.aitk/configs/model_list.json @@ -28,7 +28,7 @@ "IntelGPU", "IntelNPU", "DML", - "WEBGPU" + "WebGPU" ], "architecture": "Transformer", "status": "Ready", @@ -78,7 +78,7 @@ "IntelGPU", "IntelNPU", "DML", - "WEBGPU" + "WebGPU" ], "architecture": "Transformer", "status": "Ready", @@ -98,7 +98,7 @@ "IntelGPU", "IntelNPU", "DML", - "WEBGPU" + "WebGPU" ], "architecture": "Transformer", "status": "Ready", @@ -118,7 +118,7 @@ "IntelGPU", "IntelNPU", "DML", - "WEBGPU" + "WebGPU" ], "architecture": "Transformer", "status": "Ready", @@ -138,7 +138,7 @@ "IntelGPU", "IntelNPU", "DML", - "WEBGPU" + "WebGPU" ], "architecture": "Transformer", "status": "Ready", @@ -158,7 +158,7 @@ "IntelGPU", "IntelNPU", "DML", - "WEBGPU" + "WebGPU" ], "architecture": "Transformer", "status": "Ready", @@ -208,7 +208,7 @@ "IntelGPU", "IntelNPU", "DML", - "WEBGPU" + "WebGPU" ], "architecture": "Transformer", "status": "Ready", @@ -282,7 +282,7 @@ "IntelGPU", "IntelNPU", "DML", - "WEBGPU" + "WebGPU" ], "architecture": "CNN", "status": "Ready", @@ -315,7 +315,7 @@ "IntelGPU", "IntelNPU", "DML", - "WEBGPU" + "WebGPU" ], "architecture": "Transformer", "status": "Ready", @@ -335,7 +335,7 @@ "IntelGPU", "IntelNPU", "DML", - "WEBGPU" + "WebGPU" ], "architecture": "Transformer", "status": "Ready", @@ 
-385,7 +385,7 @@ "IntelGPU", "IntelNPU", "DML", - "WEBGPU" + "WebGPU" ], "architecture": "Transformer", "status": "Ready", @@ -592,7 +592,7 @@ "NVIDIA GPU": "NvidiaGPU", "NVIDIA TensorRT for RTX": "NvidiaTRTRTX", "DirectML": "DML", - "WebGPU": "WEBGPU" + "WebGPU": "WebGPU" }, "RuntimeToDisplayName": { "CPU": "CPU", @@ -605,6 +605,6 @@ "NvidiaGPU": "NVIDIA GPU", "NvidiaTRTRTX": "NVIDIA TensorRT for RTX", "DML": "DirectML", - "WEBGPU": "WebGPU" + "WebGPU": "WebGPU" } } diff --git a/.aitk/scripts/model_lab/__init__.py b/.aitk/scripts/model_lab/__init__.py index 7ff8cf8b..d0b95633 100644 --- a/.aitk/scripts/model_lab/__init__.py +++ b/.aitk/scripts/model_lab/__init__.py @@ -11,7 +11,7 @@ class RuntimeEnum(Enum): IntelGPU = "IntelGPU" IntelNPU = "IntelNPU" DML = "DML" - WEBGPU = "WEBGPU" + WebGPU = "WebGPU" NvidiaGPU = "NvidiaGPU" WCR = "WCR" WCR_CUDA = "WCR_CUDA" diff --git a/.aitk/scripts/sanitize/utils.py b/.aitk/scripts/sanitize/utils.py index 97806f90..4fa746f7 100644 --- a/.aitk/scripts/sanitize/utils.py +++ b/.aitk/scripts/sanitize/utils.py @@ -43,7 +43,7 @@ class GlobalVars: RuntimeEnum.NvidiaGPU: EPNames.CUDAExecutionProvider, RuntimeEnum.NvidiaTRTRTX: EPNames.NvTensorRTRTXExecutionProvider, RuntimeEnum.DML: EPNames.DmlExecutionProvider, - RuntimeEnum.WEBGPU: EPNames.WebGpuExecutionProvider, + RuntimeEnum.WebGPU: EPNames.WebGpuExecutionProvider, } RuntimeToOliveDeviceType = { RuntimeEnum.CPU: OliveDeviceTypes.CPU, @@ -55,7 +55,7 @@ class GlobalVars: RuntimeEnum.AMDNPU: OliveDeviceTypes.NPU, RuntimeEnum.NvidiaGPU: OliveDeviceTypes.GPU, RuntimeEnum.DML: OliveDeviceTypes.GPU, - RuntimeEnum.WEBGPU: OliveDeviceTypes.GPU, + RuntimeEnum.WebGPU: OliveDeviceTypes.GPU, } RuntimeToDisplayName = { RuntimeEnum.CPU: "CPU", @@ -68,7 +68,7 @@ class GlobalVars: RuntimeEnum.NvidiaGPU: "NVIDIA GPU", RuntimeEnum.NvidiaTRTRTX: "NVIDIA TensorRT for RTX", RuntimeEnum.DML: "DirectML", - RuntimeEnum.WEBGPU: "WebGPU", + RuntimeEnum.WebGPU: "WebGPU", } @classmethod diff --git a/Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_webgpu_config.json b/Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_webgpu_config.json index 8c4c0f8c..d4ae439c 100644 --- a/Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_webgpu_config.json +++ b/Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_webgpu_config.json @@ -35,4 +35,4 @@ "cache_dir": "cache", "no_artifacts": true, "evaluate_input_model": false -} \ No newline at end of file +} diff --git a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_webgpu_config.json b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_webgpu_config.json index 34cd324f..fd193ded 100644 --- a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_webgpu_config.json +++ b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_webgpu_config.json @@ -35,4 +35,4 @@ "cache_dir": "cache", "no_artifacts": true, "evaluate_input_model": false -} \ No newline at end of file +} diff --git a/google-vit-base-patch16-224/aitk/inference_sample.ipynb b/google-vit-base-patch16-224/aitk/inference_sample.ipynb index ddcf70b1..f2dee65a 100644 --- a/google-vit-base-patch16-224/aitk/inference_sample.ipynb +++ b/google-vit-base-patch16-224/aitk/inference_sample.ipynb @@ -209,25 +209,7 @@ ] } ], - "metadata": { - "kernelspec": { - "display_name": "Python-WCR-win32-x64-3.12.9", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", 
- "version": "3.12.9" - } - }, + "metadata": {}, "nbformat": 4, "nbformat_minor": 4 } diff --git a/google-vit-base-patch16-224/aitk/vit-base-patch16-224_dml_inference_sample.ipynb b/google-vit-base-patch16-224/aitk/vit-base-patch16-224_dml_inference_sample.ipynb index e7c6d91e..19adf2a8 100644 --- a/google-vit-base-patch16-224/aitk/vit-base-patch16-224_dml_inference_sample.ipynb +++ b/google-vit-base-patch16-224/aitk/vit-base-patch16-224_dml_inference_sample.ipynb @@ -209,25 +209,7 @@ ] } ], - "metadata": { - "kernelspec": { - "display_name": "Python-WCR-win32-x64-3.12.9", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.9" - } - }, + "metadata": {}, "nbformat": 4, "nbformat_minor": 4 } diff --git a/google-vit-base-patch16-224/aitk/vit-base-patch16-224_trtrtx_inference_sample.ipynb b/google-vit-base-patch16-224/aitk/vit-base-patch16-224_trtrtx_inference_sample.ipynb index c7bb23c5..7b3c0075 100644 --- a/google-vit-base-patch16-224/aitk/vit-base-patch16-224_trtrtx_inference_sample.ipynb +++ b/google-vit-base-patch16-224/aitk/vit-base-patch16-224_trtrtx_inference_sample.ipynb @@ -209,25 +209,7 @@ ] } ], - "metadata": { - "kernelspec": { - "display_name": "Python-WCR-win32-x64-3.12.9", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.9" - } - }, + "metadata": {}, "nbformat": 4, "nbformat_minor": 4 } diff --git a/google-vit-base-patch16-224/aitk/vit-base-patch16-224_webgpu_inference_sample.ipynb b/google-vit-base-patch16-224/aitk/vit-base-patch16-224_webgpu_inference_sample.ipynb index fa55eb61..4f3ab189 100644 --- a/google-vit-base-patch16-224/aitk/vit-base-patch16-224_webgpu_inference_sample.ipynb +++ b/google-vit-base-patch16-224/aitk/vit-base-patch16-224_webgpu_inference_sample.ipynb @@ -209,25 +209,7 @@ ] } ], - "metadata": { - "kernelspec": { - "display_name": "Python-WCR-win32-x64-3.12.9", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.9" - } - }, + "metadata": {}, "nbformat": 4, "nbformat_minor": 4 } diff --git a/meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_webgpu_config.json b/meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_webgpu_config.json index c27b5153..4a2e2a3f 100644 --- a/meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_webgpu_config.json +++ b/meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_webgpu_config.json @@ -35,4 +35,4 @@ "cache_dir": "cache", "no_artifacts": true, "evaluate_input_model": false -} \ No newline at end of file +} diff --git a/microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_webgpu_config.json b/microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_webgpu_config.json index 71037506..3f1a4b36 100644 --- a/microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_webgpu_config.json +++ b/microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_webgpu_config.json @@ -35,4 +35,4 @@ "cache_dir": "cache", "no_artifacts": true, "evaluate_input_model": false -} \ No newline at 
end of file +} From ccbac486caa11e4d9bc8c83457f02bbb2aee6029 Mon Sep 17 00:00:00 2001 From: Chao Zhang Date: Tue, 26 Aug 2025 11:18:54 +0800 Subject: [PATCH 10/14] update readme --- Qwen-Qwen2.5-1.5B-Instruct/aitk/README.md | 1 + deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/README.md | 1 + google-bert-bert-base-multilingual-cased/aitk/README.md | 2 +- google-vit-base-patch16-224/aitk/README.md | 2 +- intel-bert-base-uncased-mrpc/aitk/README.md | 2 +- laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/README.md | 4 ++-- meta-llama-Llama-3.2-1B-Instruct/aitk/README.md | 1 + microsoft-Phi-3.5-mini-instruct/aitk/README.md | 1 + microsoft-resnet-50/aitk/README.md | 2 +- openai-clip-vit-base-patch16/aitk/README.md | 4 ++-- openai-clip-vit-base-patch32/aitk/README.md | 4 ++-- 11 files changed, 14 insertions(+), 10 deletions(-) diff --git a/Qwen-Qwen2.5-1.5B-Instruct/aitk/README.md b/Qwen-Qwen2.5-1.5B-Instruct/aitk/README.md index 5eb0ccc1..1722fceb 100644 --- a/Qwen-Qwen2.5-1.5B-Instruct/aitk/README.md +++ b/Qwen-Qwen2.5-1.5B-Instruct/aitk/README.md @@ -7,6 +7,7 @@ This repository demonstrates the optimization of the [Qwen2.5-1.5B-Instruct](htt + This process extends the QDQ flow and compiling specifically for **Qualcomm NPUs** - OpenVINO for Intel NPU + This process uses OpenVINO specific passes like `OpenVINOOptimumConversion`, `OpenVINOIoUpdate` and `OpenVINOEncapsulation` +- Float downcasting for NVIDIA TRT for RTX GPU / WebGPU for general GPU - DML for general GPU + This process uses AutoAWQ and ModelBuilder diff --git a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/README.md b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/README.md index c34dd7c3..7977e246 100644 --- a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/README.md +++ b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/README.md @@ -7,6 +7,7 @@ This repository demonstrates the optimization of the [DeepSeek-R1-Distill-Qwen-1 + This process extends the QDQ flow and compiling specifically for **Qualcomm NPUs** - OpenVINO for Intel NPU + This process uses OpenVINO specific passes like `OpenVINOOptimumConversion`, `OpenVINOIoUpdate` and `OpenVINOEncapsulation` +- Float downcasting for NVIDIA TRT for RTX GPU / WebGPU for general GPU - DML for general GPU + This process uses AutoAWQ and ModelBuilder diff --git a/google-bert-bert-base-multilingual-cased/aitk/README.md b/google-bert-bert-base-multilingual-cased/aitk/README.md index 46ba8a03..70a8e739 100644 --- a/google-bert-bert-base-multilingual-cased/aitk/README.md +++ b/google-bert-bert-base-multilingual-cased/aitk/README.md @@ -4,7 +4,7 @@ This folder contains examples of BERT optimization using different workflows. - QDQ for Qualcomm NPU / AMD NPU - OpenVINO for Intel NPU -- Float downcasting for NVIDIA TRT for RTX GPU / DML for general GPU +- Float downcasting for NVIDIA TRT for RTX GPU / DML for general GPU / WebGPU for general GPU ## QDQ for Qualcomm NPU / AMD NPU diff --git a/google-vit-base-patch16-224/aitk/README.md b/google-vit-base-patch16-224/aitk/README.md index 328dea84..00cdce44 100644 --- a/google-vit-base-patch16-224/aitk/README.md +++ b/google-vit-base-patch16-224/aitk/README.md @@ -4,7 +4,7 @@ This folder contains examples of VIT optimization using different workflows. 
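The notebook hunks above collapse each sample's pinned kernelspec and language_info block down to an empty `"metadata": {}`. A minimal sketch of scripting that cleanup with nbformat; the package choice and the file path are assumptions for illustration, not part of this series:

```python
# Sketch: blank notebook-level metadata so samples stop pinning a local
# Python-WCR kernel. Assumes the nbformat package; the path is illustrative.
import nbformat

path = "vit-base-patch16-224_dml_inference_sample.ipynb"
nb = nbformat.read(path, as_version=4)  # load the notebook as nbformat v4
nb.metadata = {}                        # drop kernelspec/language_info pins
nbformat.write(nb, path)                # cell content is left untouched
```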
- QDQ for Qualcomm NPU / AMD NPU - OpenVINO for Intel NPU -- Float downcasting for NVIDIA TRT for RTX GPU / DML for general GPU +- Float downcasting for NVIDIA TRT for RTX GPU / DML for general GPU / WebGPU for general GPU ## Optimization Workflows diff --git a/intel-bert-base-uncased-mrpc/aitk/README.md b/intel-bert-base-uncased-mrpc/aitk/README.md index 04380886..6cedd50d 100644 --- a/intel-bert-base-uncased-mrpc/aitk/README.md +++ b/intel-bert-base-uncased-mrpc/aitk/README.md @@ -4,7 +4,7 @@ This folder contains examples of BERT optimization using different workflows. - QDQ for Qualcomm NPU / AMD NPU - OpenVINO for Intel NPU -- Float downcasting for NVIDIA TRT for RTX GPU / DML for general GPU +- Float downcasting for NVIDIA TRT for RTX GPU / DML for general GPU / WebGPU for general GPU ## QDQ for Qualcomm NPU / AMD NPU diff --git a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/README.md b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/README.md index 0704a091..b55283ed 100644 --- a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/README.md +++ b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/README.md @@ -5,7 +5,7 @@ This folder contains examples of Laion Clip optimization using different workflo - Text and vision model QDQ for Qualcomm NPU - QDQ for AMD NPU - OpenVINO for Intel NPU -- Float downcasting for NVIDIA TRT for RTX GPU / DML for general GPU +- Float downcasting for NVIDIA TRT for RTX GPU / DML for general GPU / WebGPU for general GPU ## Laion Clip text optimization with QDQ for Qualcomm NPU @@ -48,7 +48,7 @@ This example performs Laion Clip optimization with QDQ in one workflow. It perfo This example performs Laion Clip optimization with OpenVINO in one workflow for Intel NPU. -## Float downcasting for NVIDIA TRT for RTX GPU / DML for general GPU +## Float downcasting for NVIDIA TRT for RTX GPU / DML for general GPU / WebGPU for general GPU It performs the optimization pipeline: diff --git a/meta-llama-Llama-3.2-1B-Instruct/aitk/README.md b/meta-llama-Llama-3.2-1B-Instruct/aitk/README.md index 09ecd4bb..37c417c8 100644 --- a/meta-llama-Llama-3.2-1B-Instruct/aitk/README.md +++ b/meta-llama-Llama-3.2-1B-Instruct/aitk/README.md @@ -7,6 +7,7 @@ This repository demonstrates the optimization of the [Llama-3.2-1B-Instruct](htt + This process extends the QDQ flow and compiling specifically for **Qualcomm NPUs** - OpenVINO for Intel NPU + This process uses OpenVINO specific passes like `OpenVINOOptimumConversion`, `OpenVINOIoUpdate` and `OpenVINOEncapsulation` +- Float downcasting for NVIDIA TRT for RTX GPU / WebGPU for general GPU - DML for general GPU + This process uses AutoAWQ and ModelBuilder diff --git a/microsoft-Phi-3.5-mini-instruct/aitk/README.md b/microsoft-Phi-3.5-mini-instruct/aitk/README.md index d045b20c..2c1cf644 100644 --- a/microsoft-Phi-3.5-mini-instruct/aitk/README.md +++ b/microsoft-Phi-3.5-mini-instruct/aitk/README.md @@ -7,6 +7,7 @@ This repository demonstrates the optimization of the [Microsoft Phi-3.5 Mini Ins + This process extends the QDQ flow and compiling specifically for **Qualcomm NPUs** - OpenVINO for Intel NPU + This process uses OpenVINO specific passes like `OpenVINOOptimumConversion`, `OpenVINOIoUpdate` and `OpenVINOEncapsulation` +- Float downcasting for NVIDIA TRT for RTX GPU / WebGPU for general GPU - DML for general GPU + This process uses AutoAWQ and ModelBuilder diff --git a/microsoft-resnet-50/aitk/README.md b/microsoft-resnet-50/aitk/README.md index 1d38f19e..71044382 100644 --- a/microsoft-resnet-50/aitk/README.md +++ b/microsoft-resnet-50/aitk/README.md 
@@ -4,7 +4,7 @@ This folder contains examples of ResNet optimization using different workflows. - QDQ for Qualcomm NPU / AMD NPU - OpenVINO for Intel NPU -- Float downcasting for NVIDIA TRT for RTX GPU / DML for general GPU +- Float downcasting for NVIDIA TRT for RTX GPU / DML for general GPU / WebGPU for general GPU ## QDQ for Qualcomm NPU / AMD NPU diff --git a/openai-clip-vit-base-patch16/aitk/README.md b/openai-clip-vit-base-patch16/aitk/README.md index 84c588bc..036bf19a 100644 --- a/openai-clip-vit-base-patch16/aitk/README.md +++ b/openai-clip-vit-base-patch16/aitk/README.md @@ -5,7 +5,7 @@ This folder contains examples of Openai Clip optimization using different workfl - Text and vision model QDQ for Qualcomm NPU - QDQ for AMD NPU - OpenVINO for Intel NPU -- Float downcasting for NVIDIA TRT for RTX GPU / DML for general GPU +- Float downcasting for NVIDIA TRT for RTX GPU / DML for general GPU / WebGPU for general GPU ## Openai Clip text optimization with QDQ for Qualcomm NPU @@ -48,7 +48,7 @@ This example performs Openai Clip optimization with QDQ in one workflow. It perf This example performs Openai Clip optimization with OpenVINO in one workflow for Intel NPU. -## Float downcasting for NVIDIA TRT for RTX GPU / DML for general GPU +## Float downcasting for NVIDIA TRT for RTX GPU / DML for general GPU / WebGPU for general GPU It performs the optimization pipeline: diff --git a/openai-clip-vit-base-patch32/aitk/README.md b/openai-clip-vit-base-patch32/aitk/README.md index 84c588bc..036bf19a 100644 --- a/openai-clip-vit-base-patch32/aitk/README.md +++ b/openai-clip-vit-base-patch32/aitk/README.md @@ -5,7 +5,7 @@ This folder contains examples of Openai Clip optimization using different workfl - Text and vision model QDQ for Qualcomm NPU - QDQ for AMD NPU - OpenVINO for Intel NPU -- Float downcasting for NVIDIA TRT for RTX GPU / DML for general GPU +- Float downcasting for NVIDIA TRT for RTX GPU / DML for general GPU / WebGPU for general GPU ## Openai Clip text optimization with QDQ for Qualcomm NPU @@ -48,7 +48,7 @@ This example performs Openai Clip optimization with QDQ in one workflow. It perf This example performs Openai Clip optimization with OpenVINO in one workflow for Intel NPU. 
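Each of these README workflows is backed by an Olive JSON recipe in the same aitk folder (for example the `*_trtrtx_config.json` files added earlier in this series). A hedged sketch of running one programmatically; `olive_run` is Olive's documented entry point, and the working directory and config name here are assumptions:

```python
# Sketch: execute one of the float-downcast recipes with Olive.
# Assumes the olive-ai package is installed and that the JSON recipe
# sits in the current working directory, as in the aitk folders.
from olive.workflows import run as olive_run

# e.g. the NVIDIA TensorRT for RTX recipe added in PATCH 01
olive_run("qwen2_5_trtrtx_config.json")
```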
-## Float downcasting for NVIDIA TRT for RTX GPU / DML for general GPU +## Float downcasting for NVIDIA TRT for RTX GPU / DML for general GPU / WebGPU for general GPU It performs the optimization pipeline: From 025f9e969451f86f85f1544255570d6b551d5cb6 Mon Sep 17 00:00:00 2001 From: Chao Zhang Date: Tue, 14 Oct 2025 17:51:39 +0800 Subject: [PATCH 11/14] sanitize --- .aitk/configs/checks.json | 19 +- .aitk/configs/model_list.json | 174 +----------------- .aitk/docs/guide/ModelList.md | 22 +-- .../aitk/qwen2_5_webgpu_config.json.config | 1 + .../aitk/deepseek_webgpu_config.json.config | 1 + .../aitk/README.md | 2 +- .../aitk/llama3_2_webgpu_config.json.config | 1 + .../aitk/phi3_5_webgpu_config.json.config | 1 + .../aitk/inference_sample.ipynb | 25 --- .../aitk/inference_sample.ipynb | 25 --- .../aitk/inference_sample.ipynb | 25 --- 11 files changed, 34 insertions(+), 262 deletions(-) diff --git a/.aitk/configs/checks.json b/.aitk/configs/checks.json index d9b8e40d..6c79cbf4 100644 --- a/.aitk/configs/checks.json +++ b/.aitk/configs/checks.json @@ -1,13 +1,16 @@ { - "configCheck": 101, + "configCheck": 109, + "copyCheck": 169, "extensionCheck": 1, - "gitignoreCheck": 32, - "inferenceModelCheck": 22, - "ipynbCheck": 56, - "modelProjectCheck": 33, + "gitignoreCheck": 35, + "inferenceModelCheck": 25, + "ipynbCheck": 53, + "licenseCheck": 34, + "modelProjectCheck": 36, "oliveCheck": 0, - "oliveJsonCheck": 101, - "pathCheck": 792, - "requirementsCheck": 32, + "oliveJsonCheck": 109, + "pathCheck": 704, + "requirementsCheck": 35, + "templateCheck": 1, "venvRequirementsCheck": 12 } diff --git a/.aitk/configs/model_list.json b/.aitk/configs/model_list.json index 6c903a5a..2d0861a7 100644 --- a/.aitk/configs/model_list.json +++ b/.aitk/configs/model_list.json @@ -12,7 +12,8 @@ "IntelCPU", "IntelGPU", "IntelNPU", - "DML" + "DML", + "WebGPU" ], "architecture": "Transformer", "status": "Ready", @@ -32,7 +33,8 @@ "IntelCPU", "IntelGPU", "IntelNPU", - "DML" + "DML", + "WebGPU" ], "architecture": "CNN", "status": "Ready", @@ -168,146 +170,6 @@ "version": 3, "p0": true }, - { - "displayName": "microsoft/Phi-3-mini-128k-instruct", - "icon": "ms", - "modelLink": "https://huggingface.co/microsoft/Phi-3-mini-128k-instruct", - "id": "huggingface/microsoft/Phi-3-mini-128k-instruct", - "groupId": "huggingface/microsoft/Phi-3-mini-4k-instruct", - "groupItemName": "128K", - "runtimes": [ - "IntelGPU" - ], - "architecture": "Transformer", - "status": "Ready", - "relativePath": "microsoft-Phi-3-mini-128k-instruct/aitk", - "version": 2 - }, - { - "displayName": "microsoft/Phi-3-mini-4k-instruct", - "icon": "ms", - "modelLink": "https://huggingface.co/microsoft/Phi-3-mini-4k-instruct", - "id": "huggingface/microsoft/Phi-3-mini-4k-instruct", - "groupId": "huggingface/microsoft/Phi-3-mini-4k-instruct", - "groupItemName": "4K", - "runtimes": [ - "IntelGPU", - "IntelNPU" - ], - "architecture": "Transformer", - "status": "Ready", - "relativePath": "microsoft-Phi-3-mini-4k-instruct/aitk", - "version": 2 - }, - { - "displayName": "microsoft/Phi-3.5-mini-instruct", - "icon": "ms", - "modelLink": "https://huggingface.co/microsoft/Phi-3.5-mini-instruct", - "id": "huggingface/microsoft/Phi-3.5-mini-instruct", - "runtimes": [ - "QNN", - "AMDNPU", - "NvidiaTRTRTX", - "IntelCPU", - "IntelGPU", - "IntelNPU", - "DML", - "WebGPU" - ], - "architecture": "Transformer", - "status": "Ready", - "relativePath": "microsoft-Phi-3.5-mini-instruct/aitk", - "version": 1, - "p0": true - }, - { - "displayName": "microsoft/Phi-4-mini-instruct", - 
"icon": "ms", - "modelLink": "https://huggingface.co/microsoft/Phi-4-mini-instruct", - "id": "huggingface/microsoft/Phi-4-mini-instruct", - "runtimes": [ - "IntelGPU", - "IntelNPU" - ], - "architecture": "Transformer", - "status": "Ready", - "relativePath": "microsoft-Phi-4-mini-instruct/aitk", - "version": 2 - }, - { - "displayName": "microsoft/Phi-4-mini-reasoning", - "icon": "ms", - "modelLink": "https://huggingface.co/microsoft/Phi-4-mini-reasoning", - "id": "huggingface/microsoft/Phi-4-mini-reasoning", - "runtimes": [ - "IntelGPU", - "IntelNPU" - ], - "architecture": "Transformer", - "status": "Ready", - "relativePath": "microsoft-Phi-4-mini-reasoning/aitk", - "version": 2 - }, - { - "displayName": "microsoft/Phi-4-reasoning", - "icon": "ms", - "modelLink": "https://huggingface.co/microsoft/Phi-4-reasoning", - "id": "huggingface/microsoft/Phi-4-reasoning", - "runtimes": [ - "IntelNPU" - ], - "architecture": "Transformer", - "status": "Ready", - "relativePath": "microsoft-Phi-4-reasoning/aitk", - "version": 2 - }, - { - "displayName": "microsoft/Phi-4-reasoning-plus", - "icon": "ms", - "modelLink": "https://huggingface.co/microsoft/Phi-4-reasoning-plus", - "id": "huggingface/microsoft/Phi-4-reasoning-plus", - "runtimes": [ - "IntelNPU" - ], - "architecture": "Transformer", - "status": "Ready", - "relativePath": "microsoft-Phi-4-reasoning-plus/aitk", - "version": 2 - }, - { - "displayName": "Microsoft/ResNet-50", - "icon": "ms", - "modelLink": "https://huggingface.co/microsoft/resnet-50", - "id": "huggingface/microsoft/resnet-50", - "runtimes": [ - "QNN", - "AMDNPU", - "NvidiaTRTRTX", - "IntelCPU", - "IntelGPU", - "IntelNPU", - "DML", - "WebGPU" - ], - "architecture": "CNN", - "status": "Ready", - "relativePath": "microsoft-resnet-50/aitk", - "version": 1, - "p0": true - }, - { - "displayName": "mistralai/Mistral-7B-Instruct-v0.3", - "icon": "mistralai", - "modelLink": "https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.3", - "id": "huggingface/mistralai/Mistral-7B-Instruct-v0.3", - "runtimes": [ - "IntelGPU" - ], - "architecture": "Transformer", - "status": "Ready", - "relativePath": "mistralai-Mistral-7B-Instruct-v0.3/aitk", - "version": 2 - }, { "displayName": "openai/clip-vit-base-patch16", "icon": "OpenAI", @@ -364,7 +226,8 @@ "IntelCPU", "IntelGPU", "IntelNPU", - "DML" + "DML", + "WebGPU" ], "architecture": "Transformer", "status": "Ready", @@ -589,30 +452,7 @@ "architecture": "Transformer", "status": "Ready", "relativePath": "Qwen-Qwen2.5-0.5B-Instruct/aitk", - "version": 2 - }, - { - "displayName": "Qwen/Qwen2.5-1.5B-Instruct", - "icon": "qwen", - "modelLink": "https://huggingface.co/Qwen/Qwen2.5-1.5B-Instruct", - "id": "huggingface/Qwen/Qwen2.5-1.5B-Instruct", - "groupId": "huggingface/Qwen/Qwen2.5-1.5B-Instruct", - "groupItemName": "1.5B", - "runtimes": [ - "QNN", - "AMDNPU", - "NvidiaTRTRTX", - "IntelCPU", - "IntelGPU", - "IntelNPU", - "DML", - "WebGPU" - ], - "architecture": "Transformer", - "status": "Ready", - "relativePath": "Qwen-Qwen2.5-1.5B-Instruct/aitk", - "version": 1, - "p0": true + "version": 3 }, { "displayName": "Qwen/Qwen2.5-3B-Instruct", diff --git a/.aitk/docs/guide/ModelList.md b/.aitk/docs/guide/ModelList.md index e55a23d5..c7654b37 100644 --- a/.aitk/docs/guide/ModelList.md +++ b/.aitk/docs/guide/ModelList.md @@ -5,23 +5,23 @@ | Model Name | Supported Runtimes | |------------|--------------------| | [Deepseek R1 Distill Llama 8B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Llama-8B) | [Intel 
GPU](../../../deepseek-ai-DeepSeek-R1-Distill-Llama-8B/aitk/deepseek_ov_config.json), [Intel NPU](../../../deepseek-ai-DeepSeek-R1-Distill-Llama-8B/aitk/deepseek_ov_npu_config.json) | -| [Deepseek R1 Distill Qwen 1.5B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B) | [Qualcomm NPU](../../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_qnn_config.json), [AMD NPU](../../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_vitis_ai_config.json), [NVIDIA TensorRT for RTX](../../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_trtrtx_config.json), [Intel CPU](../../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_ov_gpu_config.json), [Intel GPU](../../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_ov_gpu_config.json), [Intel NPU](../../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_ov_config.json), [DirectML](../../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_dml_config.json) | +| [Deepseek R1 Distill Qwen 1.5B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B) | [Qualcomm NPU](../../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_qnn_config.json), [AMD NPU](../../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_vitis_ai_config.json), [NVIDIA TensorRT for RTX](../../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_trtrtx_config.json), [Intel CPU](../../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_ov_gpu_config.json), [Intel GPU](../../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_ov_gpu_config.json), [Intel NPU](../../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_ov_config.json), [DirectML](../../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_dml_config.json), [WebGPU](../../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_webgpu_config.json) | | [Deepseek R1 Distill Qwen 14B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B) | [Intel GPU](../../../deepseek-ai-DeepSeek-R1-Distill-Qwen-14B/aitk/deepseek_ov_config.json) | | [Deepseek R1 Distill Qwen 7B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B) | [Intel GPU](../../../deepseek-ai-DeepSeek-R1-Distill-Qwen-7B/aitk/deepseek_ov_config.json), [Intel NPU](../../../deepseek-ai-DeepSeek-R1-Distill-Qwen-7B/aitk/deepseek_ov_npu_config.json) | | [Llama 3.1 8B Instruct](https://huggingface.co/meta-llama/Llama-3.1-8B-Instruct) | [Qualcomm NPU](../../../meta-llama-Llama-3.1-8B-Instruct/aitk/llama3_1_qnn_config.json), [AMD NPU](../../../meta-llama-Llama-3.1-8B-Instruct/aitk/llama3_1_vitis_ai_config.json), [NVIDIA TensorRT for RTX](../../../meta-llama-Llama-3.1-8B-Instruct/aitk/llama3_1_trtrtx_config.json), [Intel CPU](../../../meta-llama-Llama-3.1-8B-Instruct/aitk/llama3_1_ov_config.json), [Intel GPU](../../../meta-llama-Llama-3.1-8B-Instruct/aitk/llama3_1_ov_config.json), [Intel NPU](../../../meta-llama-Llama-3.1-8B-Instruct/aitk/llama3_1_ov_config.json), [DirectML](../../../meta-llama-Llama-3.1-8B-Instruct/aitk/llama3_1_dml_config.json) | -| [Llama 3.2 1B Instruct](https://huggingface.co/meta-llama/Llama-3.2-1B-Instruct) | [Qualcomm NPU](../../../meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_qnn_config.json), [AMD NPU](../../../meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_vitis_ai_config.json), [NVIDIA TensorRT for RTX](../../../meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_trtrtx_config.json), [Intel CPU](../../../meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_ov_config.json), [Intel 
GPU](../../../meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_ov_config.json), [Intel NPU](../../../meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_ov_config.json), [DirectML](../../../meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_dml_config.json) | +| [Llama 3.2 1B Instruct](https://huggingface.co/meta-llama/Llama-3.2-1B-Instruct) | [Qualcomm NPU](../../../meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_qnn_config.json), [AMD NPU](../../../meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_vitis_ai_config.json), [NVIDIA TensorRT for RTX](../../../meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_trtrtx_config.json), [Intel CPU](../../../meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_ov_config.json), [Intel GPU](../../../meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_ov_config.json), [Intel NPU](../../../meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_ov_config.json), [DirectML](../../../meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_dml_config.json), [WebGPU](../../../meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_webgpu_config.json) | | [Mistral 7B Instruct V0.2](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2) | [Intel GPU](../../../mistralai-Mistral-7B-Instruct-v0.2/aitk/Mistral_7B_Instruct_v0.2_gpu_context_ov_dy.json), [Intel NPU](../../../mistralai-Mistral-7B-Instruct-v0.2/aitk/Mistral_7B_Instruct_v0.2_npu_context_ov_dy.json) | | [Mistral 7B Instruct V0.3](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.3) | [Intel GPU](../../../mistralai-Mistral-7B-Instruct-v0.3/aitk/mistral-7b-instruct-v0.3-ov.json) | | [Phi 3 Mini 128K Instruct](https://huggingface.co/microsoft/Phi-3-mini-128k-instruct) | [Intel GPU](../../../microsoft-Phi-3-mini-128k-instruct/aitk/phi3_ov_config.json) | | [Phi 3 Mini 4K Instruct](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct) | [Intel GPU](../../../microsoft-Phi-3-mini-4k-instruct/aitk/phi3_ov_config.json), [Intel NPU](../../../microsoft-Phi-3-mini-4k-instruct/aitk/phi3_ov_npu_config.json) | -| [Phi 3.5 Mini Instruct](https://huggingface.co/microsoft/Phi-3.5-mini-instruct) | [Qualcomm NPU](../../../microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_qnn_config.json), [AMD NPU](../../../microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_vitis_ai_config.json), [NVIDIA TensorRT for RTX](../../../microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_trtrtx_config.json), [Intel CPU](../../../microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_ov_gpu_config.json), [Intel GPU](../../../microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_ov_gpu_config.json), [Intel NPU](../../../microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_ov_config.json), [DirectML](../../../microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_dml_config.json) | +| [Phi 3.5 Mini Instruct](https://huggingface.co/microsoft/Phi-3.5-mini-instruct) | [Qualcomm NPU](../../../microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_qnn_config.json), [AMD NPU](../../../microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_vitis_ai_config.json), [NVIDIA TensorRT for RTX](../../../microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_trtrtx_config.json), [Intel CPU](../../../microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_ov_gpu_config.json), [Intel GPU](../../../microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_ov_gpu_config.json), [Intel NPU](../../../microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_ov_config.json), [DirectML](../../../microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_dml_config.json), [WebGPU](../../../microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_webgpu_config.json) | | [Phi 4](https://huggingface.co/microsoft/Phi-4) | [Intel 
GPU](../../../microsoft-Phi-4/aitk/phi4_ov_config.json) | | [Phi 4 Mini Instruct](https://huggingface.co/microsoft/Phi-4-mini-instruct) | [Intel GPU](../../../microsoft-Phi-4-mini-instruct/aitk/phi4_ov_config.json), [Intel NPU](../../../microsoft-Phi-4-mini-instruct/aitk/phi4_ov_npu_config.json) | | [Phi 4 Mini Reasoning](https://huggingface.co/microsoft/Phi-4-mini-reasoning) | [Intel GPU](../../../microsoft-Phi-4-mini-reasoning/aitk/phi4_ov_gpu_config.json), [Intel NPU](../../../microsoft-Phi-4-mini-reasoning/aitk/phi4_ov_config.json) | | [Phi 4 Reasoning](https://huggingface.co/microsoft/Phi-4-reasoning) | [Intel NPU](../../../microsoft-Phi-4-reasoning/aitk/phi4_ov_config.json) | | [Phi 4 Reasoning Plus](https://huggingface.co/microsoft/Phi-4-reasoning-plus) | [Intel NPU](../../../microsoft-Phi-4-reasoning-plus/aitk/phi4_ov_config.json) | | [Qwen2.5 0.5B Instruct](https://huggingface.co/Qwen/Qwen2.5-0.5B-Instruct) | [Intel GPU](../../../Qwen-Qwen2.5-0.5B-Instruct/aitk/qwen2_5_ov_config.json), [Intel NPU](../../../Qwen-Qwen2.5-0.5B-Instruct/aitk/qwen2_5_ov_npu_config.json) | -| [Qwen2.5 1.5B Instruct](https://huggingface.co/Qwen/Qwen2.5-1.5B-Instruct) | [Qualcomm NPU](../../../Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_qnn_config.json), [AMD NPU](../../../Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_vitis_ai_config.json), [NVIDIA TensorRT for RTX](../../../Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_trtrtx_config.json), [Intel CPU](../../../Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_ov_gpu_config.json), [Intel GPU](../../../Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_ov_gpu_config.json), [Intel NPU](../../../Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_ov_config.json), [DirectML](../../../Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_dml_config.json) | +| [Qwen2.5 1.5B Instruct](https://huggingface.co/Qwen/Qwen2.5-1.5B-Instruct) | [Qualcomm NPU](../../../Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_qnn_config.json), [AMD NPU](../../../Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_vitis_ai_config.json), [NVIDIA TensorRT for RTX](../../../Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_trtrtx_config.json), [Intel CPU](../../../Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_ov_gpu_config.json), [Intel GPU](../../../Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_ov_gpu_config.json), [Intel NPU](../../../Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_ov_config.json), [DirectML](../../../Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_dml_config.json), [WebGPU](../../../Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_webgpu_config.json) | | [Qwen2.5 14B Instruct](https://huggingface.co/Qwen/Qwen2.5-14B-Instruct) | [Intel GPU](../../../Qwen-Qwen2.5-14B-Instruct/aitk/qwen2_5_ov_config.json) | | [Qwen2.5 3B Instruct](https://huggingface.co/Qwen/Qwen2.5-3B-Instruct) | [Intel GPU](../../../Qwen-Qwen2.5-3B-Instruct/aitk/qwen2_5_ov_config.json) | | [Qwen2.5 7B Instruct](https://huggingface.co/Qwen/Qwen2.5-7B-Instruct) | [Intel GPU](../../../Qwen-Qwen2.5-7B-Instruct/aitk/qwen2_5_ov_config.json), [Intel NPU](../../../Qwen-Qwen2.5-7B-Instruct/aitk/qwen2_5_ov_npu_config.json) | @@ -34,10 +34,10 @@ | Model Name | Supported Runtimes | |------------|--------------------| -| [Bert Base Multilingual Cased](https://huggingface.co/google-bert/bert-base-multilingual-cased) | [Qualcomm NPU](../../../google-bert-bert-base-multilingual-cased/aitk/bert-base-multilingual-cased_qdq_qnn.json), [AMD NPU](../../../google-bert-bert-base-multilingual-cased/aitk/bert-base-multilingual-cased_qdq_amd.json), [NVIDIA TensorRT for 
RTX](../../../google-bert-bert-base-multilingual-cased/aitk/bert-base-multilingual-cased_trtrtx.json), [Intel CPU](../../../google-bert-bert-base-multilingual-cased/aitk/bert-base-multilingual-cased_context_ov_static.json), [Intel GPU](../../../google-bert-bert-base-multilingual-cased/aitk/bert-base-multilingual-cased_context_ov_static.json), [Intel NPU](../../../google-bert-bert-base-multilingual-cased/aitk/bert-base-multilingual-cased_context_ov_static.json), [DirectML](../../../google-bert-bert-base-multilingual-cased/aitk/bert-base-multilingual-cased_dml.json) | -| [Bert Base Uncased Mrpc](https://huggingface.co/Intel/bert-base-uncased-mrpc) | [Qualcomm NPU](../../../intel-bert-base-uncased-mrpc/aitk/bert_qdq_qnn.json), [AMD NPU](../../../intel-bert-base-uncased-mrpc/aitk/bert_qdq_amd.json), [NVIDIA TensorRT for RTX](../../../intel-bert-base-uncased-mrpc/aitk/bert_trtrtx.json), [Intel CPU](../../../intel-bert-base-uncased-mrpc/aitk/bert_ov.json), [Intel GPU](../../../intel-bert-base-uncased-mrpc/aitk/bert_ov.json), [Intel NPU](../../../intel-bert-base-uncased-mrpc/aitk/bert_ov.json), [DirectML](../../../intel-bert-base-uncased-mrpc/aitk/bert_dml.json) | -| [Clip Vit B 32 Laion2B S34B B79K](https://huggingface.co/laion/CLIP-ViT-B-32-laion2B-s34B-b79K) | [Qualcomm NPU](../../../laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_qnn.json), [AMD NPU](../../../laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_qdq_amd.json), [NVIDIA TensorRT for RTX](../../../laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_trtrtx.json), [Intel CPU](../../../laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_ov.json), [Intel GPU](../../../laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_ov.json), [Intel NPU](../../../laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_ov.json), [DirectML](../../../laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_dml.json) | -| [Clip Vit Base Patch16](https://huggingface.co/openai/clip-vit-base-patch16) | [Qualcomm NPU](../../../openai-clip-vit-base-patch16/aitk/openai_clip_qnn.json), [AMD NPU](../../../openai-clip-vit-base-patch16/aitk/openai_clip_qdq_amd.json), [NVIDIA TensorRT for RTX](../../../openai-clip-vit-base-patch16/aitk/openai_clip_trtrtx.json), [Intel CPU](../../../openai-clip-vit-base-patch16/aitk/openai_clip_ov.json), [Intel GPU](../../../openai-clip-vit-base-patch16/aitk/openai_clip_ov.json), [Intel NPU](../../../openai-clip-vit-base-patch16/aitk/openai_clip_ov.json), [DirectML](../../../openai-clip-vit-base-patch16/aitk/openai_clip_dml.json) | -| [Clip Vit Base Patch32](https://huggingface.co/openai/clip-vit-base-patch32) | [Qualcomm NPU](../../../openai-clip-vit-base-patch32/aitk/openai_clip_qnn.json), [AMD NPU](../../../openai-clip-vit-base-patch32/aitk/openai_clip_qdq_amd.json), [NVIDIA TensorRT for RTX](../../../openai-clip-vit-base-patch32/aitk/openai_clip_trtrtx.json), [Intel CPU](../../../openai-clip-vit-base-patch32/aitk/openai_clip_ov.json), [Intel GPU](../../../openai-clip-vit-base-patch32/aitk/openai_clip_ov.json), [Intel NPU](../../../openai-clip-vit-base-patch32/aitk/openai_clip_ov.json), [DirectML](../../../openai-clip-vit-base-patch32/aitk/openai_clip_dml.json) | -| [Resnet 50](https://huggingface.co/microsoft/resnet-50) | [Qualcomm NPU](../../../microsoft-resnet-50/aitk/resnet_qdq_qnn.json), [AMD NPU](../../../microsoft-resnet-50/aitk/resnet_qdq_amd.json), [NVIDIA TensorRT for RTX](../../../microsoft-resnet-50/aitk/resnet_trtrtx.json), [Intel CPU](../../../microsoft-resnet-50/aitk/resnet_context_ov_static.json), 
[Intel GPU](../../../microsoft-resnet-50/aitk/resnet_context_ov_static.json), [Intel NPU](../../../microsoft-resnet-50/aitk/resnet_context_ov_static.json), [DirectML](../../../microsoft-resnet-50/aitk/resnet_dml.json) | -| [Vit Base Patch16 224](https://huggingface.co/google/vit-base-patch16-224) | [Qualcomm NPU](../../../google-vit-base-patch16-224/aitk/vit-base-patch16-224_qdq_qnn.json), [AMD NPU](../../../google-vit-base-patch16-224/aitk/vit-base-patch16-224_qdq_amd.json), [NVIDIA TensorRT for RTX](../../../google-vit-base-patch16-224/aitk/vit-base-patch16-224_trtrtx.json), [Intel CPU](../../../google-vit-base-patch16-224/aitk/vit_base_patch16_224_context_ov_static.json), [Intel GPU](../../../google-vit-base-patch16-224/aitk/vit_base_patch16_224_context_ov_static.json), [Intel NPU](../../../google-vit-base-patch16-224/aitk/vit_base_patch16_224_context_ov_static.json), [DirectML](../../../google-vit-base-patch16-224/aitk/vit-base-patch16-224_dml.json) | +| [Bert Base Multilingual Cased](https://huggingface.co/google-bert/bert-base-multilingual-cased) | [Qualcomm NPU](../../../google-bert-bert-base-multilingual-cased/aitk/bert-base-multilingual-cased_qdq_qnn.json), [AMD NPU](../../../google-bert-bert-base-multilingual-cased/aitk/bert-base-multilingual-cased_qdq_amd.json), [NVIDIA TensorRT for RTX](../../../google-bert-bert-base-multilingual-cased/aitk/bert-base-multilingual-cased_trtrtx.json), [Intel CPU](../../../google-bert-bert-base-multilingual-cased/aitk/bert-base-multilingual-cased_context_ov_static.json), [Intel GPU](../../../google-bert-bert-base-multilingual-cased/aitk/bert-base-multilingual-cased_context_ov_static.json), [Intel NPU](../../../google-bert-bert-base-multilingual-cased/aitk/bert-base-multilingual-cased_context_ov_static.json), [DirectML](../../../google-bert-bert-base-multilingual-cased/aitk/bert-base-multilingual-cased_dml.json), [WebGPU](../../../google-bert-bert-base-multilingual-cased/aitk/bert-base-multilingual-cased_webgpu.json) | +| [Bert Base Uncased Mrpc](https://huggingface.co/Intel/bert-base-uncased-mrpc) | [Qualcomm NPU](../../../intel-bert-base-uncased-mrpc/aitk/bert_qdq_qnn.json), [AMD NPU](../../../intel-bert-base-uncased-mrpc/aitk/bert_qdq_amd.json), [NVIDIA TensorRT for RTX](../../../intel-bert-base-uncased-mrpc/aitk/bert_trtrtx.json), [Intel CPU](../../../intel-bert-base-uncased-mrpc/aitk/bert_ov.json), [Intel GPU](../../../intel-bert-base-uncased-mrpc/aitk/bert_ov.json), [Intel NPU](../../../intel-bert-base-uncased-mrpc/aitk/bert_ov.json), [DirectML](../../../intel-bert-base-uncased-mrpc/aitk/bert_dml.json), [WebGPU](../../../intel-bert-base-uncased-mrpc/aitk/bert_webgpu.json) | +| [Clip Vit B 32 Laion2B S34B B79K](https://huggingface.co/laion/CLIP-ViT-B-32-laion2B-s34B-b79K) | [Qualcomm NPU](../../../laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_qnn.json), [AMD NPU](../../../laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_qdq_amd.json), [NVIDIA TensorRT for RTX](../../../laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_trtrtx.json), [Intel CPU](../../../laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_ov.json), [Intel GPU](../../../laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_ov.json), [Intel NPU](../../../laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_ov.json), [DirectML](../../../laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_dml.json), [WebGPU](../../../laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_webgpu.json) | +| [Clip Vit Base 
Patch16](https://huggingface.co/openai/clip-vit-base-patch16) | [Qualcomm NPU](../../../openai-clip-vit-base-patch16/aitk/openai_clip_qnn.json), [AMD NPU](../../../openai-clip-vit-base-patch16/aitk/openai_clip_qdq_amd.json), [NVIDIA TensorRT for RTX](../../../openai-clip-vit-base-patch16/aitk/openai_clip_trtrtx.json), [Intel CPU](../../../openai-clip-vit-base-patch16/aitk/openai_clip_ov.json), [Intel GPU](../../../openai-clip-vit-base-patch16/aitk/openai_clip_ov.json), [Intel NPU](../../../openai-clip-vit-base-patch16/aitk/openai_clip_ov.json), [DirectML](../../../openai-clip-vit-base-patch16/aitk/openai_clip_dml.json), [WebGPU](../../../openai-clip-vit-base-patch16/aitk/openai_clip_webgpu.json) | +| [Clip Vit Base Patch32](https://huggingface.co/openai/clip-vit-base-patch32) | [Qualcomm NPU](../../../openai-clip-vit-base-patch32/aitk/openai_clip_qnn.json), [AMD NPU](../../../openai-clip-vit-base-patch32/aitk/openai_clip_qdq_amd.json), [NVIDIA TensorRT for RTX](../../../openai-clip-vit-base-patch32/aitk/openai_clip_trtrtx.json), [Intel CPU](../../../openai-clip-vit-base-patch32/aitk/openai_clip_ov.json), [Intel GPU](../../../openai-clip-vit-base-patch32/aitk/openai_clip_ov.json), [Intel NPU](../../../openai-clip-vit-base-patch32/aitk/openai_clip_ov.json), [DirectML](../../../openai-clip-vit-base-patch32/aitk/openai_clip_dml.json), [WebGPU](../../../openai-clip-vit-base-patch32/aitk/openai_clip_webgpu.json) | +| [Resnet 50](https://huggingface.co/microsoft/resnet-50) | [Qualcomm NPU](../../../microsoft-resnet-50/aitk/resnet_qdq_qnn.json), [AMD NPU](../../../microsoft-resnet-50/aitk/resnet_qdq_amd.json), [NVIDIA TensorRT for RTX](../../../microsoft-resnet-50/aitk/resnet_trtrtx.json), [Intel CPU](../../../microsoft-resnet-50/aitk/resnet_context_ov_static.json), [Intel GPU](../../../microsoft-resnet-50/aitk/resnet_context_ov_static.json), [Intel NPU](../../../microsoft-resnet-50/aitk/resnet_context_ov_static.json), [DirectML](../../../microsoft-resnet-50/aitk/resnet_dml.json), [WebGPU](../../../microsoft-resnet-50/aitk/resnet_webgpu.json) | +| [Vit Base Patch16 224](https://huggingface.co/google/vit-base-patch16-224) | [Qualcomm NPU](../../../google-vit-base-patch16-224/aitk/vit-base-patch16-224_qdq_qnn.json), [AMD NPU](../../../google-vit-base-patch16-224/aitk/vit-base-patch16-224_qdq_amd.json), [NVIDIA TensorRT for RTX](../../../google-vit-base-patch16-224/aitk/vit-base-patch16-224_trtrtx.json), [Intel CPU](../../../google-vit-base-patch16-224/aitk/vit_base_patch16_224_context_ov_static.json), [Intel GPU](../../../google-vit-base-patch16-224/aitk/vit_base_patch16_224_context_ov_static.json), [Intel NPU](../../../google-vit-base-patch16-224/aitk/vit_base_patch16_224_context_ov_static.json), [DirectML](../../../google-vit-base-patch16-224/aitk/vit-base-patch16-224_dml.json), [WebGPU](../../../google-vit-base-patch16-224/aitk/vit-base-patch16-224_webgpu.json) | diff --git a/Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_webgpu_config.json.config b/Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_webgpu_config.json.config index 60a17277..b3e6c90c 100644 --- a/Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_webgpu_config.json.config +++ b/Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_webgpu_config.json.config @@ -6,6 +6,7 @@ "autoGenerated": true, "useModelBuilder": "builder" }, + "needHFLogin": true, "addCpu": false, "runtime": { "autoGenerated": true, diff --git a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_webgpu_config.json.config 
b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_webgpu_config.json.config index 60a17277..b3e6c90c 100644 --- a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_webgpu_config.json.config +++ b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_webgpu_config.json.config @@ -6,6 +6,7 @@ "autoGenerated": true, "useModelBuilder": "builder" }, + "needHFLogin": true, "addCpu": false, "runtime": { "autoGenerated": true, diff --git a/meta-llama-Llama-3.1-8B-Instruct/aitk/README.md b/meta-llama-Llama-3.1-8B-Instruct/aitk/README.md index 612cdf2b..8efa8cc5 100644 --- a/meta-llama-Llama-3.1-8B-Instruct/aitk/README.md +++ b/meta-llama-Llama-3.1-8B-Instruct/aitk/README.md @@ -7,7 +7,7 @@ This repository demonstrates the optimization of the [Llama-3.1-8B-Instruct](htt + This process extends the QDQ flow and compiling specifically for **Qualcomm NPUs** - OpenVINO for Intel® CPU/GPU/NPU + This process uses OpenVINO specific passes like `OpenVINOOptimumConversion`, `OpenVINOIoUpdate` and `OpenVINOEncapsulation` -- Float downcasting for NVIDIA TRT for RTX GPU +- Float downcasting for NVIDIA TRT for RTX GPU / WebGPU for general GPU - DML for general GPU + This process uses AutoAWQ and ModelBuilder diff --git a/meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_webgpu_config.json.config b/meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_webgpu_config.json.config index 60a17277..b3e6c90c 100644 --- a/meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_webgpu_config.json.config +++ b/meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_webgpu_config.json.config @@ -6,6 +6,7 @@ "autoGenerated": true, "useModelBuilder": "builder" }, + "needHFLogin": true, "addCpu": false, "runtime": { "autoGenerated": true, diff --git a/microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_webgpu_config.json.config b/microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_webgpu_config.json.config index 60a17277..b3e6c90c 100644 --- a/microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_webgpu_config.json.config +++ b/microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_webgpu_config.json.config @@ -6,6 +6,7 @@ "autoGenerated": true, "useModelBuilder": "builder" }, + "needHFLogin": true, "addCpu": false, "runtime": { "autoGenerated": true, diff --git a/microsoft-Phi-4-mini-reasoning/aitk/inference_sample.ipynb b/microsoft-Phi-4-mini-reasoning/aitk/inference_sample.ipynb index 53bac5da..6b78ea4e 100644 --- a/microsoft-Phi-4-mini-reasoning/aitk/inference_sample.ipynb +++ b/microsoft-Phi-4-mini-reasoning/aitk/inference_sample.ipynb @@ -19,31 +19,6 @@ "source": [ "# reference: https://learn.microsoft.com/en-us/windows/ai/new-windows-ml/tutorial?tabs=python#acquiring-the-model-and-preprocessing\n", "import subprocess\n", -<<<<<<< HEAD - "import json\n", - "import sys\n", - "import os\n", - "import onnxruntime_genai as og\n", - "\n", - "def register_execution_providers():\n", - " worker_script = os.path.abspath('winml.py')\n", - " result = subprocess.check_output([sys.executable, worker_script], text=True)\n", - " paths = json.loads(result)\n", - " for item in paths.items():\n", - " og.register_execution_provider_library(item[0], item[1])\n", - "\n", - "register_execution_providers()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import onnxruntime_genai as og\n", -======= ->>>>>>> 014446d171668720d793754bd3073fbc38675bc7 "import json\n", "import sys\n", "import os\n", diff --git a/microsoft-Phi-4-reasoning-plus/aitk/inference_sample.ipynb b/microsoft-Phi-4-reasoning-plus/aitk/inference_sample.ipynb 
index 53bac5da..6b78ea4e 100644 --- a/microsoft-Phi-4-reasoning-plus/aitk/inference_sample.ipynb +++ b/microsoft-Phi-4-reasoning-plus/aitk/inference_sample.ipynb @@ -19,31 +19,6 @@ "source": [ "# reference: https://learn.microsoft.com/en-us/windows/ai/new-windows-ml/tutorial?tabs=python#acquiring-the-model-and-preprocessing\n", "import subprocess\n", -<<<<<<< HEAD - "import json\n", - "import sys\n", - "import os\n", - "import onnxruntime_genai as og\n", - "\n", - "def register_execution_providers():\n", - " worker_script = os.path.abspath('winml.py')\n", - " result = subprocess.check_output([sys.executable, worker_script], text=True)\n", - " paths = json.loads(result)\n", - " for item in paths.items():\n", - " og.register_execution_provider_library(item[0], item[1])\n", - "\n", - "register_execution_providers()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import onnxruntime_genai as og\n", -======= ->>>>>>> 014446d171668720d793754bd3073fbc38675bc7 "import json\n", "import sys\n", "import os\n", diff --git a/microsoft-Phi-4-reasoning/aitk/inference_sample.ipynb b/microsoft-Phi-4-reasoning/aitk/inference_sample.ipynb index 53bac5da..6b78ea4e 100644 --- a/microsoft-Phi-4-reasoning/aitk/inference_sample.ipynb +++ b/microsoft-Phi-4-reasoning/aitk/inference_sample.ipynb @@ -19,31 +19,6 @@ "source": [ "# reference: https://learn.microsoft.com/en-us/windows/ai/new-windows-ml/tutorial?tabs=python#acquiring-the-model-and-preprocessing\n", "import subprocess\n", -<<<<<<< HEAD - "import json\n", - "import sys\n", - "import os\n", - "import onnxruntime_genai as og\n", - "\n", - "def register_execution_providers():\n", - " worker_script = os.path.abspath('winml.py')\n", - " result = subprocess.check_output([sys.executable, worker_script], text=True)\n", - " paths = json.loads(result)\n", - " for item in paths.items():\n", - " og.register_execution_provider_library(item[0], item[1])\n", - "\n", - "register_execution_providers()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import onnxruntime_genai as og\n", -======= ->>>>>>> 014446d171668720d793754bd3073fbc38675bc7 "import json\n", "import sys\n", "import os\n", From 3c8dfcded8fc3729d27c69c4b50d6af2b84adefd Mon Sep 17 00:00:00 2001 From: Chao Zhang Date: Tue, 14 Oct 2025 17:56:11 +0800 Subject: [PATCH 12/14] fix conflict --- .aitk/requirements/requirements-WCR.txt | 15 --------------- .aitk/requirements/requirements-WCR_CUDA.txt | 12 ------------ 2 files changed, 27 deletions(-) diff --git a/.aitk/requirements/requirements-WCR.txt b/.aitk/requirements/requirements-WCR.txt index 2b7f0bed..d718352d 100644 --- a/.aitk/requirements/requirements-WCR.txt +++ b/.aitk/requirements/requirements-WCR.txt @@ -65,13 +65,6 @@ onnxruntime-winml==1.23.0.dev2025091008 onnxscript==0.3.2 # optimum==1.26.0 optimum==1.26.0 -<<<<<<< HEAD ---index-url https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/ORT-Nightly/pypi/simple ---extra-index-url https://pypi.org/simple -# winrt-runtime==3.2.1 -winrt-runtime==3.2.1 -typing-extensions==4.14.1 -======= optuna==4.2.1 packaging==24.2 pandas==2.2.3 @@ -126,17 +119,9 @@ wcwidth==0.2.13 widgetsnbextension==4.0.13 # winrt-runtime==3.2.1 winrt-runtime==3.2.1 ->>>>>>> 014446d171668720d793754bd3073fbc38675bc7 # winrt-Windows.Foundation==3.2.1 winrt-Windows.Foundation==3.2.1 # winrt-Windows.Foundation.Collections==3.2.1 winrt-Windows.Foundation.Collections==3.2.1 -<<<<<<< HEAD 
-./wasdk_microsoft_windows_ai_machinelearning-1.8.250812004.dev0-cp312-cp312-win_amd64.whl -# download:wasdk_microsoft_windows_ai_machinelearning-1.8.250812004.dev0-cp312-cp312-win_amd64.whl -./wasdk_microsoft_windows_applicationmodel_dynamicdependency_bootstrap-1.8.250812004.dev0-cp312-cp312-win_amd64.whl -# download:wasdk_microsoft_windows_applicationmodel_dynamicdependency_bootstrap-1.8.250812004.dev0-cp312-cp312-win_amd64.whl -======= xxhash==3.5.0 yarl==1.20.1 ->>>>>>> 014446d171668720d793754bd3073fbc38675bc7 diff --git a/.aitk/requirements/requirements-WCR_CUDA.txt b/.aitk/requirements/requirements-WCR_CUDA.txt index 2eb5cd27..cbc08baa 100644 --- a/.aitk/requirements/requirements-WCR_CUDA.txt +++ b/.aitk/requirements/requirements-WCR_CUDA.txt @@ -66,10 +66,6 @@ onnxruntime-winml==1.23.0.dev2025091008 onnxscript==0.3.2 # optimum==1.26.0 optimum==1.26.0 -<<<<<<< HEAD ---index-url https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/ORT-Nightly/pypi/simple ---extra-index-url https://pypi.org/simple -======= optuna==4.2.1 packaging==24.2 pandas==2.2.3 @@ -123,19 +119,11 @@ wasdk-Microsoft.Windows.AI.MachineLearning==1.8.250916003 wasdk-Microsoft.Windows.ApplicationModel.DynamicDependency.Bootstrap==1.8.250916003 wcwidth==0.2.13 widgetsnbextension==4.0.13 ->>>>>>> 014446d171668720d793754bd3073fbc38675bc7 # winrt-runtime==3.2.1 winrt-runtime==3.2.1 # winrt-Windows.Foundation==3.2.1 winrt-Windows.Foundation==3.2.1 # winrt-Windows.Foundation.Collections==3.2.1 winrt-Windows.Foundation.Collections==3.2.1 -<<<<<<< HEAD -./wasdk_microsoft_windows_ai_machinelearning-1.8.250812004.dev0-cp312-cp312-win_amd64.whl -# download:wasdk_microsoft_windows_ai_machinelearning-1.8.250812004.dev0-cp312-cp312-win_amd64.whl -./wasdk_microsoft_windows_applicationmodel_dynamicdependency_bootstrap-1.8.250812004.dev0-cp312-cp312-win_amd64.whl -# download:wasdk_microsoft_windows_applicationmodel_dynamicdependency_bootstrap-1.8.250812004.dev0-cp312-cp312-win_amd64.whl -======= xxhash==3.5.0 yarl==1.20.1 ->>>>>>> 014446d171668720d793754bd3073fbc38675bc7 From 7c361f84c705795c79441a5447b79eb2768c66d5 Mon Sep 17 00:00:00 2001 From: Chao Zhang Date: Thu, 16 Oct 2025 17:28:53 +0800 Subject: [PATCH 13/14] fix merge issue --- .aitk/scripts/sanitize/main.py | 1 + google-bert-bert-base-multilingual-cased/aitk/README.md | 2 +- google-vit-base-patch16-224/aitk/README.md | 2 +- intel-bert-base-uncased-mrpc/aitk/README.md | 2 +- laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/README.md | 2 +- microsoft-resnet-50/aitk/README.md | 2 +- openai-clip-vit-base-patch16/aitk/README.md | 2 +- openai-clip-vit-base-patch32/aitk/README.md | 2 +- 8 files changed, 8 insertions(+), 7 deletions(-) diff --git a/.aitk/scripts/sanitize/main.py b/.aitk/scripts/sanitize/main.py index 1f914430..74a44dee 100644 --- a/.aitk/scripts/sanitize/main.py +++ b/.aitk/scripts/sanitize/main.py @@ -53,6 +53,7 @@ def main(): parameterTemplate = readCheckParameterTemplate(os.path.join(configDir, "parameter_template.json")) modelList.Check() + # check each model for model in modelList.allModels(): modelDir = shouldCheckModel(str(rootDir), configDir, model) diff --git a/google-bert-bert-base-multilingual-cased/aitk/README.md b/google-bert-bert-base-multilingual-cased/aitk/README.md index e509abd9..867ec378 100644 --- a/google-bert-bert-base-multilingual-cased/aitk/README.md +++ b/google-bert-bert-base-multilingual-cased/aitk/README.md @@ -3,7 +3,7 @@ This folder contains examples of BERT optimization using different workflows. 
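The requirements hunks in the previous patch remove leftover `<<<<<<<`/`=======`/`>>>>>>>` markers that survived a merge. A small sketch of the kind of scan that catches them before they land; the glob and the marker heuristics are illustrative assumptions:

```python
# Sketch: flag files that still contain Git conflict markers, like the
# stray blocks PATCH 12 strips from the requirements files.
from pathlib import Path

MARKERS = ("<<<<<<< ", "=======", ">>>>>>> ")

def has_conflict_markers(path: Path) -> bool:
    # A line-anchored check; "=======" can false-positive on rst-style
    # rules, which is acceptable for a quick pre-commit scan.
    return any(
        line.startswith(MARKERS)
        for line in path.read_text(encoding="utf-8").splitlines()
    )

for txt in Path(".aitk/requirements").glob("*.txt"):
    if has_conflict_markers(txt):
        print(f"conflict markers left in {txt}")
```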
- QDQ for Qualcomm NPU / AMD NPU -- OpenVINO for Intel NPU +- OpenVINO for Intel® CPU/GPU/NPU - Float downcasting for NVIDIA TRT for RTX GPU / DML for general GPU / WebGPU for general GPU ## QDQ for Qualcomm NPU / AMD NPU diff --git a/google-vit-base-patch16-224/aitk/README.md b/google-vit-base-patch16-224/aitk/README.md index 65a99f04..f2a46bb2 100644 --- a/google-vit-base-patch16-224/aitk/README.md +++ b/google-vit-base-patch16-224/aitk/README.md @@ -3,7 +3,7 @@ This folder contains examples of VIT optimization using different workflows. - QDQ for Qualcomm NPU / AMD NPU -- OpenVINO for Intel NPU +- OpenVINO for Intel® CPU/GPU/NPU - Float downcasting for NVIDIA TRT for RTX GPU / DML for general GPU / WebGPU for general GPU ## Optimization Workflows diff --git a/intel-bert-base-uncased-mrpc/aitk/README.md b/intel-bert-base-uncased-mrpc/aitk/README.md index 0499bf19..0d59c1da 100644 --- a/intel-bert-base-uncased-mrpc/aitk/README.md +++ b/intel-bert-base-uncased-mrpc/aitk/README.md @@ -3,7 +3,7 @@ This folder contains examples of BERT optimization using different workflows. - QDQ for Qualcomm NPU / AMD NPU -- OpenVINO for Intel NPU +- OpenVINO for Intel® CPU/GPU/NPU - Float downcasting for NVIDIA TRT for RTX GPU / DML for general GPU / WebGPU for general GPU ## QDQ for Qualcomm NPU / AMD NPU diff --git a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/README.md b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/README.md index 07918c72..91fe950c 100644 --- a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/README.md +++ b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/README.md @@ -4,7 +4,7 @@ This folder contains examples of Laion Clip optimization using different workflo - Text and vision model QDQ for Qualcomm NPU - QDQ for AMD NPU -- OpenVINO for Intel NPU +- OpenVINO for Intel® CPU/GPU/NPU - Float downcasting for NVIDIA TRT for RTX GPU / DML for general GPU / WebGPU for general GPU ## Laion Clip text optimization with QDQ for Qualcomm NPU diff --git a/microsoft-resnet-50/aitk/README.md b/microsoft-resnet-50/aitk/README.md index 390294cc..ffd2ebc1 100644 --- a/microsoft-resnet-50/aitk/README.md +++ b/microsoft-resnet-50/aitk/README.md @@ -3,7 +3,7 @@ This folder contains examples of ResNet optimization using different workflows. 
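For reference, the helper that the Phi-4 notebook hunks in the sanitize patch de-duplicate: the surviving cell registers WinML execution providers discovered by a sibling `winml.py`. This is reproduced from the removed duplicate block with comments added; it assumes `winml.py` prints a JSON object mapping provider names to library paths, which is the contract those notebooks rely on:

```python
# Register WinML execution providers before loading a model with
# onnxruntime-genai. winml.py (shipped alongside the notebooks) is
# expected to print {"ProviderName": "path/to/library", ...} as JSON.
import json
import os
import subprocess
import sys

import onnxruntime_genai as og

def register_execution_providers() -> None:
    worker_script = os.path.abspath("winml.py")
    result = subprocess.check_output([sys.executable, worker_script], text=True)
    paths = json.loads(result)
    for name, library_path in paths.items():
        og.register_execution_provider_library(name, library_path)

register_execution_providers()
```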
- QDQ for Qualcomm NPU / AMD NPU -- OpenVINO for Intel NPU +- OpenVINO for Intel® CPU/GPU/NPU - Float downcasting for NVIDIA TRT for RTX GPU / DML for general GPU / WebGPU for general GPU ## QDQ for Qualcomm NPU / AMD NPU diff --git a/openai-clip-vit-base-patch16/aitk/README.md b/openai-clip-vit-base-patch16/aitk/README.md index 25f4b93f..9e1c299d 100644 --- a/openai-clip-vit-base-patch16/aitk/README.md +++ b/openai-clip-vit-base-patch16/aitk/README.md @@ -4,7 +4,7 @@ This folder contains examples of Openai Clip optimization using different workfl - Text and vision model QDQ for Qualcomm NPU - QDQ for AMD NPU -- OpenVINO for Intel NPU +- OpenVINO for Intel® CPU/GPU/NPU - Float downcasting for NVIDIA TRT for RTX GPU / DML for general GPU / WebGPU for general GPU ## Openai Clip text optimization with QDQ for Qualcomm NPU diff --git a/openai-clip-vit-base-patch32/aitk/README.md b/openai-clip-vit-base-patch32/aitk/README.md index 25f4b93f..9e1c299d 100644 --- a/openai-clip-vit-base-patch32/aitk/README.md +++ b/openai-clip-vit-base-patch32/aitk/README.md @@ -4,7 +4,7 @@ This folder contains examples of Openai Clip optimization using different workfl - Text and vision model QDQ for Qualcomm NPU - QDQ for AMD NPU -- OpenVINO for Intel NPU +- OpenVINO for Intel® CPU/GPU/NPU - Float downcasting for NVIDIA TRT for RTX GPU / DML for general GPU / WebGPU for general GPU ## Openai Clip text optimization with QDQ for Qualcomm NPU From 47c0e63466eb58d74b8607bdd5f8719a06d4e119 Mon Sep 17 00:00:00 2001 From: Chao Zhang Date: Mon, 20 Oct 2025 08:39:27 +0800 Subject: [PATCH 14/14] fix lint --- intel-bert-base-uncased-mrpc/aitk/bert_webgpu.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/intel-bert-base-uncased-mrpc/aitk/bert_webgpu.json b/intel-bert-base-uncased-mrpc/aitk/bert_webgpu.json index ebbca861..26a24bb8 100644 --- a/intel-bert-base-uncased-mrpc/aitk/bert_webgpu.json +++ b/intel-bert-base-uncased-mrpc/aitk/bert_webgpu.json @@ -127,4 +127,4 @@ "target": "target_system", "cache_dir": "cache", "output_dir": "model/bert_webgpu" -} \ No newline at end of file +}
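The final patch, like the earlier `\ No newline at end of file` fixes, only normalizes a missing trailing newline. A tiny sketch of automating that lint fix; the target path mirrors the file touched above, and the helper name is an assumption:

```python
# Sketch: make sure a JSON config ends with exactly one newline,
# which is all the "fix lint" patch changes in bert_webgpu.json.
from pathlib import Path

def ensure_trailing_newline(path: Path) -> None:
    text = path.read_text(encoding="utf-8")
    fixed = text.rstrip("\n") + "\n"  # collapse to a single final newline
    if fixed != text:
        path.write_text(fixed, encoding="utf-8")
        print(f"fixed {path}")

ensure_trailing_newline(Path("intel-bert-base-uncased-mrpc/aitk/bert_webgpu.json"))
```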