From 4bb42a467d38d47b7b6166815869667abf912506 Mon Sep 17 00:00:00 2001
From: Chao Zhang
Date: Wed, 6 Aug 2025 17:52:04 +0800
Subject: [PATCH 01/14] add rtx recipe

---
 .aitk/configs/checks.json                     |  6 +--
 .aitk/configs/model_list.json                 |  4 ++
 .../aitk/_copy.json.config                    | 31 ++++++++++++++
 .../aitk/inference_sample.ipynb               | 23 ++++++++++
 Qwen-Qwen2.5-1.5B-Instruct/aitk/info.yml      |  3 ++
 .../aitk/model_project.config                 |  4 ++
 .../aitk/qwen2_5_trtrtx_config.json           | 38 +++++++++++++++++
 .../aitk/qwen2_5_trtrtx_config.json.config    | 42 +++++++++++++++++++
 .../aitk/requirements.txt                     |  2 +-
 Qwen-Qwen2.5-1.5B-Instruct/aitk/winml.py      | 21 ++++++++++
 .../aitk/_copy.json.config                    |  8 ++++
 .../aitk/deepseek_trtrtx_config.json          | 38 +++++++++++++++++
 .../aitk/deepseek_trtrtx_config.json.config   | 42 +++++++++++++++++++
 .../aitk/inference_sample.ipynb               | 23 ++++++++++
 .../aitk/info.yml                             |  3 ++
 .../aitk/model_project.config                 |  4 ++
 .../aitk/requirements.txt                     |  2 +-
 .../aitk/winml.py                             | 21 ++++++++++
 .../aitk/_copy.json.config                    |  4 ++
 .../aitk/inference_sample.ipynb               | 24 +++++++++++
 .../aitk/requirements.txt                     |  2 +-
 .../aitk/winml.py                             | 21 ++++++++++
 .../aitk/_copy.json.config                    |  4 ++
 .../aitk/inference_sample.ipynb               | 24 +++++++++++
 ...ase-patch16-224_dml_inference_sample.ipynb | 24 +++++++++++
 ...-patch16-224_trtrtx_inference_sample.ipynb | 24 +++++++++++
 google-vit-base-patch16-224/aitk/winml.py     | 21 ++++++++++
 .../aitk/inference_sample.ipynb               | 24 +++++++++++
 .../aitk/requirements.txt                     |  2 +-
 intel-bert-base-uncased-mrpc/aitk/winml.py    | 21 ++++++++++
 .../aitk/_copy.json.config                    |  4 ++
 .../laion_clip_dml_inference_sample.ipynb     | 25 +++++++++++
 .../aitk/laion_clip_ov_inference_sample.ipynb | 28 +++++++++++++
 .../laion_clip_qdq_amd_inference_sample.ipynb | 28 +++++++++++++
 ...laion_clip_text_qnn_inference_sample.ipynb | 24 +++++++++++
 .../laion_clip_trtrtx_inference_sample.ipynb  | 25 +++++++++++
 .../aitk/laion_clip_vision_qnn.json           |  1 -
 .../aitk/winml.py                             | 21 ++++++++++
 .../aitk/_copy.json.config                    | 31 ++++++++++++++
 .../aitk/inference_sample.ipynb               | 23 ++++++++++
 .../aitk/info.yml                             |  3 ++
 .../aitk/llama3_2_trtrtx_config.json          | 38 +++++++++++++++++
 .../aitk/llama3_2_trtrtx_config.json.config   | 42 +++++++++++++++++++
 .../aitk/model_project.config                 |  4 ++
 .../aitk/requirements.txt                     |  2 +-
 .../aitk/winml.py                             | 21 ++++++++++
 .../aitk/_copy.json.config                    | 31 ++++++++++++++
 .../aitk/inference_sample.ipynb               | 23 ++++++++++
 microsoft-Phi-3.5-mini-instruct/aitk/info.yml |  3 ++
 .../aitk/model_project.config                 |  4 ++
 .../aitk/phi3_5_trtrtx_config.json            | 38 +++++++++++++++++
 .../aitk/phi3_5_trtrtx_config.json.config     | 42 +++++++++++++++++++
 .../aitk/requirements.txt                     |  2 +-
 microsoft-Phi-3.5-mini-instruct/aitk/winml.py | 21 ++++++++++
 .../aitk/inference_sample.ipynb               | 23 ++++++++++
 .../aitk/inference_sample.ipynb               | 23 ++++++++++
 .../aitk/inference_sample.ipynb               | 23 ++++++++++
 .../aitk/inference_sample.ipynb               | 24 +++++++++++
 .../aitk/resnet_dml_inference_sample.ipynb    | 23 ++++++++++
 .../aitk/resnet_trtrtx_inference_sample.ipynb | 23 ++++++++++
 .../aitk/_copy.json.config                    |  4 ++
 .../openai_clip_dml_inference_sample.ipynb    | 25 +++++++++++
 .../openai_clip_ov_inference_sample.ipynb     | 28 +++++++++++++
 ...openai_clip_qdq_amd_inference_sample.ipynb | 28 +++++++++++++
 ...penai_clip_text_qnn_inference_sample.ipynb | 24 +++++++++++
 .../openai_clip_trtrtx_inference_sample.ipynb | 25 +++++++++++
 .../aitk/openai_clip_vision_qnn.json          |  1 -
 openai-clip-vit-base-patch16/aitk/winml.py    | 21 ++++++++++
 .../aitk/_copy.json.config                    |  4 ++
 .../openai_clip_dml_inference_sample.ipynb    | 25 +++++++++++
 .../openai_clip_ov_inference_sample.ipynb     | 28 +++++++++++++
 ...openai_clip_qdq_amd_inference_sample.ipynb | 28 +++++++++++++
 ...penai_clip_text_qnn_inference_sample.ipynb | 24 +++++++++++
 .../openai_clip_trtrtx_inference_sample.ipynb | 25 +++++++++++
 .../aitk/openai_clip_vision_qnn.json          |  1 -
 openai-clip-vit-base-patch32/aitk/winml.py    | 21 ++++++++++
 76 files changed, 1433 insertions(+), 12 deletions(-)
 create mode 100644 Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_trtrtx_config.json
 create mode 100644 Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_trtrtx_config.json.config
 create mode 100644 Qwen-Qwen2.5-1.5B-Instruct/aitk/winml.py
 create mode 100644 deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/_copy.json.config
 create mode 100644 deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_trtrtx_config.json
 create mode 100644 deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_trtrtx_config.json.config
 create mode 100644 deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/winml.py
 create mode 100644 google-bert-bert-base-multilingual-cased/aitk/winml.py
 create mode 100644 google-vit-base-patch16-224/aitk/winml.py
 create mode 100644 intel-bert-base-uncased-mrpc/aitk/winml.py
 create mode 100644 laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/winml.py
 create mode 100644 meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_trtrtx_config.json
 create mode 100644 meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_trtrtx_config.json.config
 create mode 100644 meta-llama-Llama-3.2-1B-Instruct/aitk/winml.py
 create mode 100644 microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_trtrtx_config.json
 create mode 100644 microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_trtrtx_config.json.config
 create mode 100644 microsoft-Phi-3.5-mini-instruct/aitk/winml.py
 create mode 100644 openai-clip-vit-base-patch16/aitk/winml.py
 create mode 100644 openai-clip-vit-base-patch32/aitk/winml.py

diff --git a/.aitk/configs/checks.json b/.aitk/configs/checks.json
index ed09ed73..5f84ecc8 100644
--- a/.aitk/configs/checks.json
+++ b/.aitk/configs/checks.json
@@ -1,12 +1,12 @@
 {
-  "configCheck": 75,
+  "configCheck": 79,
   "extensionCheck": 1,
   "gitignoreCheck": 31,
   "inferenceModelCheck": 22,
   "ipynbCheck": 50,
   "modelProjectCheck": 32,
   "oliveCheck": 0,
-  "oliveJsonCheck": 75,
-  "pathCheck": 744,
+  "oliveJsonCheck": 79,
+  "pathCheck": 752,
   "requirementsCheck": 31
 }
diff --git a/.aitk/configs/model_list.json b/.aitk/configs/model_list.json
index db801dfc..0edc00bd 100644
--- a/.aitk/configs/model_list.json
+++ b/.aitk/configs/model_list.json
@@ -23,6 +23,7 @@
       "runtimes": [
         "QNN",
         "AMDNPU",
+        "NvidiaTRTRTX",
         "DML",
         "IntelCPU",
         "IntelGPU",
@@ -147,6 +148,7 @@
       "runtimes": [
         "QNN",
         "AMDNPU",
+        "NvidiaTRTRTX",
         "DML",
         "IntelCPU",
         "IntelGPU",
@@ -195,6 +197,7 @@
       "runtimes": [
         "QNN",
         "AMDNPU",
+        "NvidiaTRTRTX",
         "DML",
         "IntelCPU",
         "IntelGPU",
@@ -358,6 +361,7 @@
       "runtimes": [
         "QNN",
         "AMDNPU",
+        "NvidiaTRTRTX",
         "DML",
         "IntelCPU",
         "IntelGPU",
diff --git a/Qwen-Qwen2.5-1.5B-Instruct/aitk/_copy.json.config b/Qwen-Qwen2.5-1.5B-Instruct/aitk/_copy.json.config
index a2d6f70d..999a03d6 100644
--- a/Qwen-Qwen2.5-1.5B-Instruct/aitk/_copy.json.config
+++ b/Qwen-Qwen2.5-1.5B-Instruct/aitk/_copy.json.config
@@ -16,6 +16,14 @@
           "find": "deepseek_ov_config",
           "replace": "qwen2_5_ov_config"
         },
+        {
+          "find": "deepseek_trtrtx_config",
+          "replace": "qwen2_5_trtrtx_config"
+        },
+        {
+          "find": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
+          "replace": "Qwen/Qwen2.5-1.5B-Instruct"
+        },
         {
           "find": "deepseek_dml_config",
           "replace": "qwen2_5_dml_config"
@@ -84,6 +92,20 @@
         }
       ]
     },
+    {
+      "src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_trtrtx_config.json",
+      "dst": "qwen2_5_trtrtx_config.json",
+      "replacements": [
+        {
+          "find": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
+          "replace": "Qwen/Qwen2.5-1.5B-Instruct"
+        },
+        {
+          "find": "model/deepseek",
+          "replace": "model/qwen2_5"
+        }
+      ]
+    },
     {
       "src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_dml_config.json",
       "dst": "qwen2_5_dml_config.json",
@@ -98,6 +120,11 @@
         }
       ]
     },
+    {
+      "src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_trtrtx_config.json.config",
+      "dst": "qwen2_5_trtrtx_config.json.config",
+      "replacements": []
+    },
     {
       "src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_dml_config.json.config",
       "dst": "qwen2_5_dml_config.json.config",
@@ -135,6 +162,10 @@
           "replace": "<|im_start|>user\\\\n{input}<|im_end|>\\\\n<|im_start|>assistant\\\\n"
         }
       ]
+    },
+    {
+      "src": "../../intel-bert-base-uncased-mrpc/aitk/winml.py",
+      "dst": "winml.py"
     }
   ]
 }
diff --git a/Qwen-Qwen2.5-1.5B-Instruct/aitk/inference_sample.ipynb b/Qwen-Qwen2.5-1.5B-Instruct/aitk/inference_sample.ipynb
index 7757249e..c61db2d9 100644
--- a/Qwen-Qwen2.5-1.5B-Instruct/aitk/inference_sample.ipynb
+++ b/Qwen-Qwen2.5-1.5B-Instruct/aitk/inference_sample.ipynb
@@ -11,6 +11,29 @@
     "model_folder = \"./model\""
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# reference: https://learn.microsoft.com/en-us/windows/ai/new-windows-ml/tutorial?tabs=python#acquiring-the-model-and-preprocessing\n",
+    "import subprocess\n",
+    "import json\n",
+    "import sys\n",
+    "import os\n",
+    "import onnxruntime_genai as og\n",
+    "\n",
+    "def register_execution_providers():\n",
+    "    worker_script = os.path.abspath('winml.py')\n",
+    "    result = subprocess.check_output([sys.executable, worker_script], text=True)\n",
+    "    paths = json.loads(result)\n",
+    "    for item in paths.items():\n",
+    "        og.register_execution_provider_library(item[0], item[1])\n",
+    "\n",
+    "register_execution_providers()"
+   ]
+  },
  {
   "cell_type": "code",
   "execution_count": null,
diff --git a/Qwen-Qwen2.5-1.5B-Instruct/aitk/info.yml b/Qwen-Qwen2.5-1.5B-Instruct/aitk/info.yml
index 81cd2194..e7bacd4c 100644
--- a/Qwen-Qwen2.5-1.5B-Instruct/aitk/info.yml
+++ b/Qwen-Qwen2.5-1.5B-Instruct/aitk/info.yml
@@ -14,6 +14,9 @@ recipes:
       - cpu
       - gpu
     ep: OpenVINOExecutionProvider
+  - file: "qwen2_5_trtrtx_config.json"
+    device: gpu
+    ep: NvTensorRTRTXExecutionProvider
   - file: "qwen2_5_dml_config.json"
     device: gpu
     ep: DmlExecutionProvider
diff --git a/Qwen-Qwen2.5-1.5B-Instruct/aitk/model_project.config b/Qwen-Qwen2.5-1.5B-Instruct/aitk/model_project.config
index 68672843..61cb1603 100644
--- a/Qwen-Qwen2.5-1.5B-Instruct/aitk/model_project.config
+++ b/Qwen-Qwen2.5-1.5B-Instruct/aitk/model_project.config
@@ -12,6 +12,10 @@
       "file": "qwen2_5_ov_config.json",
       "templateName": "qwen2_5_ov_config"
     },
+    {
+      "file": "qwen2_5_trtrtx_config.json",
+      "templateName": "qwen2_5_trtrtx_config"
+    },
     {
       "file": "qwen2_5_dml_config.json",
       "templateName": "qwen2_5_dml_config"
diff --git a/Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_trtrtx_config.json b/Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_trtrtx_config.json
new file mode 100644
index 00000000..331d96fb
--- /dev/null
+++ b/Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_trtrtx_config.json
@@ -0,0 +1,38 @@
+{
+  "input_model": {
+    "type": "HfModel",
+    "model_path": "Qwen/Qwen2.5-1.5B-Instruct"
+  },
+  "systems": {
+    "local_system": {
"type": "LocalSystem", + "accelerators": [ { "device": "gpu", "execution_providers": [ "NvTensorRTRTXExecutionProvider" ] } ] + } + }, + "data_configs": [ + { + "name": "wikitext2_train", + "type": "HuggingfaceContainer", + "load_dataset_config": { + "data_name": "wikitext", + "subset": "wikitext-2-raw-v1", + "split": "train" + }, + "pre_process_data_config": { + "strategy": "line-by-line", + "add_special_tokens": false, + "max_samples": 128, + "max_seq_len": 512 + } + } + ], + "passes": { + "builder": { "type": "ModelBuilder", "precision": "fp16" } + }, + "target": "local_system", + "log_severity_level": 1, + "output_dir": "model/qwen2_5", + "cache_dir": "cache", + "no_artifacts": true, + "evaluate_input_model": false +} \ No newline at end of file diff --git a/Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_trtrtx_config.json.config b/Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_trtrtx_config.json.config new file mode 100644 index 00000000..cb987c65 --- /dev/null +++ b/Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_trtrtx_config.json.config @@ -0,0 +1,42 @@ +{ + "name": "Convert to NVIDIA TRT for RTX", + "oliveFile": "", + "isLLM": true, + "debugInfo": { + "autoGenerated": true, + "useModelBuilder": "builder" + }, + "addCpu": false, + "runtime": { + "autoGenerated": true, + "name": "Evaluate on", + "type": "enum", + "displayNames": [ + "NVIDIA TensorRT for RTX" + ], + "path": "systems.local_system.accelerators.0.execution_providers.0", + "values": [ + "NvTensorRTRTXExecutionProvider" + ], + "readOnly": false + }, + "sections": [ + { + "autoGenerated": true, + "name": "Convert", + "phase": "Conversion", + "parameters": [], + "toggle": { + "autoGenerated": true, + "name": "Convert to ONNX format", + "type": "bool", + "path": "passes.builder", + "actions": [ + [], + [] + ], + "readOnly": true + } + } + ] +} diff --git a/Qwen-Qwen2.5-1.5B-Instruct/aitk/requirements.txt b/Qwen-Qwen2.5-1.5B-Instruct/aitk/requirements.txt index 7af84714..e3c23f80 100644 --- a/Qwen-Qwen2.5-1.5B-Instruct/aitk/requirements.txt +++ b/Qwen-Qwen2.5-1.5B-Instruct/aitk/requirements.txt @@ -1,4 +1,4 @@ # This file will be installed together with AITK runtime requirements # For the full requirements, see AITK datasets -optimum +optimum==1.26.0 diff --git a/Qwen-Qwen2.5-1.5B-Instruct/aitk/winml.py b/Qwen-Qwen2.5-1.5B-Instruct/aitk/winml.py new file mode 100644 index 00000000..74a12c53 --- /dev/null +++ b/Qwen-Qwen2.5-1.5B-Instruct/aitk/winml.py @@ -0,0 +1,21 @@ +import json + +def _get_ep_paths() -> dict[str, str]: + from winui3.microsoft.windows.applicationmodel.dynamicdependency.bootstrap import ( + InitializeOptions, + initialize + ) + import winui3.microsoft.windows.ai.machinelearning as winml + eps = {} + with initialize(options = InitializeOptions.ON_NO_MATCH_SHOW_UI): + catalog = winml.ExecutionProviderCatalog.get_default() + providers = catalog.find_all_providers() + for provider in providers: + provider.ensure_ready_async().get() + eps[provider.name] = provider.library_path + # DO NOT call provider.try_register in python. That will register to the native env. 
+    return eps
+
+if __name__ == "__main__":
+    eps = _get_ep_paths()
+    print(json.dumps(eps))
diff --git a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/_copy.json.config b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/_copy.json.config
new file mode 100644
index 00000000..6fd21fb1
--- /dev/null
+++ b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/_copy.json.config
@@ -0,0 +1,8 @@
+{
+  "copies": [
+    {
+      "src": "../../intel-bert-base-uncased-mrpc/aitk/winml.py",
+      "dst": "winml.py"
+    }
+  ]
+}
diff --git a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_trtrtx_config.json b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_trtrtx_config.json
new file mode 100644
index 00000000..cff36b19
--- /dev/null
+++ b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_trtrtx_config.json
@@ -0,0 +1,38 @@
+{
+  "input_model": {
+    "type": "HfModel",
+    "model_path": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
+  },
+  "systems": {
+    "local_system": {
+      "type": "LocalSystem",
+      "accelerators": [ { "device": "gpu", "execution_providers": [ "NvTensorRTRTXExecutionProvider" ] } ]
+    }
+  },
+  "data_configs": [
+    {
+      "name": "wikitext2_train",
+      "type": "HuggingfaceContainer",
+      "load_dataset_config": {
+        "data_name": "wikitext",
+        "subset": "wikitext-2-raw-v1",
+        "split": "train"
+      },
+      "pre_process_data_config": {
+        "strategy": "line-by-line",
+        "add_special_tokens": false,
+        "max_samples": 128,
+        "max_seq_len": 512
+      }
+    }
+  ],
+  "passes": {
+    "builder": { "type": "ModelBuilder", "precision": "fp16" }
+  },
+  "target": "local_system",
+  "log_severity_level": 1,
+  "output_dir": "model/deepseek",
+  "cache_dir": "cache",
+  "no_artifacts": true,
+  "evaluate_input_model": false
+}
\ No newline at end of file
diff --git a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_trtrtx_config.json.config b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_trtrtx_config.json.config
new file mode 100644
index 00000000..cb987c65
--- /dev/null
+++ b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_trtrtx_config.json.config
@@ -0,0 +1,42 @@
+{
+  "name": "Convert to NVIDIA TRT for RTX",
+  "oliveFile": "",
+  "isLLM": true,
+  "debugInfo": {
+    "autoGenerated": true,
+    "useModelBuilder": "builder"
+  },
+  "addCpu": false,
+  "runtime": {
+    "autoGenerated": true,
+    "name": "Evaluate on",
+    "type": "enum",
+    "displayNames": [
+      "NVIDIA TensorRT for RTX"
+    ],
+    "path": "systems.local_system.accelerators.0.execution_providers.0",
+    "values": [
+      "NvTensorRTRTXExecutionProvider"
+    ],
+    "readOnly": false
+  },
+  "sections": [
+    {
+      "autoGenerated": true,
+      "name": "Convert",
+      "phase": "Conversion",
+      "parameters": [],
+      "toggle": {
+        "autoGenerated": true,
+        "name": "Convert to ONNX format",
+        "type": "bool",
+        "path": "passes.builder",
+        "actions": [
+          [],
+          []
+        ],
+        "readOnly": true
+      }
+    }
+  ]
+}
diff --git a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/inference_sample.ipynb b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/inference_sample.ipynb
index 67a72436..10076709 100644
--- a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/inference_sample.ipynb
+++ b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/inference_sample.ipynb
@@ -11,6 +11,29 @@
     "model_folder = \"./model\""
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# reference: https://learn.microsoft.com/en-us/windows/ai/new-windows-ml/tutorial?tabs=python#acquiring-the-model-and-preprocessing\n",
+    "import subprocess\n",
+    "import json\n",
+    "import sys\n",
+    "import os\n",
+    "import onnxruntime_genai as og\n",
+    "\n",
+    "def register_execution_providers():\n",
+    "    worker_script = os.path.abspath('winml.py')\n",
+    "    result = subprocess.check_output([sys.executable, worker_script], text=True)\n",
+    "    paths = json.loads(result)\n",
+    "    for item in paths.items():\n",
+    "        og.register_execution_provider_library(item[0], item[1])\n",
+    "\n",
+    "register_execution_providers()"
+   ]
+  },
  {
   "cell_type": "code",
   "execution_count": null,
diff --git a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/info.yml b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/info.yml
index bcf926f3..0a43310f 100644
--- a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/info.yml
+++ b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/info.yml
@@ -16,6 +16,9 @@
     ep: OpenVINOExecutionProvider
     aitk:
       oliveFile: "deepseek/openvino/DeepSeek-R1-Distill-Qwen-1.5B_context_ov_dynamic_sym_gs128_bkp_int8_sym_r1.json"
+  - file: "deepseek_trtrtx_config.json"
+    device: gpu
+    ep: NvTensorRTRTXExecutionProvider
   - file: "deepseek_dml_config.json"
     device: gpu
     ep: DmlExecutionProvider
diff --git a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/model_project.config b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/model_project.config
index dab152a5..d78581fe 100644
--- a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/model_project.config
+++ b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/model_project.config
@@ -12,6 +12,10 @@
       "file": "deepseek_ov_config.json",
       "templateName": "deepseek_ov_config"
     },
+    {
+      "file": "deepseek_trtrtx_config.json",
+      "templateName": "deepseek_trtrtx_config"
+    },
     {
       "file": "deepseek_dml_config.json",
       "templateName": "deepseek_dml_config"
diff --git a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/requirements.txt b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/requirements.txt
index 7af84714..e3c23f80 100644
--- a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/requirements.txt
+++ b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/requirements.txt
@@ -1,4 +1,4 @@
 # This file will be installed together with AITK runtime requirements
 # For the full requirements, see AITK
 datasets
-optimum
+optimum==1.26.0
diff --git a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/winml.py b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/winml.py
new file mode 100644
index 00000000..74a12c53
--- /dev/null
+++ b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/winml.py
@@ -0,0 +1,21 @@
+import json
+
+def _get_ep_paths() -> dict[str, str]:
+    from winui3.microsoft.windows.applicationmodel.dynamicdependency.bootstrap import (
+        InitializeOptions,
+        initialize
+    )
+    import winui3.microsoft.windows.ai.machinelearning as winml
+    eps = {}
+    with initialize(options = InitializeOptions.ON_NO_MATCH_SHOW_UI):
+        catalog = winml.ExecutionProviderCatalog.get_default()
+        providers = catalog.find_all_providers()
+        for provider in providers:
+            provider.ensure_ready_async().get()
+            eps[provider.name] = provider.library_path
+            # DO NOT call provider.try_register in python. That will register to the native env.
+    return eps
+
+if __name__ == "__main__":
+    eps = _get_ep_paths()
+    print(json.dumps(eps))
diff --git a/google-bert-bert-base-multilingual-cased/aitk/_copy.json.config b/google-bert-bert-base-multilingual-cased/aitk/_copy.json.config
index ff27826d..acda97ab 100644
--- a/google-bert-bert-base-multilingual-cased/aitk/_copy.json.config
+++ b/google-bert-bert-base-multilingual-cased/aitk/_copy.json.config
@@ -13,6 +13,10 @@
           "replace": "Convert to Qualcomm NPU"
         }
       ]
+    },
+    {
+      "src": "../../intel-bert-base-uncased-mrpc/aitk/winml.py",
+      "dst": "winml.py"
     }
   ]
 }
diff --git a/google-bert-bert-base-multilingual-cased/aitk/inference_sample.ipynb b/google-bert-bert-base-multilingual-cased/aitk/inference_sample.ipynb
index b86fca63..7f92e5e8 100644
--- a/google-bert-bert-base-multilingual-cased/aitk/inference_sample.ipynb
+++ b/google-bert-bert-base-multilingual-cased/aitk/inference_sample.ipynb
@@ -13,6 +13,29 @@
     "    onnx_model_path = \"./model/openvino_model_st_quant.onnx\""
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# reference: https://learn.microsoft.com/en-us/windows/ai/new-windows-ml/tutorial?tabs=python#acquiring-the-model-and-preprocessing\n",
+    "import subprocess\n",
+    "import json\n",
+    "import sys\n",
+    "import os\n",
+    "import onnxruntime as ort\n",
+    "\n",
+    "def register_execution_providers():\n",
+    "    worker_script = os.path.abspath('winml.py')\n",
+    "    result = subprocess.check_output([sys.executable, worker_script], text=True)\n",
+    "    paths = json.loads(result)\n",
+    "    for item in paths.items():\n",
+    "        ort.register_execution_provider_library(item[0], item[1])\n",
+    "\n",
+    "register_execution_providers()"
+   ]
+  },
  {
   "cell_type": "code",
   "execution_count": null,
@@ -76,6 +99,7 @@
    "    if ep_device.ep_name == ep_name and ep_device.device.type == device_type:\n",
    "        print(f\"Adding {ep_name} for {device_type}\")\n",
    "        session_options.add_provider_for_devices([ep_device], {} if ep_options is None else ep_options)\n",
+   "        break\n",
    "\n",
    "\n",
    "session_options = ort.SessionOptions()\n",
diff --git a/google-bert-bert-base-multilingual-cased/aitk/requirements.txt b/google-bert-bert-base-multilingual-cased/aitk/requirements.txt
index b02be515..3fac61d6 100644
--- a/google-bert-bert-base-multilingual-cased/aitk/requirements.txt
+++ b/google-bert-bert-base-multilingual-cased/aitk/requirements.txt
@@ -2,4 +2,4 @@
 # For the full requirements, see AITK
 olive-ai
 datasets
-optimum
+optimum==1.26.0
diff --git a/google-bert-bert-base-multilingual-cased/aitk/winml.py b/google-bert-bert-base-multilingual-cased/aitk/winml.py
new file mode 100644
index 00000000..74a12c53
--- /dev/null
+++ b/google-bert-bert-base-multilingual-cased/aitk/winml.py
@@ -0,0 +1,21 @@
+import json
+
+def _get_ep_paths() -> dict[str, str]:
+    from winui3.microsoft.windows.applicationmodel.dynamicdependency.bootstrap import (
+        InitializeOptions,
+        initialize
+    )
+    import winui3.microsoft.windows.ai.machinelearning as winml
+    eps = {}
+    with initialize(options = InitializeOptions.ON_NO_MATCH_SHOW_UI):
+        catalog = winml.ExecutionProviderCatalog.get_default()
+        providers = catalog.find_all_providers()
+        for provider in providers:
+            provider.ensure_ready_async().get()
+            eps[provider.name] = provider.library_path
+            # DO NOT call provider.try_register in python. That will register to the native env.
+    return eps
+
+if __name__ == "__main__":
+    eps = _get_ep_paths()
+    print(json.dumps(eps))
diff --git a/google-vit-base-patch16-224/aitk/_copy.json.config b/google-vit-base-patch16-224/aitk/_copy.json.config
index 6a948f91..f924a60d 100644
--- a/google-vit-base-patch16-224/aitk/_copy.json.config
+++ b/google-vit-base-patch16-224/aitk/_copy.json.config
@@ -37,6 +37,10 @@
           "replace": "NvTensorRTRTXExecutionProvider"
         }
       ]
+    },
+    {
+      "src": "../../intel-bert-base-uncased-mrpc/aitk/winml.py",
+      "dst": "winml.py"
     }
   ]
 }
diff --git a/google-vit-base-patch16-224/aitk/inference_sample.ipynb b/google-vit-base-patch16-224/aitk/inference_sample.ipynb
index 345bbece..ddcf70b1 100644
--- a/google-vit-base-patch16-224/aitk/inference_sample.ipynb
+++ b/google-vit-base-patch16-224/aitk/inference_sample.ipynb
@@ -13,6 +13,29 @@
     "    onnx_model_path = \"./model/ov_model_st_quant.onnx\""
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# reference: https://learn.microsoft.com/en-us/windows/ai/new-windows-ml/tutorial?tabs=python#acquiring-the-model-and-preprocessing\n",
+    "import subprocess\n",
+    "import json\n",
+    "import sys\n",
+    "import os\n",
+    "import onnxruntime as ort\n",
+    "\n",
+    "def register_execution_providers():\n",
+    "    worker_script = os.path.abspath('winml.py')\n",
+    "    result = subprocess.check_output([sys.executable, worker_script], text=True)\n",
+    "    paths = json.loads(result)\n",
+    "    for item in paths.items():\n",
+    "        ort.register_execution_provider_library(item[0], item[1])\n",
+    "\n",
+    "register_execution_providers()"
+   ]
+  },
  {
   "cell_type": "code",
   "execution_count": null,
@@ -167,6 +190,7 @@
    "    if ep_device.ep_name == ep_name and ep_device.device.type == device_type:\n",
    "        print(f\"Adding {ep_name} for {device_type}\")\n",
    "        session_options.add_provider_for_devices([ep_device], {} if ep_options is None else ep_options)\n",
+   "        break\n",
    "\n",
    "\n",
    "session_options = ort.SessionOptions()\n",
diff --git a/google-vit-base-patch16-224/aitk/vit-base-patch16-224_dml_inference_sample.ipynb b/google-vit-base-patch16-224/aitk/vit-base-patch16-224_dml_inference_sample.ipynb
index d53b5df6..e7c6d91e 100644
--- a/google-vit-base-patch16-224/aitk/vit-base-patch16-224_dml_inference_sample.ipynb
+++ b/google-vit-base-patch16-224/aitk/vit-base-patch16-224_dml_inference_sample.ipynb
@@ -13,6 +13,29 @@
     "    onnx_model_path = \"./model/ov_model_st_quant.onnx\""
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# reference: https://learn.microsoft.com/en-us/windows/ai/new-windows-ml/tutorial?tabs=python#acquiring-the-model-and-preprocessing\n",
+    "import subprocess\n",
+    "import json\n",
+    "import sys\n",
+    "import os\n",
+    "import onnxruntime as ort\n",
+    "\n",
+    "def register_execution_providers():\n",
+    "    worker_script = os.path.abspath('winml.py')\n",
+    "    result = subprocess.check_output([sys.executable, worker_script], text=True)\n",
+    "    paths = json.loads(result)\n",
+    "    for item in paths.items():\n",
+    "        ort.register_execution_provider_library(item[0], item[1])\n",
+    "\n",
+    "register_execution_providers()"
+   ]
+  },
  {
   "cell_type": "code",
   "execution_count": null,
@@ -167,6 +190,7 @@
    "    if ep_device.ep_name == ep_name and ep_device.device.type == device_type:\n",
    "        print(f\"Adding {ep_name} for {device_type}\")\n",
    "        session_options.add_provider_for_devices([ep_device], {} if ep_options is None else ep_options)\n",
+   "        break\n",
    "\n",
    "\n",
    "session_options = ort.SessionOptions()\n",
diff --git a/google-vit-base-patch16-224/aitk/vit-base-patch16-224_trtrtx_inference_sample.ipynb b/google-vit-base-patch16-224/aitk/vit-base-patch16-224_trtrtx_inference_sample.ipynb
index 98fd8b81..c7bb23c5 100644
--- a/google-vit-base-patch16-224/aitk/vit-base-patch16-224_trtrtx_inference_sample.ipynb
+++ b/google-vit-base-patch16-224/aitk/vit-base-patch16-224_trtrtx_inference_sample.ipynb
@@ -13,6 +13,29 @@
     "    onnx_model_path = \"./model/ov_model_st_quant.onnx\""
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# reference: https://learn.microsoft.com/en-us/windows/ai/new-windows-ml/tutorial?tabs=python#acquiring-the-model-and-preprocessing\n",
+    "import subprocess\n",
+    "import json\n",
+    "import sys\n",
+    "import os\n",
+    "import onnxruntime as ort\n",
+    "\n",
+    "def register_execution_providers():\n",
+    "    worker_script = os.path.abspath('winml.py')\n",
+    "    result = subprocess.check_output([sys.executable, worker_script], text=True)\n",
+    "    paths = json.loads(result)\n",
+    "    for item in paths.items():\n",
+    "        ort.register_execution_provider_library(item[0], item[1])\n",
+    "\n",
+    "register_execution_providers()"
+   ]
+  },
  {
   "cell_type": "code",
   "execution_count": null,
@@ -167,6 +190,7 @@
    "    if ep_device.ep_name == ep_name and ep_device.device.type == device_type:\n",
    "        print(f\"Adding {ep_name} for {device_type}\")\n",
    "        session_options.add_provider_for_devices([ep_device], {} if ep_options is None else ep_options)\n",
+   "        break\n",
    "\n",
    "\n",
    "session_options = ort.SessionOptions()\n",
diff --git a/google-vit-base-patch16-224/aitk/winml.py b/google-vit-base-patch16-224/aitk/winml.py
new file mode 100644
index 00000000..74a12c53
--- /dev/null
+++ b/google-vit-base-patch16-224/aitk/winml.py
@@ -0,0 +1,21 @@
+import json
+
+def _get_ep_paths() -> dict[str, str]:
+    from winui3.microsoft.windows.applicationmodel.dynamicdependency.bootstrap import (
+        InitializeOptions,
+        initialize
+    )
+    import winui3.microsoft.windows.ai.machinelearning as winml
+    eps = {}
+    with initialize(options = InitializeOptions.ON_NO_MATCH_SHOW_UI):
+        catalog = winml.ExecutionProviderCatalog.get_default()
+        providers = catalog.find_all_providers()
+        for provider in providers:
+            provider.ensure_ready_async().get()
+            eps[provider.name] = provider.library_path
+            # DO NOT call provider.try_register in python. That will register to the native env.
+    return eps
+
+if __name__ == "__main__":
+    eps = _get_ep_paths()
+    print(json.dumps(eps))
diff --git a/intel-bert-base-uncased-mrpc/aitk/inference_sample.ipynb b/intel-bert-base-uncased-mrpc/aitk/inference_sample.ipynb
index 53782e1f..689134e2 100644
--- a/intel-bert-base-uncased-mrpc/aitk/inference_sample.ipynb
+++ b/intel-bert-base-uncased-mrpc/aitk/inference_sample.ipynb
@@ -13,6 +13,29 @@
     "    onnx_model_path = \"./model/ov_model_st_quant.onnx\""
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# reference: https://learn.microsoft.com/en-us/windows/ai/new-windows-ml/tutorial?tabs=python#acquiring-the-model-and-preprocessing\n",
+    "import subprocess\n",
+    "import json\n",
+    "import sys\n",
+    "import os\n",
+    "import onnxruntime as ort\n",
+    "\n",
+    "def register_execution_providers():\n",
+    "    worker_script = os.path.abspath('winml.py')\n",
+    "    result = subprocess.check_output([sys.executable, worker_script], text=True)\n",
+    "    paths = json.loads(result)\n",
+    "    for item in paths.items():\n",
+    "        ort.register_execution_provider_library(item[0], item[1])\n",
+    "\n",
+    "register_execution_providers()"
+   ]
+  },
  {
   "cell_type": "code",
   "execution_count": null,
@@ -33,6 +56,7 @@
    "    if ep_device.ep_name == ep_name and ep_device.device.type == device_type:\n",
    "        print(f\"Adding {ep_name} for {device_type}\")\n",
    "        session_options.add_provider_for_devices([ep_device], {} if ep_options is None else ep_options)\n",
+   "        break\n",
    "\n",
    "\n",
    "session_options = ort.SessionOptions()\n",
diff --git a/intel-bert-base-uncased-mrpc/aitk/requirements.txt b/intel-bert-base-uncased-mrpc/aitk/requirements.txt
index bad441ca..69515098 100644
--- a/intel-bert-base-uncased-mrpc/aitk/requirements.txt
+++ b/intel-bert-base-uncased-mrpc/aitk/requirements.txt
@@ -1,4 +1,4 @@
 # This file will be installed together with AITK runtime requirements
 # For the full requirements, see AITK
 olive-ai
-optimum
+optimum==1.26.0
diff --git a/intel-bert-base-uncased-mrpc/aitk/winml.py b/intel-bert-base-uncased-mrpc/aitk/winml.py
new file mode 100644
index 00000000..74a12c53
--- /dev/null
+++ b/intel-bert-base-uncased-mrpc/aitk/winml.py
@@ -0,0 +1,21 @@
+import json
+
+def _get_ep_paths() -> dict[str, str]:
+    from winui3.microsoft.windows.applicationmodel.dynamicdependency.bootstrap import (
+        InitializeOptions,
+        initialize
+    )
+    import winui3.microsoft.windows.ai.machinelearning as winml
+    eps = {}
+    with initialize(options = InitializeOptions.ON_NO_MATCH_SHOW_UI):
+        catalog = winml.ExecutionProviderCatalog.get_default()
+        providers = catalog.find_all_providers()
+        for provider in providers:
+            provider.ensure_ready_async().get()
+            eps[provider.name] = provider.library_path
+            # DO NOT call provider.try_register in python. That will register to the native env.
+    return eps
+
+if __name__ == "__main__":
+    eps = _get_ep_paths()
+    print(json.dumps(eps))
diff --git a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/_copy.json.config b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/_copy.json.config
index eb77a259..f99b7656 100644
--- a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/_copy.json.config
+++ b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/_copy.json.config
@@ -218,6 +218,10 @@
     {
       "src": "../../openai-clip-vit-base-patch16/aitk/requirements.txt",
      "dst": "requirements.txt"
+    },
+    {
+      "src": "../../intel-bert-base-uncased-mrpc/aitk/winml.py",
+      "dst": "winml.py"
     }
   ]
 }
diff --git a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_dml_inference_sample.ipynb b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_dml_inference_sample.ipynb
index c33db85d..e174c596 100644
--- a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_dml_inference_sample.ipynb
+++ b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_dml_inference_sample.ipynb
@@ -11,6 +11,30 @@
     "ExecutionProvider=\"DmlExecutionProvider\""
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "22477669",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# reference: https://learn.microsoft.com/en-us/windows/ai/new-windows-ml/tutorial?tabs=python#acquiring-the-model-and-preprocessing\n",
+    "import subprocess\n",
+    "import json\n",
+    "import sys\n",
+    "import os\n",
+    "import onnxruntime as ort\n",
+    "\n",
+    "def register_execution_providers():\n",
+    "    worker_script = os.path.abspath('winml.py')\n",
+    "    result = subprocess.check_output([sys.executable, worker_script], text=True)\n",
+    "    paths = json.loads(result)\n",
+    "    for item in paths.items():\n",
+    "        ort.register_execution_provider_library(item[0], item[1])\n",
+    "\n",
+    "register_execution_providers()"
+   ]
+  },
  {
   "cell_type": "code",
   "execution_count": null,
@@ -42,6 +66,7 @@
    "    if ep_device.ep_name == ep_name and ep_device.device.type == device_type:\n",
    "        print(f\"Adding {ep_name} for {device_type}\")\n",
    "        session_options.add_provider_for_devices([ep_device], {} if ep_options is None else ep_options)\n",
+   "        break\n",
    " \n",
    "opts = ort.SessionOptions()\n",
    " \n",
diff --git a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_ov_inference_sample.ipynb b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_ov_inference_sample.ipynb
index df300a10..91277a22 100644
--- a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_ov_inference_sample.ipynb
+++ b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_ov_inference_sample.ipynb
@@ -15,6 +15,34 @@
     "ExecutionProvider=\"OpenVINOExecutionProvider\""
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "bf6bb9b8",
+   "metadata": {
+    "vscode": {
+     "languageId": "plaintext"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "# reference: https://learn.microsoft.com/en-us/windows/ai/new-windows-ml/tutorial?tabs=python#acquiring-the-model-and-preprocessing\n",
+    "import subprocess\n",
+    "import json\n",
+    "import sys\n",
+    "import os\n",
+    "import onnxruntime as ort\n",
+    "\n",
+    "def register_execution_providers():\n",
+    "    worker_script = os.path.abspath('winml.py')\n",
+    "    result = subprocess.check_output([sys.executable, worker_script], text=True)\n",
+    "    paths = json.loads(result)\n",
+    "    for item in paths.items():\n",
+    "        ort.register_execution_provider_library(item[0], item[1])\n",
+    "\n",
+    "register_execution_providers()"
+   ]
+  },
  {
   "cell_type": "code",
   "execution_count": null,
diff --git a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_qdq_amd_inference_sample.ipynb b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_qdq_amd_inference_sample.ipynb
index b5dd1398..e3267d68 100644
--- a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_qdq_amd_inference_sample.ipynb
+++ b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_qdq_amd_inference_sample.ipynb
@@ -15,6 +15,34 @@
     "ExecutionProvider=\"VitisAIExecutionProvider\""
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "bf6bb9b8",
+   "metadata": {
+    "vscode": {
+     "languageId": "plaintext"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "# reference: https://learn.microsoft.com/en-us/windows/ai/new-windows-ml/tutorial?tabs=python#acquiring-the-model-and-preprocessing\n",
+    "import subprocess\n",
+    "import json\n",
+    "import sys\n",
+    "import os\n",
+    "import onnxruntime as ort\n",
+    "\n",
+    "def register_execution_providers():\n",
+    "    worker_script = os.path.abspath('winml.py')\n",
+    "    result = subprocess.check_output([sys.executable, worker_script], text=True)\n",
+    "    paths = json.loads(result)\n",
+    "    for item in paths.items():\n",
+    "        ort.register_execution_provider_library(item[0], item[1])\n",
+    "\n",
+    "register_execution_providers()"
+   ]
+  },
  {
   "cell_type": "code",
   "execution_count": null,
diff --git a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_text_qnn_inference_sample.ipynb b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_text_qnn_inference_sample.ipynb
index 293b9b1f..f78cd53e 100644
--- a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_text_qnn_inference_sample.ipynb
+++ b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_text_qnn_inference_sample.ipynb
@@ -11,6 +11,30 @@
     "ExecutionProvider=\"QNNExecutionProvider\""
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f0ea54b2",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# reference: https://learn.microsoft.com/en-us/windows/ai/new-windows-ml/tutorial?tabs=python#acquiring-the-model-and-preprocessing\n",
+    "import subprocess\n",
+    "import json\n",
+    "import sys\n",
+    "import os\n",
+    "import onnxruntime as ort\n",
+    "\n",
+    "def register_execution_providers():\n",
+    "    worker_script = os.path.abspath('winml.py')\n",
+    "    result = subprocess.check_output([sys.executable, worker_script], text=True)\n",
+    "    paths = json.loads(result)\n",
+    "    for item in paths.items():\n",
+    "        ort.register_execution_provider_library(item[0], item[1])\n",
+    "\n",
+    "register_execution_providers()"
+   ]
+  },
  {
   "cell_type": "markdown",
   "id": "897ffb42-3569-4d78-b99d-355a38fdce35",
diff --git a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_trtrtx_inference_sample.ipynb b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_trtrtx_inference_sample.ipynb
index c4c32324..fc5e4a5f 100644
--- a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_trtrtx_inference_sample.ipynb
+++ b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_trtrtx_inference_sample.ipynb
@@ -11,6 +11,30 @@
     "ExecutionProvider=\"NvTensorRTRTXExecutionProvider\""
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "22477669",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# reference: https://learn.microsoft.com/en-us/windows/ai/new-windows-ml/tutorial?tabs=python#acquiring-the-model-and-preprocessing\n",
+    "import subprocess\n",
+    "import json\n",
+    "import sys\n",
+    "import os\n",
+    "import onnxruntime as ort\n",
+    "\n",
+    "def register_execution_providers():\n",
+    "    worker_script = os.path.abspath('winml.py')\n",
+ " result = subprocess.check_output([sys.executable, worker_script], text=True)\n", + " paths = json.loads(result)\n", + " for item in paths.items():\n", + " ort.register_execution_provider_library(item[0], item[1])\n", + "\n", + "register_execution_providers()" + ] + }, { "cell_type": "code", "execution_count": null, @@ -42,6 +66,7 @@ " if ep_device.ep_name == ep_name and ep_device.device.type == device_type:\n", " print(f\"Adding {ep_name} for {device_type}\")\n", " session_options.add_provider_for_devices([ep_device], {} if ep_options is None else ep_options)\n", + " break\n", " \n", "opts = ort.SessionOptions()\n", " \n", diff --git a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_vision_qnn.json b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_vision_qnn.json index 20f32514..2f7d232f 100644 --- a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_vision_qnn.json +++ b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_vision_qnn.json @@ -2,7 +2,6 @@ "input_model": { "type": "PytorchModel", "model_path": "laion/CLIP-ViT-B-32-laion2B-s34B-b79K", - "generative": false, "io_config": { "input_names": [ "pixel_values" diff --git a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/winml.py b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/winml.py new file mode 100644 index 00000000..74a12c53 --- /dev/null +++ b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/winml.py @@ -0,0 +1,21 @@ +import json + +def _get_ep_paths() -> dict[str, str]: + from winui3.microsoft.windows.applicationmodel.dynamicdependency.bootstrap import ( + InitializeOptions, + initialize + ) + import winui3.microsoft.windows.ai.machinelearning as winml + eps = {} + with initialize(options = InitializeOptions.ON_NO_MATCH_SHOW_UI): + catalog = winml.ExecutionProviderCatalog.get_default() + providers = catalog.find_all_providers() + for provider in providers: + provider.ensure_ready_async().get() + eps[provider.name] = provider.library_path + # DO NOT call provider.try_register in python. That will register to the native env. 
+    return eps
+
+if __name__ == "__main__":
+    eps = _get_ep_paths()
+    print(json.dumps(eps))
diff --git a/meta-llama-Llama-3.2-1B-Instruct/aitk/_copy.json.config b/meta-llama-Llama-3.2-1B-Instruct/aitk/_copy.json.config
index 338ad7e5..87aead8c 100644
--- a/meta-llama-Llama-3.2-1B-Instruct/aitk/_copy.json.config
+++ b/meta-llama-Llama-3.2-1B-Instruct/aitk/_copy.json.config
@@ -16,6 +16,14 @@
           "find": "deepseek_ov_config",
           "replace": "llama3_2_ov_config"
         },
+        {
+          "find": "deepseek_trtrtx_config",
+          "replace": "llama3_2_trtrtx_config"
+        },
+        {
+          "find": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
+          "replace": "meta-llama/Llama-3.2-1B-Instruct"
+        },
         {
           "find": "deepseek_dml_config",
           "replace": "llama3_2_dml_config"
@@ -100,6 +108,20 @@
         }
       ]
     },
+    {
+      "src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_trtrtx_config.json",
+      "dst": "llama3_2_trtrtx_config.json",
+      "replacements": [
+        {
+          "find": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
+          "replace": "meta-llama/Llama-3.2-1B-Instruct"
+        },
+        {
+          "find": "model/deepseek",
+          "replace": "model/llama3_2"
+        }
+      ]
+    },
     {
       "src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_dml_config.json",
       "dst": "llama3_2_dml_config.json",
@@ -114,6 +136,11 @@
         }
       ]
     },
+    {
+      "src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_trtrtx_config.json.config",
+      "dst": "llama3_2_trtrtx_config.json.config",
+      "replacements": []
+    },
     {
       "src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_dml_config.json.config",
       "dst": "llama3_2_dml_config.json.config",
@@ -151,6 +178,10 @@
           "replace": "<|start_header_id|>user<|end_header_id|>\\\\n{input}<|start_header_id|>assistant<|end_header_id|>\\\\n"
         }
       ]
+    },
+    {
+      "src": "../../intel-bert-base-uncased-mrpc/aitk/winml.py",
+      "dst": "winml.py"
     }
   ]
 }
diff --git a/meta-llama-Llama-3.2-1B-Instruct/aitk/inference_sample.ipynb b/meta-llama-Llama-3.2-1B-Instruct/aitk/inference_sample.ipynb
index 77a3070b..65766934 100644
--- a/meta-llama-Llama-3.2-1B-Instruct/aitk/inference_sample.ipynb
+++ b/meta-llama-Llama-3.2-1B-Instruct/aitk/inference_sample.ipynb
@@ -11,6 +11,29 @@
     "model_folder = \"./model\""
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# reference: https://learn.microsoft.com/en-us/windows/ai/new-windows-ml/tutorial?tabs=python#acquiring-the-model-and-preprocessing\n",
+    "import subprocess\n",
+    "import json\n",
+    "import sys\n",
+    "import os\n",
+    "import onnxruntime_genai as og\n",
+    "\n",
+    "def register_execution_providers():\n",
+    "    worker_script = os.path.abspath('winml.py')\n",
+    "    result = subprocess.check_output([sys.executable, worker_script], text=True)\n",
+    "    paths = json.loads(result)\n",
+    "    for item in paths.items():\n",
+    "        og.register_execution_provider_library(item[0], item[1])\n",
+    "\n",
+    "register_execution_providers()"
+   ]
+  },
  {
   "cell_type": "code",
   "execution_count": null,
diff --git a/meta-llama-Llama-3.2-1B-Instruct/aitk/info.yml b/meta-llama-Llama-3.2-1B-Instruct/aitk/info.yml
index 16ebc30d..097226e1 100644
--- a/meta-llama-Llama-3.2-1B-Instruct/aitk/info.yml
+++ b/meta-llama-Llama-3.2-1B-Instruct/aitk/info.yml
@@ -14,6 +14,9 @@ recipes:
       - cpu
       - gpu
     ep: OpenVINOExecutionProvider
+  - file: "llama3_2_trtrtx_config.json"
+    device: gpu
+    ep: NvTensorRTRTXExecutionProvider
   - file: "llama3_2_dml_config.json"
     device: gpu
     ep: DmlExecutionProvider
diff --git a/meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_trtrtx_config.json b/meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_trtrtx_config.json
new file mode 100644
index 00000000..a135798d
--- /dev/null
+++ b/meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_trtrtx_config.json
@@ -0,0 +1,38 @@
+{
+  "input_model": {
+    "type": "HfModel",
+    "model_path": "meta-llama/Llama-3.2-1B-Instruct"
+  },
+  "systems": {
+    "local_system": {
+      "type": "LocalSystem",
+      "accelerators": [ { "device": "gpu", "execution_providers": [ "NvTensorRTRTXExecutionProvider" ] } ]
+    }
+  },
+  "data_configs": [
+    {
+      "name": "wikitext2_train",
+      "type": "HuggingfaceContainer",
+      "load_dataset_config": {
+        "data_name": "wikitext",
+        "subset": "wikitext-2-raw-v1",
+        "split": "train"
+      },
+      "pre_process_data_config": {
+        "strategy": "line-by-line",
+        "add_special_tokens": false,
+        "max_samples": 128,
+        "max_seq_len": 512
+      }
+    }
+  ],
+  "passes": {
+    "builder": { "type": "ModelBuilder", "precision": "fp16" }
+  },
+  "target": "local_system",
+  "log_severity_level": 1,
+  "output_dir": "model/llama3_2",
+  "cache_dir": "cache",
+  "no_artifacts": true,
+  "evaluate_input_model": false
+}
\ No newline at end of file
diff --git a/meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_trtrtx_config.json.config b/meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_trtrtx_config.json.config
new file mode 100644
index 00000000..cb987c65
--- /dev/null
+++ b/meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_trtrtx_config.json.config
@@ -0,0 +1,42 @@
+{
+  "name": "Convert to NVIDIA TRT for RTX",
+  "oliveFile": "",
+  "isLLM": true,
+  "debugInfo": {
+    "autoGenerated": true,
+    "useModelBuilder": "builder"
+  },
+  "addCpu": false,
+  "runtime": {
+    "autoGenerated": true,
+    "name": "Evaluate on",
+    "type": "enum",
+    "displayNames": [
+      "NVIDIA TensorRT for RTX"
+    ],
+    "path": "systems.local_system.accelerators.0.execution_providers.0",
+    "values": [
+      "NvTensorRTRTXExecutionProvider"
+    ],
+    "readOnly": false
+  },
+  "sections": [
+    {
+      "autoGenerated": true,
+      "name": "Convert",
+      "phase": "Conversion",
+      "parameters": [],
+      "toggle": {
+        "autoGenerated": true,
+        "name": "Convert to ONNX format",
+        "type": "bool",
+        "path": "passes.builder",
+        "actions": [
+          [],
+          []
+        ],
+        "readOnly": true
+      }
+    }
+  ]
+}
diff --git a/meta-llama-Llama-3.2-1B-Instruct/aitk/model_project.config b/meta-llama-Llama-3.2-1B-Instruct/aitk/model_project.config
index f5a73299..e800ea2d 100644
--- a/meta-llama-Llama-3.2-1B-Instruct/aitk/model_project.config
+++ b/meta-llama-Llama-3.2-1B-Instruct/aitk/model_project.config
@@ -12,6 +12,10 @@
       "file": "llama3_2_ov_config.json",
       "templateName": "llama3_2_ov_config"
     },
+    {
+      "file": "llama3_2_trtrtx_config.json",
+      "templateName": "llama3_2_trtrtx_config"
+    },
     {
       "file": "llama3_2_dml_config.json",
       "templateName": "llama3_2_dml_config"
diff --git a/meta-llama-Llama-3.2-1B-Instruct/aitk/requirements.txt b/meta-llama-Llama-3.2-1B-Instruct/aitk/requirements.txt
index 7af84714..e3c23f80 100644
--- a/meta-llama-Llama-3.2-1B-Instruct/aitk/requirements.txt
+++ b/meta-llama-Llama-3.2-1B-Instruct/aitk/requirements.txt
@@ -1,4 +1,4 @@
 # This file will be installed together with AITK runtime requirements
 # For the full requirements, see AITK
 datasets
-optimum
+optimum==1.26.0
diff --git a/meta-llama-Llama-3.2-1B-Instruct/aitk/winml.py b/meta-llama-Llama-3.2-1B-Instruct/aitk/winml.py
new file mode 100644
index 00000000..74a12c53
--- /dev/null
+++ b/meta-llama-Llama-3.2-1B-Instruct/aitk/winml.py
@@ -0,0 +1,21 @@
+import json
+
+def _get_ep_paths() -> dict[str, str]:
+    from winui3.microsoft.windows.applicationmodel.dynamicdependency.bootstrap import (
+        InitializeOptions,
+        initialize
+    )
+    import winui3.microsoft.windows.ai.machinelearning as winml
+    eps = {}
+    with initialize(options = InitializeOptions.ON_NO_MATCH_SHOW_UI):
+        catalog = winml.ExecutionProviderCatalog.get_default()
+        providers = catalog.find_all_providers()
+        for provider in providers:
+            provider.ensure_ready_async().get()
+            eps[provider.name] = provider.library_path
+            # DO NOT call provider.try_register in python. That will register to the native env.
+    return eps
+
+if __name__ == "__main__":
+    eps = _get_ep_paths()
+    print(json.dumps(eps))
diff --git a/microsoft-Phi-3.5-mini-instruct/aitk/_copy.json.config b/microsoft-Phi-3.5-mini-instruct/aitk/_copy.json.config
index b4f7cbdb..e78f3b56 100644
--- a/microsoft-Phi-3.5-mini-instruct/aitk/_copy.json.config
+++ b/microsoft-Phi-3.5-mini-instruct/aitk/_copy.json.config
@@ -16,6 +16,14 @@
           "find": "deepseek_ov_config",
           "replace": "phi3_5_ov_config"
         },
+        {
+          "find": "deepseek_trtrtx_config",
+          "replace": "phi3_5_trtrtx_config"
+        },
+        {
+          "find": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
+          "replace": "microsoft/Phi-3.5-mini-instruct"
+        },
         {
           "find": "deepseek_dml_config",
           "replace": "phi3_5_dml_config"
@@ -84,6 +92,20 @@
         }
       ]
     },
+    {
+      "src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_trtrtx_config.json",
+      "dst": "phi3_5_trtrtx_config.json",
+      "replacements": [
+        {
+          "find": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
+          "replace": "microsoft/Phi-3.5-mini-instruct"
+        },
+        {
+          "find": "model/deepseek",
+          "replace": "model/phi3_5"
+        }
+      ]
+    },
     {
       "src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_dml_config.json",
       "dst": "phi3_5_dml_config.json",
@@ -98,6 +120,11 @@
         }
       ]
     },
+    {
+      "src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_trtrtx_config.json.config",
+      "dst": "phi3_5_trtrtx_config.json.config",
+      "replacements": []
+    },
     {
       "src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_dml_config.json.config",
       "dst": "phi3_5_dml_config.json.config",
@@ -131,6 +158,10 @@
           "replace": "<|user|>\\\\n{input} <|end|>\\\\n<|assistant|>"
         }
       ]
+    },
+    {
+      "src": "../../intel-bert-base-uncased-mrpc/aitk/winml.py",
+      "dst": "winml.py"
     }
   ]
 }
diff --git a/microsoft-Phi-3.5-mini-instruct/aitk/inference_sample.ipynb b/microsoft-Phi-3.5-mini-instruct/aitk/inference_sample.ipynb
index a47cdc58..71492571 100644
--- a/microsoft-Phi-3.5-mini-instruct/aitk/inference_sample.ipynb
+++ b/microsoft-Phi-3.5-mini-instruct/aitk/inference_sample.ipynb
@@ -11,6 +11,29 @@
     "model_folder = \"./model\""
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# reference: https://learn.microsoft.com/en-us/windows/ai/new-windows-ml/tutorial?tabs=python#acquiring-the-model-and-preprocessing\n",
+    "import subprocess\n",
+    "import json\n",
+    "import sys\n",
+    "import os\n",
+    "import onnxruntime_genai as og\n",
+    "\n",
+    "def register_execution_providers():\n",
+    "    worker_script = os.path.abspath('winml.py')\n",
+    "    result = subprocess.check_output([sys.executable, worker_script], text=True)\n",
+    "    paths = json.loads(result)\n",
+    "    for item in paths.items():\n",
+    "        og.register_execution_provider_library(item[0], item[1])\n",
+    "\n",
+    "register_execution_providers()"
+   ]
+  },
  {
   "cell_type": "code",
   "execution_count": null,
diff --git a/microsoft-Phi-3.5-mini-instruct/aitk/info.yml b/microsoft-Phi-3.5-mini-instruct/aitk/info.yml
index 2801977a..d1e2d6ea 100644
--- a/microsoft-Phi-3.5-mini-instruct/aitk/info.yml
+++ b/microsoft-Phi-3.5-mini-instruct/aitk/info.yml
@@ -14,6 +14,9 @@ recipes:
       - cpu
       - gpu
     ep: OpenVINOExecutionProvider
+  - file: "phi3_5_trtrtx_config.json"
+    device: gpu
+    ep: NvTensorRTRTXExecutionProvider
   - file: "phi3_5_dml_config.json"
     device: gpu
     ep: DmlExecutionProvider
diff --git a/microsoft-Phi-3.5-mini-instruct/aitk/model_project.config b/microsoft-Phi-3.5-mini-instruct/aitk/model_project.config
index a5f764fe..358e2c6c 100644
--- a/microsoft-Phi-3.5-mini-instruct/aitk/model_project.config
+++ b/microsoft-Phi-3.5-mini-instruct/aitk/model_project.config
@@ -12,6 +12,10 @@
       "file": "phi3_5_ov_config.json",
       "templateName": "phi3_5_ov_config"
     },
+    {
+      "file": "phi3_5_trtrtx_config.json",
+      "templateName": "phi3_5_trtrtx_config"
+    },
     {
       "file": "phi3_5_dml_config.json",
       "templateName": "phi3_5_dml_config"
diff --git a/microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_trtrtx_config.json b/microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_trtrtx_config.json
new file mode 100644
index 00000000..aa9eaf9c
--- /dev/null
+++ b/microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_trtrtx_config.json
@@ -0,0 +1,38 @@
+{
+  "input_model": {
+    "type": "HfModel",
+    "model_path": "microsoft/Phi-3.5-mini-instruct"
+  },
+  "systems": {
+    "local_system": {
+      "type": "LocalSystem",
+      "accelerators": [ { "device": "gpu", "execution_providers": [ "NvTensorRTRTXExecutionProvider" ] } ]
+    }
+  },
+  "data_configs": [
+    {
+      "name": "wikitext2_train",
+      "type": "HuggingfaceContainer",
+      "load_dataset_config": {
+        "data_name": "wikitext",
+        "subset": "wikitext-2-raw-v1",
+        "split": "train"
+      },
+      "pre_process_data_config": {
+        "strategy": "line-by-line",
+        "add_special_tokens": false,
+        "max_samples": 128,
+        "max_seq_len": 512
+      }
+    }
+  ],
+  "passes": {
+    "builder": { "type": "ModelBuilder", "precision": "fp16" }
+  },
+  "target": "local_system",
+  "log_severity_level": 1,
+  "output_dir": "model/phi3_5",
+  "cache_dir": "cache",
+  "no_artifacts": true,
+  "evaluate_input_model": false
+}
\ No newline at end of file
diff --git a/microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_trtrtx_config.json.config b/microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_trtrtx_config.json.config
new file mode 100644
index 00000000..cb987c65
--- /dev/null
+++ b/microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_trtrtx_config.json.config
@@ -0,0 +1,42 @@
+{
+  "name": "Convert to NVIDIA TRT for RTX",
+  "oliveFile": "",
+  "isLLM": true,
+  "debugInfo": {
+    "autoGenerated": true,
+    "useModelBuilder": "builder"
+  },
+  "addCpu": false,
+  "runtime": {
+    "autoGenerated": true,
+    "name": "Evaluate on",
+    "type": "enum",
+    "displayNames": [
+      "NVIDIA TensorRT for RTX"
+    ],
+    "path": "systems.local_system.accelerators.0.execution_providers.0",
+    "values": [
+      "NvTensorRTRTXExecutionProvider"
+    ],
+    "readOnly": false
+  },
+  "sections": [
+    {
+      "autoGenerated": true,
+      "name": "Convert",
+      "phase": "Conversion",
+      "parameters": [],
+      "toggle": {
+        "autoGenerated": true,
+        "name": "Convert to ONNX format",
+        "type": "bool",
+        "path": "passes.builder",
+        "actions": [
+          [],
+          []
+        ],
+        "readOnly": true
+      }
+    }
+  ]
+}
diff --git a/microsoft-Phi-3.5-mini-instruct/aitk/requirements.txt b/microsoft-Phi-3.5-mini-instruct/aitk/requirements.txt
index 7af84714..e3c23f80 100644
--- a/microsoft-Phi-3.5-mini-instruct/aitk/requirements.txt
+++ b/microsoft-Phi-3.5-mini-instruct/aitk/requirements.txt
@@ -1,4 +1,4 @@
 # This file will be installed together with AITK runtime requirements
 # For the full requirements, see AITK
 datasets
-optimum
+optimum==1.26.0
diff --git a/microsoft-Phi-3.5-mini-instruct/aitk/winml.py b/microsoft-Phi-3.5-mini-instruct/aitk/winml.py
new file mode 100644
index 00000000..74a12c53
--- /dev/null
+++ b/microsoft-Phi-3.5-mini-instruct/aitk/winml.py
@@ -0,0 +1,21 @@
+import json
+
+def _get_ep_paths() -> dict[str, str]:
+    from winui3.microsoft.windows.applicationmodel.dynamicdependency.bootstrap import (
+        InitializeOptions,
+        initialize
+    )
+    import winui3.microsoft.windows.ai.machinelearning as winml
+    eps = {}
+    with initialize(options = InitializeOptions.ON_NO_MATCH_SHOW_UI):
+        catalog = winml.ExecutionProviderCatalog.get_default()
+        providers = catalog.find_all_providers()
+        for provider in providers:
+            provider.ensure_ready_async().get()
+            eps[provider.name] = provider.library_path
+            # DO NOT call provider.try_register in python. That will register to the native env.
+    return eps
+
+if __name__ == "__main__":
+    eps = _get_ep_paths()
+    print(json.dumps(eps))
diff --git a/microsoft-Phi-4-mini-reasoning/aitk/inference_sample.ipynb b/microsoft-Phi-4-mini-reasoning/aitk/inference_sample.ipynb
index 70e1b959..d62d3d12 100644
--- a/microsoft-Phi-4-mini-reasoning/aitk/inference_sample.ipynb
+++ b/microsoft-Phi-4-mini-reasoning/aitk/inference_sample.ipynb
@@ -11,6 +11,29 @@
     "model_folder = \"./model\""
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# reference: https://learn.microsoft.com/en-us/windows/ai/new-windows-ml/tutorial?tabs=python#acquiring-the-model-and-preprocessing\n",
+    "import subprocess\n",
+    "import json\n",
+    "import sys\n",
+    "import os\n",
+    "import onnxruntime_genai as og\n",
+    "\n",
+    "def register_execution_providers():\n",
+    "    worker_script = os.path.abspath('winml.py')\n",
+    "    result = subprocess.check_output([sys.executable, worker_script], text=True)\n",
+    "    paths = json.loads(result)\n",
+    "    for item in paths.items():\n",
+    "        og.register_execution_provider_library(item[0], item[1])\n",
+    "\n",
+    "register_execution_providers()"
+   ]
+  },
  {
   "cell_type": "code",
   "execution_count": null,
diff --git a/microsoft-Phi-4-reasoning-plus/aitk/inference_sample.ipynb b/microsoft-Phi-4-reasoning-plus/aitk/inference_sample.ipynb
index 70e1b959..d62d3d12 100644
--- a/microsoft-Phi-4-reasoning-plus/aitk/inference_sample.ipynb
+++ b/microsoft-Phi-4-reasoning-plus/aitk/inference_sample.ipynb
@@ -11,6 +11,29 @@
     "model_folder = \"./model\""
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# reference: https://learn.microsoft.com/en-us/windows/ai/new-windows-ml/tutorial?tabs=python#acquiring-the-model-and-preprocessing\n",
+    "import subprocess\n",
+    "import json\n",
+    "import sys\n",
+    "import os\n",
+    "import onnxruntime_genai as og\n",
+    "\n",
+    "def register_execution_providers():\n",
+    "    worker_script = os.path.abspath('winml.py')\n",
+    "    result = subprocess.check_output([sys.executable, worker_script], text=True)\n",
+    "    paths = json.loads(result)\n",
+    "    for item in paths.items():\n",
+    "        og.register_execution_provider_library(item[0], item[1])\n",
+    "\n",
+    "register_execution_providers()"
+   ]
+  },
  {
   "cell_type": "code",
   "execution_count": null,
diff --git a/microsoft-Phi-4-reasoning/aitk/inference_sample.ipynb b/microsoft-Phi-4-reasoning/aitk/inference_sample.ipynb
index 70e1b959..d62d3d12 100644
--- a/microsoft-Phi-4-reasoning/aitk/inference_sample.ipynb
+++ b/microsoft-Phi-4-reasoning/aitk/inference_sample.ipynb
@@ -11,6 +11,29 @@
     "model_folder = \"./model\""
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# reference: https://learn.microsoft.com/en-us/windows/ai/new-windows-ml/tutorial?tabs=python#acquiring-the-model-and-preprocessing\n",
+    "import subprocess\n",
+    "import json\n",
+    "import sys\n",
+    "import os\n",
+    "import onnxruntime_genai as og\n",
+    "\n",
+    "def register_execution_providers():\n",
+    "    worker_script = os.path.abspath('winml.py')\n",
+    "    result = subprocess.check_output([sys.executable, worker_script], text=True)\n",
+    "    paths = json.loads(result)\n",
+    "    for item in paths.items():\n",
+    "        og.register_execution_provider_library(item[0], item[1])\n",
+    "\n",
+    "register_execution_providers()"
+   ]
+  },
  {
   "cell_type": "code",
   "execution_count": null,
diff --git a/microsoft-resnet-50/aitk/inference_sample.ipynb b/microsoft-resnet-50/aitk/inference_sample.ipynb
index c167ae59..e5e3c336 100644
--- a/microsoft-resnet-50/aitk/inference_sample.ipynb
+++ b/microsoft-resnet-50/aitk/inference_sample.ipynb
@@ -16,6 +16,29 @@
     "    transpose = True"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# reference: https://learn.microsoft.com/en-us/windows/ai/new-windows-ml/tutorial?tabs=python#acquiring-the-model-and-preprocessing\n",
+    "import subprocess\n",
+    "import json\n",
+    "import sys\n",
+    "import os\n",
+    "import onnxruntime as ort\n",
+    "\n",
+    "def register_execution_providers():\n",
+    "    worker_script = os.path.abspath('winml.py')\n",
+    "    result = subprocess.check_output([sys.executable, worker_script], text=True)\n",
+    "    paths = json.loads(result)\n",
+    "    for item in paths.items():\n",
+    "        ort.register_execution_provider_library(item[0], item[1])\n",
+    "\n",
+    "register_execution_providers()"
+   ]
+  },
  {
   "cell_type": "code",
   "execution_count": null,
@@ -54,6 +77,7 @@
    "    if ep_device.ep_name == ep_name and ep_device.device.type == device_type:\n",
    "        print(f\"Adding {ep_name} for {device_type}\")\n",
    "        session_options.add_provider_for_devices([ep_device], {} if ep_options is None else ep_options)\n",
+   "        break\n",
    "\n",
    "\n",
    "session_options = ort.SessionOptions()\n",
diff --git a/microsoft-resnet-50/aitk/resnet_dml_inference_sample.ipynb b/microsoft-resnet-50/aitk/resnet_dml_inference_sample.ipynb
index 489618e6..5e70d5d2 100644
--- a/microsoft-resnet-50/aitk/resnet_dml_inference_sample.ipynb
+++ b/microsoft-resnet-50/aitk/resnet_dml_inference_sample.ipynb
@@ -10,6 +10,29 @@
     "ExecutionProvider=\"DmlExecutionProvider\""
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# reference: https://learn.microsoft.com/en-us/windows/ai/new-windows-ml/tutorial?tabs=python#acquiring-the-model-and-preprocessing\n",
+    "import subprocess\n",
+    "import json\n",
+    "import sys\n",
+    "import os\n",
+    "import onnxruntime as ort\n",
+    "\n",
+    "def register_execution_providers():\n",
+    "    worker_script = os.path.abspath('winml.py')\n",
+    "    result = subprocess.check_output([sys.executable, worker_script], text=True)\n",
+    "    paths = json.loads(result)\n",
+    "    for item in paths.items():\n",
+    "        ort.register_execution_provider_library(item[0], item[1])\n",
+    "\n",
+    "register_execution_providers()"
+   ]
+  },
  {
   "cell_type": "code",
   "execution_count": null,
diff --git a/microsoft-resnet-50/aitk/resnet_trtrtx_inference_sample.ipynb b/microsoft-resnet-50/aitk/resnet_trtrtx_inference_sample.ipynb
index 25eebee1..2ec7d55a 100644
--- a/microsoft-resnet-50/aitk/resnet_trtrtx_inference_sample.ipynb
+++ b/microsoft-resnet-50/aitk/resnet_trtrtx_inference_sample.ipynb
@@ -10,6 +10,29 @@
     "ExecutionProvider=\"NvTensorRTRTXExecutionProvider\""
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# reference: https://learn.microsoft.com/en-us/windows/ai/new-windows-ml/tutorial?tabs=python#acquiring-the-model-and-preprocessing\n",
+    "import subprocess\n",
+    "import json\n",
+    "import sys\n",
+    "import os\n",
+    "import onnxruntime as ort\n",
+    "\n",
+    "def register_execution_providers():\n",
+    "    worker_script = os.path.abspath('winml.py')\n",
+    "    result = subprocess.check_output([sys.executable, worker_script], text=True)\n",
+    "    paths = json.loads(result)\n",
+    "    for item in paths.items():\n",
+    "        ort.register_execution_provider_library(item[0], item[1])\n",
+    "\n",
+    "register_execution_providers()"
+   ]
+  },
  {
   "cell_type": "code",
   "execution_count": null,
diff --git a/openai-clip-vit-base-patch16/aitk/_copy.json.config b/openai-clip-vit-base-patch16/aitk/_copy.json.config
index abd20714..2f6d2216 100644
--- a/openai-clip-vit-base-patch16/aitk/_copy.json.config
+++ b/openai-clip-vit-base-patch16/aitk/_copy.json.config
@@ -23,6 +23,10 @@
           "replace": "DmlExecutionProvider"
         }
       ]
+    },
+    {
+      "src": "../../intel-bert-base-uncased-mrpc/aitk/winml.py",
+      "dst": "winml.py"
     }
   ]
 }
diff --git a/openai-clip-vit-base-patch16/aitk/openai_clip_dml_inference_sample.ipynb b/openai-clip-vit-base-patch16/aitk/openai_clip_dml_inference_sample.ipynb
index 19f4bc70..83c5e565 100644
--- a/openai-clip-vit-base-patch16/aitk/openai_clip_dml_inference_sample.ipynb
+++ b/openai-clip-vit-base-patch16/aitk/openai_clip_dml_inference_sample.ipynb
@@ -11,6 +11,30 @@
     "ExecutionProvider=\"DmlExecutionProvider\""
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "22477669",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# reference: https://learn.microsoft.com/en-us/windows/ai/new-windows-ml/tutorial?tabs=python#acquiring-the-model-and-preprocessing\n",
+    "import subprocess\n",
+    "import json\n",
+    "import sys\n",
+    "import os\n",
+    "import onnxruntime as ort\n",
+    "\n",
+    "def register_execution_providers():\n",
+    "    worker_script = os.path.abspath('winml.py')\n",
+    "    result = subprocess.check_output([sys.executable, worker_script], text=True)\n",
+    "    paths = json.loads(result)\n",
+    "    for item in paths.items():\n",
+    "        ort.register_execution_provider_library(item[0], item[1])\n",
+    "\n",
+    "register_execution_providers()"
+   ]
+  },
  {
   "cell_type": "code",
   "execution_count": null,
@@ -42,6 +66,7 @@
    "    if ep_device.ep_name == ep_name and ep_device.device.type == device_type:\n",
    "        print(f\"Adding {ep_name} for {device_type}\")\n",
    "        session_options.add_provider_for_devices([ep_device], {} if ep_options is None else ep_options)\n",
+   "        break\n",
    " \n",
    "opts = ort.SessionOptions()\n",
    " \n",
diff --git a/openai-clip-vit-base-patch16/aitk/openai_clip_ov_inference_sample.ipynb b/openai-clip-vit-base-patch16/aitk/openai_clip_ov_inference_sample.ipynb
index 18a7aa58..b133b368 100644
--- a/openai-clip-vit-base-patch16/aitk/openai_clip_ov_inference_sample.ipynb
+++ b/openai-clip-vit-base-patch16/aitk/openai_clip_ov_inference_sample.ipynb
@@ -15,6 +15,34 @@
     "ExecutionProvider=\"OpenVINOExecutionProvider\""
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "bf6bb9b8",
+   "metadata": {
+    "vscode": {
+     "languageId": "plaintext"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "# reference: 
https://learn.microsoft.com/en-us/windows/ai/new-windows-ml/tutorial?tabs=python#acquiring-the-model-and-preprocessing\n", + "import subprocess\n", + "import json\n", + "import sys\n", + "import os\n", + "import onnxruntime as ort\n", + "\n", + "def register_execution_providers():\n", + " worker_script = os.path.abspath('winml.py')\n", + " result = subprocess.check_output([sys.executable, worker_script], text=True)\n", + " paths = json.loads(result)\n", + " for item in paths.items():\n", + " ort.register_execution_provider_library(item[0], item[1])\n", + "\n", + "register_execution_providers()" + ] + }, { "cell_type": "code", "execution_count": null, diff --git a/openai-clip-vit-base-patch16/aitk/openai_clip_qdq_amd_inference_sample.ipynb b/openai-clip-vit-base-patch16/aitk/openai_clip_qdq_amd_inference_sample.ipynb index a4cb3eb3..86363274 100644 --- a/openai-clip-vit-base-patch16/aitk/openai_clip_qdq_amd_inference_sample.ipynb +++ b/openai-clip-vit-base-patch16/aitk/openai_clip_qdq_amd_inference_sample.ipynb @@ -15,6 +15,34 @@ "ExecutionProvider=\"VitisAIExecutionProvider\"" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "bf6bb9b8", + "metadata": { + "vscode": { + "languageId": "plaintext" + } + }, + "outputs": [], + "source": [ + "# reference: https://learn.microsoft.com/en-us/windows/ai/new-windows-ml/tutorial?tabs=python#acquiring-the-model-and-preprocessing\n", + "import subprocess\n", + "import json\n", + "import sys\n", + "import os\n", + "import onnxruntime as ort\n", + "\n", + "def register_execution_providers():\n", + " worker_script = os.path.abspath('winml.py')\n", + " result = subprocess.check_output([sys.executable, worker_script], text=True)\n", + " paths = json.loads(result)\n", + " for item in paths.items():\n", + " ort.register_execution_provider_library(item[0], item[1])\n", + "\n", + "register_execution_providers()" + ] + }, { "cell_type": "code", "execution_count": null, diff --git a/openai-clip-vit-base-patch16/aitk/openai_clip_text_qnn_inference_sample.ipynb b/openai-clip-vit-base-patch16/aitk/openai_clip_text_qnn_inference_sample.ipynb index 9f0a36b2..f00e5fef 100644 --- a/openai-clip-vit-base-patch16/aitk/openai_clip_text_qnn_inference_sample.ipynb +++ b/openai-clip-vit-base-patch16/aitk/openai_clip_text_qnn_inference_sample.ipynb @@ -11,6 +11,30 @@ "ExecutionProvider=\"QNNExecutionProvider\"" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "f0ea54b2", + "metadata": {}, + "outputs": [], + "source": [ + "# reference: https://learn.microsoft.com/en-us/windows/ai/new-windows-ml/tutorial?tabs=python#acquiring-the-model-and-preprocessing\n", + "import subprocess\n", + "import json\n", + "import sys\n", + "import os\n", + "import onnxruntime as ort\n", + "\n", + "def register_execution_providers():\n", + " worker_script = os.path.abspath('winml.py')\n", + " result = subprocess.check_output([sys.executable, worker_script], text=True)\n", + " paths = json.loads(result)\n", + " for item in paths.items():\n", + " ort.register_execution_provider_library(item[0], item[1])\n", + "\n", + "register_execution_providers()" + ] + }, { "cell_type": "markdown", "id": "897ffb42-3569-4d78-b99d-355a38fdce35", diff --git a/openai-clip-vit-base-patch16/aitk/openai_clip_trtrtx_inference_sample.ipynb b/openai-clip-vit-base-patch16/aitk/openai_clip_trtrtx_inference_sample.ipynb index a3c6f084..2343edf0 100644 --- a/openai-clip-vit-base-patch16/aitk/openai_clip_trtrtx_inference_sample.ipynb +++ 
b/openai-clip-vit-base-patch16/aitk/openai_clip_trtrtx_inference_sample.ipynb @@ -11,6 +11,30 @@ "ExecutionProvider=\"NvTensorRTRTXExecutionProvider\"" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "22477669", + "metadata": {}, + "outputs": [], + "source": [ + "# reference: https://learn.microsoft.com/en-us/windows/ai/new-windows-ml/tutorial?tabs=python#acquiring-the-model-and-preprocessing\n", + "import subprocess\n", + "import json\n", + "import sys\n", + "import os\n", + "import onnxruntime as ort\n", + "\n", + "def register_execution_providers():\n", + " worker_script = os.path.abspath('winml.py')\n", + " result = subprocess.check_output([sys.executable, worker_script], text=True)\n", + " paths = json.loads(result)\n", + " for item in paths.items():\n", + " ort.register_execution_provider_library(item[0], item[1])\n", + "\n", + "register_execution_providers()" + ] + }, { "cell_type": "code", "execution_count": null, @@ -42,6 +66,7 @@ " if ep_device.ep_name == ep_name and ep_device.device.type == device_type:\n", " print(f\"Adding {ep_name} for {device_type}\")\n", " session_options.add_provider_for_devices([ep_device], {} if ep_options is None else ep_options)\n", + " break\n", " \n", "opts = ort.SessionOptions()\n", " \n", diff --git a/openai-clip-vit-base-patch16/aitk/openai_clip_vision_qnn.json b/openai-clip-vit-base-patch16/aitk/openai_clip_vision_qnn.json index b58a975f..cf98fb1b 100644 --- a/openai-clip-vit-base-patch16/aitk/openai_clip_vision_qnn.json +++ b/openai-clip-vit-base-patch16/aitk/openai_clip_vision_qnn.json @@ -2,7 +2,6 @@ "input_model": { "type": "PytorchModel", "model_path": "openai/clip-vit-base-patch16", - "generative": false, "io_config": { "input_names": [ "pixel_values" diff --git a/openai-clip-vit-base-patch16/aitk/winml.py b/openai-clip-vit-base-patch16/aitk/winml.py new file mode 100644 index 00000000..74a12c53 --- /dev/null +++ b/openai-clip-vit-base-patch16/aitk/winml.py @@ -0,0 +1,21 @@ +import json + +def _get_ep_paths() -> dict[str, str]: + from winui3.microsoft.windows.applicationmodel.dynamicdependency.bootstrap import ( + InitializeOptions, + initialize + ) + import winui3.microsoft.windows.ai.machinelearning as winml + eps = {} + with initialize(options = InitializeOptions.ON_NO_MATCH_SHOW_UI): + catalog = winml.ExecutionProviderCatalog.get_default() + providers = catalog.find_all_providers() + for provider in providers: + provider.ensure_ready_async().get() + eps[provider.name] = provider.library_path + # DO NOT call provider.try_register in python. That will register to the native env. 
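+    # The name -> library-path map collected above is printed as JSON by the
+    # __main__ block below. The inference notebooks run this script as a
+    # subprocess, parse that output, and call register_execution_provider_library
+    # themselves, so execution-provider discovery (and the Windows App SDK
+    # bootstrap) stays out of the notebook's own process.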
+ return eps + +if __name__ == "__main__": + eps = _get_ep_paths() + print(json.dumps(eps)) diff --git a/openai-clip-vit-base-patch32/aitk/_copy.json.config b/openai-clip-vit-base-patch32/aitk/_copy.json.config index 0a9e193a..a771d852 100644 --- a/openai-clip-vit-base-patch32/aitk/_copy.json.config +++ b/openai-clip-vit-base-patch32/aitk/_copy.json.config @@ -199,6 +199,10 @@ { "src": "../../openai-clip-vit-base-patch16/aitk/requirements.txt", "dst": "requirements.txt" + }, + { + "src": "../../intel-bert-base-uncased-mrpc/aitk/winml.py", + "dst": "winml.py" } ] } diff --git a/openai-clip-vit-base-patch32/aitk/openai_clip_dml_inference_sample.ipynb b/openai-clip-vit-base-patch32/aitk/openai_clip_dml_inference_sample.ipynb index db21746c..7e2f5cc6 100644 --- a/openai-clip-vit-base-patch32/aitk/openai_clip_dml_inference_sample.ipynb +++ b/openai-clip-vit-base-patch32/aitk/openai_clip_dml_inference_sample.ipynb @@ -11,6 +11,30 @@ "ExecutionProvider=\"DmlExecutionProvider\"" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "22477669", + "metadata": {}, + "outputs": [], + "source": [ + "# reference: https://learn.microsoft.com/en-us/windows/ai/new-windows-ml/tutorial?tabs=python#acquiring-the-model-and-preprocessing\n", + "import subprocess\n", + "import json\n", + "import sys\n", + "import os\n", + "import onnxruntime as ort\n", + "\n", + "def register_execution_providers():\n", + " worker_script = os.path.abspath('winml.py')\n", + " result = subprocess.check_output([sys.executable, worker_script], text=True)\n", + " paths = json.loads(result)\n", + " for item in paths.items():\n", + " ort.register_execution_provider_library(item[0], item[1])\n", + "\n", + "register_execution_providers()" + ] + }, { "cell_type": "code", "execution_count": null, @@ -42,6 +66,7 @@ " if ep_device.ep_name == ep_name and ep_device.device.type == device_type:\n", " print(f\"Adding {ep_name} for {device_type}\")\n", " session_options.add_provider_for_devices([ep_device], {} if ep_options is None else ep_options)\n", + " break\n", " \n", "opts = ort.SessionOptions()\n", " \n", diff --git a/openai-clip-vit-base-patch32/aitk/openai_clip_ov_inference_sample.ipynb b/openai-clip-vit-base-patch32/aitk/openai_clip_ov_inference_sample.ipynb index ef626f4c..0312f8d6 100644 --- a/openai-clip-vit-base-patch32/aitk/openai_clip_ov_inference_sample.ipynb +++ b/openai-clip-vit-base-patch32/aitk/openai_clip_ov_inference_sample.ipynb @@ -15,6 +15,34 @@ "ExecutionProvider=\"OpenVINOExecutionProvider\"" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "bf6bb9b8", + "metadata": { + "vscode": { + "languageId": "plaintext" + } + }, + "outputs": [], + "source": [ + "# reference: https://learn.microsoft.com/en-us/windows/ai/new-windows-ml/tutorial?tabs=python#acquiring-the-model-and-preprocessing\n", + "import subprocess\n", + "import json\n", + "import sys\n", + "import os\n", + "import onnxruntime as ort\n", + "\n", + "def register_execution_providers():\n", + " worker_script = os.path.abspath('winml.py')\n", + " result = subprocess.check_output([sys.executable, worker_script], text=True)\n", + " paths = json.loads(result)\n", + " for item in paths.items():\n", + " ort.register_execution_provider_library(item[0], item[1])\n", + "\n", + "register_execution_providers()" + ] + }, { "cell_type": "code", "execution_count": null, diff --git a/openai-clip-vit-base-patch32/aitk/openai_clip_qdq_amd_inference_sample.ipynb b/openai-clip-vit-base-patch32/aitk/openai_clip_qdq_amd_inference_sample.ipynb index 
95bfb0a4..fa35e6a2 100644 --- a/openai-clip-vit-base-patch32/aitk/openai_clip_qdq_amd_inference_sample.ipynb +++ b/openai-clip-vit-base-patch32/aitk/openai_clip_qdq_amd_inference_sample.ipynb @@ -15,6 +15,34 @@ "ExecutionProvider=\"VitisAIExecutionProvider\"" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "bf6bb9b8", + "metadata": { + "vscode": { + "languageId": "plaintext" + } + }, + "outputs": [], + "source": [ + "# reference: https://learn.microsoft.com/en-us/windows/ai/new-windows-ml/tutorial?tabs=python#acquiring-the-model-and-preprocessing\n", + "import subprocess\n", + "import json\n", + "import sys\n", + "import os\n", + "import onnxruntime as ort\n", + "\n", + "def register_execution_providers():\n", + " worker_script = os.path.abspath('winml.py')\n", + " result = subprocess.check_output([sys.executable, worker_script], text=True)\n", + " paths = json.loads(result)\n", + " for item in paths.items():\n", + " ort.register_execution_provider_library(item[0], item[1])\n", + "\n", + "register_execution_providers()" + ] + }, { "cell_type": "code", "execution_count": null, diff --git a/openai-clip-vit-base-patch32/aitk/openai_clip_text_qnn_inference_sample.ipynb b/openai-clip-vit-base-patch32/aitk/openai_clip_text_qnn_inference_sample.ipynb index 0a120030..3efc378d 100644 --- a/openai-clip-vit-base-patch32/aitk/openai_clip_text_qnn_inference_sample.ipynb +++ b/openai-clip-vit-base-patch32/aitk/openai_clip_text_qnn_inference_sample.ipynb @@ -11,6 +11,30 @@ "ExecutionProvider=\"QNNExecutionProvider\"" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "f0ea54b2", + "metadata": {}, + "outputs": [], + "source": [ + "# reference: https://learn.microsoft.com/en-us/windows/ai/new-windows-ml/tutorial?tabs=python#acquiring-the-model-and-preprocessing\n", + "import subprocess\n", + "import json\n", + "import sys\n", + "import os\n", + "import onnxruntime as ort\n", + "\n", + "def register_execution_providers():\n", + " worker_script = os.path.abspath('winml.py')\n", + " result = subprocess.check_output([sys.executable, worker_script], text=True)\n", + " paths = json.loads(result)\n", + " for item in paths.items():\n", + " ort.register_execution_provider_library(item[0], item[1])\n", + "\n", + "register_execution_providers()" + ] + }, { "cell_type": "markdown", "id": "897ffb42-3569-4d78-b99d-355a38fdce35", diff --git a/openai-clip-vit-base-patch32/aitk/openai_clip_trtrtx_inference_sample.ipynb b/openai-clip-vit-base-patch32/aitk/openai_clip_trtrtx_inference_sample.ipynb index ee2b42fd..4c1986a4 100644 --- a/openai-clip-vit-base-patch32/aitk/openai_clip_trtrtx_inference_sample.ipynb +++ b/openai-clip-vit-base-patch32/aitk/openai_clip_trtrtx_inference_sample.ipynb @@ -11,6 +11,30 @@ "ExecutionProvider=\"NvTensorRTRTXExecutionProvider\"" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "22477669", + "metadata": {}, + "outputs": [], + "source": [ + "# reference: https://learn.microsoft.com/en-us/windows/ai/new-windows-ml/tutorial?tabs=python#acquiring-the-model-and-preprocessing\n", + "import subprocess\n", + "import json\n", + "import sys\n", + "import os\n", + "import onnxruntime as ort\n", + "\n", + "def register_execution_providers():\n", + " worker_script = os.path.abspath('winml.py')\n", + " result = subprocess.check_output([sys.executable, worker_script], text=True)\n", + " paths = json.loads(result)\n", + " for item in paths.items():\n", + " ort.register_execution_provider_library(item[0], item[1])\n", + "\n", + 
"register_execution_providers()" + ] + }, { "cell_type": "code", "execution_count": null, @@ -42,6 +66,7 @@ " if ep_device.ep_name == ep_name and ep_device.device.type == device_type:\n", " print(f\"Adding {ep_name} for {device_type}\")\n", " session_options.add_provider_for_devices([ep_device], {} if ep_options is None else ep_options)\n", + " break\n", " \n", "opts = ort.SessionOptions()\n", " \n", diff --git a/openai-clip-vit-base-patch32/aitk/openai_clip_vision_qnn.json b/openai-clip-vit-base-patch32/aitk/openai_clip_vision_qnn.json index a12522a0..7c7ed386 100644 --- a/openai-clip-vit-base-patch32/aitk/openai_clip_vision_qnn.json +++ b/openai-clip-vit-base-patch32/aitk/openai_clip_vision_qnn.json @@ -2,7 +2,6 @@ "input_model": { "type": "PytorchModel", "model_path": "openai/clip-vit-base-patch32", - "generative": false, "io_config": { "input_names": [ "pixel_values" diff --git a/openai-clip-vit-base-patch32/aitk/winml.py b/openai-clip-vit-base-patch32/aitk/winml.py new file mode 100644 index 00000000..74a12c53 --- /dev/null +++ b/openai-clip-vit-base-patch32/aitk/winml.py @@ -0,0 +1,21 @@ +import json + +def _get_ep_paths() -> dict[str, str]: + from winui3.microsoft.windows.applicationmodel.dynamicdependency.bootstrap import ( + InitializeOptions, + initialize + ) + import winui3.microsoft.windows.ai.machinelearning as winml + eps = {} + with initialize(options = InitializeOptions.ON_NO_MATCH_SHOW_UI): + catalog = winml.ExecutionProviderCatalog.get_default() + providers = catalog.find_all_providers() + for provider in providers: + provider.ensure_ready_async().get() + eps[provider.name] = provider.library_path + # DO NOT call provider.try_register in python. That will register to the native env. + return eps + +if __name__ == "__main__": + eps = _get_ep_paths() + print(json.dumps(eps)) From a5cfa124a8355b43c17b3a535e50b6f640ba9ac5 Mon Sep 17 00:00:00 2001 From: Chao Zhang Date: Wed, 6 Aug 2025 17:58:50 +0800 Subject: [PATCH 02/14] add break --- .../aitk/laion_clip_ov_inference_sample.ipynb | 1 + .../laion_clip_qdq_amd_inference_sample.ipynb | 1 + ...laion_clip_text_qnn_inference_sample.ipynb | 1 + ...ion_clip_vision_qnn_inference_sample.ipynb | 25 +++++++++++++++++++ .../openai_clip_ov_inference_sample.ipynb | 1 + ...openai_clip_qdq_amd_inference_sample.ipynb | 1 + ...penai_clip_text_qnn_inference_sample.ipynb | 1 + ...nai_clip_vision_qnn_inference_sample.ipynb | 25 +++++++++++++++++++ .../openai_clip_ov_inference_sample.ipynb | 1 + ...openai_clip_qdq_amd_inference_sample.ipynb | 1 + ...penai_clip_text_qnn_inference_sample.ipynb | 1 + ...nai_clip_vision_qnn_inference_sample.ipynb | 25 +++++++++++++++++++ 12 files changed, 84 insertions(+) diff --git a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_ov_inference_sample.ipynb b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_ov_inference_sample.ipynb index 91277a22..e09f24a8 100644 --- a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_ov_inference_sample.ipynb +++ b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_ov_inference_sample.ipynb @@ -78,6 +78,7 @@ " if ep_device.ep_name == ep_name and ep_device.device.type == device_type:\n", " print(f\"Adding {ep_name} for {device_type}\")\n", " session_options.add_provider_for_devices([ep_device], {} if ep_options is None else ep_options)\n", + " break\n", " \n", "opts = ort.SessionOptions()\n", " \n", diff --git a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_qdq_amd_inference_sample.ipynb 
b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_qdq_amd_inference_sample.ipynb index e3267d68..36eaada7 100644 --- a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_qdq_amd_inference_sample.ipynb +++ b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_qdq_amd_inference_sample.ipynb @@ -78,6 +78,7 @@ " if ep_device.ep_name == ep_name and ep_device.device.type == device_type:\n", " print(f\"Adding {ep_name} for {device_type}\")\n", " session_options.add_provider_for_devices([ep_device], {} if ep_options is None else ep_options)\n", + " break\n", " \n", "opts = ort.SessionOptions()\n", " \n", diff --git a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_text_qnn_inference_sample.ipynb b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_text_qnn_inference_sample.ipynb index f78cd53e..0884b6ac 100644 --- a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_text_qnn_inference_sample.ipynb +++ b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_text_qnn_inference_sample.ipynb @@ -80,6 +80,7 @@ " if ep_device.ep_name == ep_name and ep_device.device.type == device_type:\n", " print(f\"Adding {ep_name} for {device_type}\")\n", " session_options.add_provider_for_devices([ep_device], {} if ep_options is None else ep_options)\n", + " break\n", "\n", "\n", "session_options = ort.SessionOptions()\n", diff --git a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_vision_qnn_inference_sample.ipynb b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_vision_qnn_inference_sample.ipynb index 02cfa10a..aa8a8757 100644 --- a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_vision_qnn_inference_sample.ipynb +++ b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_vision_qnn_inference_sample.ipynb @@ -12,6 +12,30 @@ "ExecutionProvider=\"QNNExecutionProvider\"" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "f82e3bca", + "metadata": {}, + "outputs": [], + "source": [ + "# reference: https://learn.microsoft.com/en-us/windows/ai/new-windows-ml/tutorial?tabs=python#acquiring-the-model-and-preprocessing\n", + "import subprocess\n", + "import json\n", + "import sys\n", + "import os\n", + "import onnxruntime as ort\n", + "\n", + "def register_execution_providers():\n", + " worker_script = os.path.abspath('winml.py')\n", + " result = subprocess.check_output([sys.executable, worker_script], text=True)\n", + " paths = json.loads(result)\n", + " for item in paths.items():\n", + " ort.register_execution_provider_library(item[0], item[1])\n", + "\n", + "register_execution_providers()" + ] + }, { "cell_type": "markdown", "id": "897ffb42-3569-4d78-b99d-355a38fdce35", @@ -57,6 +81,7 @@ " if ep_device.ep_name == ep_name and ep_device.device.type == device_type:\n", " print(f\"Adding {ep_name} for {device_type}\")\n", " session_options.add_provider_for_devices([ep_device], {} if ep_options is None else ep_options)\n", + " break\n", "\n", "\n", "session_options = ort.SessionOptions()\n", diff --git a/openai-clip-vit-base-patch16/aitk/openai_clip_ov_inference_sample.ipynb b/openai-clip-vit-base-patch16/aitk/openai_clip_ov_inference_sample.ipynb index b133b368..9ba20f48 100644 --- a/openai-clip-vit-base-patch16/aitk/openai_clip_ov_inference_sample.ipynb +++ b/openai-clip-vit-base-patch16/aitk/openai_clip_ov_inference_sample.ipynb @@ -78,6 +78,7 @@ " if ep_device.ep_name == ep_name and ep_device.device.type == device_type:\n", " print(f\"Adding {ep_name} for {device_type}\")\n", " session_options.add_provider_for_devices([ep_device], {} if ep_options is None else 
ep_options)\n", + " break\n", " \n", "opts = ort.SessionOptions()\n", " \n", diff --git a/openai-clip-vit-base-patch16/aitk/openai_clip_qdq_amd_inference_sample.ipynb b/openai-clip-vit-base-patch16/aitk/openai_clip_qdq_amd_inference_sample.ipynb index 86363274..4c288088 100644 --- a/openai-clip-vit-base-patch16/aitk/openai_clip_qdq_amd_inference_sample.ipynb +++ b/openai-clip-vit-base-patch16/aitk/openai_clip_qdq_amd_inference_sample.ipynb @@ -78,6 +78,7 @@ " if ep_device.ep_name == ep_name and ep_device.device.type == device_type:\n", " print(f\"Adding {ep_name} for {device_type}\")\n", " session_options.add_provider_for_devices([ep_device], {} if ep_options is None else ep_options)\n", + " break\n", " \n", "opts = ort.SessionOptions()\n", " \n", diff --git a/openai-clip-vit-base-patch16/aitk/openai_clip_text_qnn_inference_sample.ipynb b/openai-clip-vit-base-patch16/aitk/openai_clip_text_qnn_inference_sample.ipynb index f00e5fef..46a0e8d6 100644 --- a/openai-clip-vit-base-patch16/aitk/openai_clip_text_qnn_inference_sample.ipynb +++ b/openai-clip-vit-base-patch16/aitk/openai_clip_text_qnn_inference_sample.ipynb @@ -80,6 +80,7 @@ " if ep_device.ep_name == ep_name and ep_device.device.type == device_type:\n", " print(f\"Adding {ep_name} for {device_type}\")\n", " session_options.add_provider_for_devices([ep_device], {} if ep_options is None else ep_options)\n", + " break\n", "\n", "\n", "session_options = ort.SessionOptions()\n", diff --git a/openai-clip-vit-base-patch16/aitk/openai_clip_vision_qnn_inference_sample.ipynb b/openai-clip-vit-base-patch16/aitk/openai_clip_vision_qnn_inference_sample.ipynb index f3609ed0..f90ea43a 100644 --- a/openai-clip-vit-base-patch16/aitk/openai_clip_vision_qnn_inference_sample.ipynb +++ b/openai-clip-vit-base-patch16/aitk/openai_clip_vision_qnn_inference_sample.ipynb @@ -12,6 +12,30 @@ "ExecutionProvider=\"QNNExecutionProvider\"" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "f82e3bca", + "metadata": {}, + "outputs": [], + "source": [ + "# reference: https://learn.microsoft.com/en-us/windows/ai/new-windows-ml/tutorial?tabs=python#acquiring-the-model-and-preprocessing\n", + "import subprocess\n", + "import json\n", + "import sys\n", + "import os\n", + "import onnxruntime as ort\n", + "\n", + "def register_execution_providers():\n", + " worker_script = os.path.abspath('winml.py')\n", + " result = subprocess.check_output([sys.executable, worker_script], text=True)\n", + " paths = json.loads(result)\n", + " for item in paths.items():\n", + " ort.register_execution_provider_library(item[0], item[1])\n", + "\n", + "register_execution_providers()" + ] + }, { "cell_type": "markdown", "id": "897ffb42-3569-4d78-b99d-355a38fdce35", @@ -57,6 +81,7 @@ " if ep_device.ep_name == ep_name and ep_device.device.type == device_type:\n", " print(f\"Adding {ep_name} for {device_type}\")\n", " session_options.add_provider_for_devices([ep_device], {} if ep_options is None else ep_options)\n", + " break\n", "\n", "\n", "session_options = ort.SessionOptions()\n", diff --git a/openai-clip-vit-base-patch32/aitk/openai_clip_ov_inference_sample.ipynb b/openai-clip-vit-base-patch32/aitk/openai_clip_ov_inference_sample.ipynb index 0312f8d6..03e0fc89 100644 --- a/openai-clip-vit-base-patch32/aitk/openai_clip_ov_inference_sample.ipynb +++ b/openai-clip-vit-base-patch32/aitk/openai_clip_ov_inference_sample.ipynb @@ -78,6 +78,7 @@ " if ep_device.ep_name == ep_name and ep_device.device.type == device_type:\n", " print(f\"Adding {ep_name} for {device_type}\")\n", " 
session_options.add_provider_for_devices([ep_device], {} if ep_options is None else ep_options)\n", + " break\n", " \n", "opts = ort.SessionOptions()\n", " \n", diff --git a/openai-clip-vit-base-patch32/aitk/openai_clip_qdq_amd_inference_sample.ipynb b/openai-clip-vit-base-patch32/aitk/openai_clip_qdq_amd_inference_sample.ipynb index fa35e6a2..658c7098 100644 --- a/openai-clip-vit-base-patch32/aitk/openai_clip_qdq_amd_inference_sample.ipynb +++ b/openai-clip-vit-base-patch32/aitk/openai_clip_qdq_amd_inference_sample.ipynb @@ -78,6 +78,7 @@ " if ep_device.ep_name == ep_name and ep_device.device.type == device_type:\n", " print(f\"Adding {ep_name} for {device_type}\")\n", " session_options.add_provider_for_devices([ep_device], {} if ep_options is None else ep_options)\n", + " break\n", " \n", "opts = ort.SessionOptions()\n", " \n", diff --git a/openai-clip-vit-base-patch32/aitk/openai_clip_text_qnn_inference_sample.ipynb b/openai-clip-vit-base-patch32/aitk/openai_clip_text_qnn_inference_sample.ipynb index 3efc378d..347c9d15 100644 --- a/openai-clip-vit-base-patch32/aitk/openai_clip_text_qnn_inference_sample.ipynb +++ b/openai-clip-vit-base-patch32/aitk/openai_clip_text_qnn_inference_sample.ipynb @@ -80,6 +80,7 @@ " if ep_device.ep_name == ep_name and ep_device.device.type == device_type:\n", " print(f\"Adding {ep_name} for {device_type}\")\n", " session_options.add_provider_for_devices([ep_device], {} if ep_options is None else ep_options)\n", + " break\n", "\n", "\n", "session_options = ort.SessionOptions()\n", diff --git a/openai-clip-vit-base-patch32/aitk/openai_clip_vision_qnn_inference_sample.ipynb b/openai-clip-vit-base-patch32/aitk/openai_clip_vision_qnn_inference_sample.ipynb index 518a97c7..0863f581 100644 --- a/openai-clip-vit-base-patch32/aitk/openai_clip_vision_qnn_inference_sample.ipynb +++ b/openai-clip-vit-base-patch32/aitk/openai_clip_vision_qnn_inference_sample.ipynb @@ -12,6 +12,30 @@ "ExecutionProvider=\"QNNExecutionProvider\"" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "f82e3bca", + "metadata": {}, + "outputs": [], + "source": [ + "# reference: https://learn.microsoft.com/en-us/windows/ai/new-windows-ml/tutorial?tabs=python#acquiring-the-model-and-preprocessing\n", + "import subprocess\n", + "import json\n", + "import sys\n", + "import os\n", + "import onnxruntime as ort\n", + "\n", + "def register_execution_providers():\n", + " worker_script = os.path.abspath('winml.py')\n", + " result = subprocess.check_output([sys.executable, worker_script], text=True)\n", + " paths = json.loads(result)\n", + " for item in paths.items():\n", + " ort.register_execution_provider_library(item[0], item[1])\n", + "\n", + "register_execution_providers()" + ] + }, { "cell_type": "markdown", "id": "897ffb42-3569-4d78-b99d-355a38fdce35", @@ -57,6 +81,7 @@ " if ep_device.ep_name == ep_name and ep_device.device.type == device_type:\n", " print(f\"Adding {ep_name} for {device_type}\")\n", " session_options.add_provider_for_devices([ep_device], {} if ep_options is None else ep_options)\n", + " break\n", "\n", "\n", "session_options = ort.SessionOptions()\n", From 193561525bbe889b2e752ebe5e91b379c284c213 Mon Sep 17 00:00:00 2001 From: Chao Zhang Date: Fri, 8 Aug 2025 11:55:17 +0800 Subject: [PATCH 03/14] change req --- .aitk/requirements/requirements-WCR.txt | 24 ++++++++++++++++---- .aitk/requirements/requirements-WCR_CUDA.txt | 23 +++++++++++++++---- .aitk/scripts/install_freeze.py | 19 ++++++++++++---- 3 files changed, 52 insertions(+), 14 deletions(-) diff --git 
a/.aitk/requirements/requirements-WCR.txt b/.aitk/requirements/requirements-WCR.txt index 8220bd4e..b112f003 100644 --- a/.aitk/requirements/requirements-WCR.txt +++ b/.aitk/requirements/requirements-WCR.txt @@ -2,9 +2,10 @@ onnx==1.17.0 numpy==2.2.4 protobuf==6.30.2 -# olive-ai==0.9.1 -olive-ai==0.9.1 -onnxscript==0.2.5 +# olive-ai@git+https://github.com/microsoft/Olive.git#egg=olive-ai +olive-ai@git+https://github.com/microsoft/Olive.git#egg=olive-ai +onnx-ir==0.1.4 +onnxscript==0.3.2 optuna==4.2.1 pandas==2.2.3 pydantic==2.11.1 @@ -47,11 +48,11 @@ widgetsnbextension==4.0.13 # torchvision==0.22.0 torchvision==0.22.0 pillow==11.2.1 -# uvpip:install onnxruntime-winml==1.22.0.post1 --extra-index-url https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/ORT-Nightly/pypi/simple --no-deps;post +# uvpip:install onnxruntime-winml==1.22.0.post2 --extra-index-url https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/ORT-Nightly/pypi/simple --no-deps;post coloredlogs==15.0.1 flatbuffers==25.2.10 sympy==1.14.0 -# uvpip:install onnxruntime-genai-winml==0.8.3 --extra-index-url https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/ORT-Nightly/pypi/simple --no-deps;post +# uvpip:install onnxruntime-genai-winml==0.9.0.dev0 --extra-index-url https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/ORT-Nightly/pypi/simple --no-deps;post # evaluate==0.4.3 evaluate==0.4.3 # scikit-learn==1.6.1 @@ -59,3 +60,16 @@ scikit-learn==1.6.1 joblib==1.5.0 scipy==1.15.3 threadpoolctl==3.6.0 +--index-url https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/ORT-Nightly/pypi/simple +--extra-index-url https://pypi.org/simple +# winrt-runtime==3.2.1 +winrt-runtime==3.2.1 +typing-extensions==4.14.1 +# winrt-Windows.Foundation==3.2.1 +winrt-Windows.Foundation==3.2.1 +# winrt-Windows.Foundation.Collections==3.2.1 +winrt-Windows.Foundation.Collections==3.2.1 +# winui3-Microsoft.Windows.AI.MachineLearning==1!1.8.250702007.dev4 +winui3-Microsoft.Windows.AI.MachineLearning==1!1.8.250702007.dev4 +# winui3-Microsoft.Windows.ApplicationModel.DynamicDependency.Bootstrap==1!1.8.250702007.dev4 +winui3-Microsoft.Windows.ApplicationModel.DynamicDependency.Bootstrap==1!1.8.250702007.dev4 diff --git a/.aitk/requirements/requirements-WCR_CUDA.txt b/.aitk/requirements/requirements-WCR_CUDA.txt index 1713433c..4b50fc63 100644 --- a/.aitk/requirements/requirements-WCR_CUDA.txt +++ b/.aitk/requirements/requirements-WCR_CUDA.txt @@ -12,9 +12,10 @@ typing-extensions==4.14.0 onnx==1.17.0 numpy==2.2.4 protobuf==6.30.2 -# olive-ai==0.9.1 -olive-ai==0.9.1 -onnxscript==0.2.5 +# olive-ai@git+https://github.com/microsoft/Olive.git#egg=olive-ai +olive-ai@git+https://github.com/microsoft/Olive.git#egg=olive-ai +onnx-ir==0.1.4 +onnxscript==0.3.2 optuna==4.2.1 pandas==2.2.3 pydantic==2.11.1 @@ -54,10 +55,10 @@ widgetsnbextension==4.0.13 # torchvision==0.22.0+cu128 torchvision==0.22.0+cu128 pillow==11.2.1 -# uvpip:install onnxruntime-winml==1.22.0.post1 --extra-index-url https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/ORT-Nightly/pypi/simple --no-deps;post +# uvpip:install onnxruntime-winml==1.22.0.post2 --extra-index-url https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/ORT-Nightly/pypi/simple --no-deps;post coloredlogs==15.0.1 flatbuffers==25.2.10 -# uvpip:install onnxruntime-genai-winml==0.8.3 --extra-index-url https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/ORT-Nightly/pypi/simple --no-deps;post +# uvpip:install onnxruntime-genai-winml==0.9.0.dev0 
--extra-index-url https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/ORT-Nightly/pypi/simple --no-deps;post # evaluate==0.4.3 evaluate==0.4.3 # scikit-learn==1.6.1 @@ -65,3 +66,15 @@ scikit-learn==1.6.1 joblib==1.5.0 scipy==1.15.3 threadpoolctl==3.6.0 +--index-url https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/ORT-Nightly/pypi/simple +--extra-index-url https://pypi.org/simple +# winrt-runtime==3.2.1 +winrt-runtime==3.2.1 +# winrt-Windows.Foundation==3.2.1 +winrt-Windows.Foundation==3.2.1 +# winrt-Windows.Foundation.Collections==3.2.1 +winrt-Windows.Foundation.Collections==3.2.1 +# winui3-Microsoft.Windows.AI.MachineLearning==1!1.8.250702007.dev4 +winui3-Microsoft.Windows.AI.MachineLearning==1!1.8.250702007.dev4 +# winui3-Microsoft.Windows.ApplicationModel.DynamicDependency.Bootstrap==1!1.8.250702007.dev4 +winui3-Microsoft.Windows.ApplicationModel.DynamicDependency.Bootstrap==1!1.8.250702007.dev4 diff --git a/.aitk/scripts/install_freeze.py b/.aitk/scripts/install_freeze.py index 02c2eecd..e4e5e5ad 100644 --- a/.aitk/scripts/install_freeze.py +++ b/.aitk/scripts/install_freeze.py @@ -15,10 +15,19 @@ depsPrefix = "# deps:" cudaExtraUrl = "--extra-index-url https://download.pytorch.org/whl/cu128" torchCudaVersion = "torch==2.7.0+cu128" -onnxruntimeWinmlVersion = f"{uvpipInstallPrefix} onnxruntime-winml==1.22.0.post1 --extra-index-url https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/ORT-Nightly/pypi/simple --no-deps;post" -onnxruntimeGenaiWinmlVersion = f"{uvpipInstallPrefix} onnxruntime-genai-winml==0.8.3 --extra-index-url https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/ORT-Nightly/pypi/simple --no-deps;post" +onnxruntimeWinmlVersion = f"{uvpipInstallPrefix} onnxruntime-winml==1.22.0.post2 --extra-index-url https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/ORT-Nightly/pypi/simple --no-deps;post" +onnxruntimeGenaiWinmlVersion = f"{uvpipInstallPrefix} onnxruntime-genai-winml==0.9.0.dev0 --extra-index-url https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/ORT-Nightly/pypi/simple --no-deps;post" evaluateVersion = "evaluate==0.4.3" scikitLearnVersion = "scikit-learn==1.6.1" +winrtPackage = [ + "--index-url https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/ORT-Nightly/pypi/simple", + "--extra-index-url https://pypi.org/simple", + "winrt-runtime==3.2.1", + "winrt-Windows.Foundation==3.2.1", + "winrt-Windows.Foundation.Collections==3.2.1", + "winui3-Microsoft.Windows.AI.MachineLearning==1!1.8.250702007.dev4", + "winui3-Microsoft.Windows.ApplicationModel.DynamicDependency.Bootstrap==1!1.8.250702007.dev4", +] def get_requires(name: str, args): @@ -50,8 +59,8 @@ def get_requires(name: str, args): def main(): # Constants - # if from git: "git+https://github.com/microsoft/Olive.git@COMMIT_ID#egg=olive_ai - oliveAi = "olive-ai==0.9.1" + # if from git: "git+https://github.com/microsoft/Olive.git@COMMIT_ID#egg=olive-ai + oliveAi = "olive-ai@git+https://github.com/microsoft/Olive.git#egg=olive-ai" torchVision = "torchvision==0.22.0" pre = { RuntimeEnum.NvidiaGPU: [ @@ -107,6 +116,7 @@ def main(): onnxruntimeGenaiWinmlVersion, evaluateVersion, scikitLearnVersion, + *winrtPackage, ], RuntimeEnum.WCR_CUDA: [ "torchvision==0.22.0+cu128", @@ -114,6 +124,7 @@ def main(): onnxruntimeGenaiWinmlVersion, evaluateVersion, scikitLearnVersion, + *winrtPackage, ], RuntimeEnum.QNN_LLLM: [ "ipykernel==6.29.5", From 72a4bf40532c7c2de243cae3a266571f7fd1c161 Mon Sep 17 00:00:00 2001 From: Chao Zhang Date: Tue, 19 Aug 2025 
15:28:52 +0800 Subject: [PATCH 04/14] add webgpu recipe --- .aitk/configs/checks.json | 6 +- .aitk/configs/model_list.json | 60 ++++---- .aitk/requirements/requirements-WCR.txt | 14 +- .aitk/requirements/requirements-WCR_CUDA.txt | 14 +- .aitk/scripts/model_lab/__init__.py | 1 + .aitk/scripts/sanitize/constants.py | 1 + .aitk/scripts/sanitize/main.py | 3 +- .aitk/scripts/sanitize/utils.py | 3 + .../aitk/_copy.json.config | 31 ++++- .../aitk/model_project.config | 4 + .../aitk/qwen2_5_webgpu_config.json | 38 +++++ .../aitk/qwen2_5_webgpu_config.json.config | 42 ++++++ .../aitk/deepseek_webgpu_config.json | 38 +++++ .../aitk/deepseek_webgpu_config.json.config | 42 ++++++ .../aitk/info.yml | 3 + .../aitk/model_project.config | 4 + .../aitk/bert_webgpu.json | 130 ++++++++++++++++++ .../aitk/bert_webgpu.json.config | 102 ++++++++++++++ intel-bert-base-uncased-mrpc/aitk/info.yml | 4 + .../aitk/model_project.config | 4 + .../aitk/_copy.json.config | 31 ++++- .../aitk/llama3_2_webgpu_config.json | 38 +++++ .../aitk/llama3_2_webgpu_config.json.config | 42 ++++++ .../aitk/model_project.config | 4 + .../aitk/_copy.json.config | 31 ++++- .../aitk/model_project.config | 4 + .../aitk/phi3_5_webgpu_config.json | 38 +++++ .../aitk/phi3_5_webgpu_config.json.config | 42 ++++++ 28 files changed, 716 insertions(+), 58 deletions(-) create mode 100644 Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_webgpu_config.json create mode 100644 Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_webgpu_config.json.config create mode 100644 deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_webgpu_config.json create mode 100644 deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_webgpu_config.json.config create mode 100644 intel-bert-base-uncased-mrpc/aitk/bert_webgpu.json create mode 100644 intel-bert-base-uncased-mrpc/aitk/bert_webgpu.json.config create mode 100644 meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_webgpu_config.json create mode 100644 meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_webgpu_config.json.config create mode 100644 microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_webgpu_config.json create mode 100644 microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_webgpu_config.json.config diff --git a/.aitk/configs/checks.json b/.aitk/configs/checks.json index 5904d6a2..532ef433 100644 --- a/.aitk/configs/checks.json +++ b/.aitk/configs/checks.json @@ -1,13 +1,13 @@ { - "configCheck": 80, + "configCheck": 85, "extensionCheck": 1, "gitignoreCheck": 32, "inferenceModelCheck": 22, "ipynbCheck": 51, "modelProjectCheck": 33, "oliveCheck": 0, - "oliveJsonCheck": 80, - "pathCheck": 756, + "oliveJsonCheck": 85, + "pathCheck": 772, "requirementsCheck": 32, "venvRequirementsCheck": 12 } diff --git a/.aitk/configs/model_list.json b/.aitk/configs/model_list.json index 82540e55..2acec4fe 100644 --- a/.aitk/configs/model_list.json +++ b/.aitk/configs/model_list.json @@ -24,10 +24,11 @@ "QNN", "AMDNPU", "NvidiaTRTRTX", - "DML", "IntelCPU", "IntelGPU", - "IntelNPU" + "IntelNPU", + "DML", + "WebGpu" ], "architecture": "Transformer", "status": "Ready", @@ -73,10 +74,10 @@ "QNN", "AMDNPU", "NvidiaTRTRTX", - "DML", "IntelCPU", "IntelGPU", - "IntelNPU" + "IntelNPU", + "DML" ], "architecture": "Transformer", "status": "Ready", @@ -92,10 +93,10 @@ "QNN", "AMDNPU", "NvidiaTRTRTX", - "DML", "IntelCPU", "IntelGPU", - "IntelNPU" + "IntelNPU", + "DML" ], "architecture": "Transformer", "status": "Ready", @@ -111,10 +112,11 @@ "QNN", "AMDNPU", "NvidiaTRTRTX", - "DML", "IntelCPU", "IntelGPU", - "IntelNPU" + "IntelNPU", + "DML", + "WebGpu" ], "architecture": 
"Transformer", "status": "Ready", @@ -130,10 +132,10 @@ "QNN", "AMDNPU", "NvidiaTRTRTX", - "DML", "IntelCPU", "IntelGPU", - "IntelNPU" + "IntelNPU", + "DML" ], "architecture": "Transformer", "status": "Ready", @@ -149,10 +151,10 @@ "QNN", "AMDNPU", "NvidiaTRTRTX", - "DML", "IntelCPU", "IntelGPU", - "IntelNPU" + "IntelNPU", + "DML" ], "architecture": "Transformer", "status": "Ready", @@ -198,10 +200,10 @@ "QNN", "AMDNPU", "NvidiaTRTRTX", - "DML", "IntelCPU", "IntelGPU", - "IntelNPU" + "IntelNPU", + "DML" ], "architecture": "Transformer", "status": "Ready", @@ -242,9 +244,7 @@ "modelLink": "https://huggingface.co/microsoft/Phi-4-reasoning", "id": "huggingface/microsoft/Phi-4-reasoning", "runtimes": [ - "IntelCPU", - "IntelGPU", - "IntelNPU" + "IntelGPU" ], "architecture": "Transformer", "status": "Ready", @@ -257,9 +257,7 @@ "modelLink": "https://huggingface.co/microsoft/Phi-4-reasoning-plus", "id": "huggingface/microsoft/Phi-4-reasoning-plus", "runtimes": [ - "IntelCPU", - "IntelGPU", - "IntelNPU" + "IntelGPU" ], "architecture": "Transformer", "status": "Ready", @@ -275,10 +273,10 @@ "QNN", "AMDNPU", "NvidiaTRTRTX", - "DML", "IntelCPU", "IntelGPU", - "IntelNPU" + "IntelNPU", + "DML" ], "architecture": "CNN", "status": "Ready", @@ -307,10 +305,10 @@ "QNN", "AMDNPU", "NvidiaTRTRTX", - "DML", "IntelCPU", "IntelGPU", - "IntelNPU" + "IntelNPU", + "DML" ], "architecture": "Transformer", "status": "Ready", @@ -326,10 +324,10 @@ "QNN", "AMDNPU", "NvidiaTRTRTX", - "DML", "IntelCPU", "IntelGPU", - "IntelNPU" + "IntelNPU", + "DML" ], "architecture": "Transformer", "status": "Ready", @@ -375,10 +373,10 @@ "QNN", "AMDNPU", "NvidiaTRTRTX", - "DML", "IntelCPU", "IntelGPU", - "IntelNPU" + "IntelNPU", + "DML" ], "architecture": "Transformer", "status": "Ready", @@ -584,7 +582,8 @@ "AMD NPU": "AMDNPU", "NVIDIA GPU": "NvidiaGPU", "NVIDIA TensorRT for RTX": "NvidiaTRTRTX", - "DirectML": "DML" + "DirectML": "DML", + "WebGpu": "WebGpu" }, "RuntimeToDisplayName": { "CPU": "CPU", @@ -596,6 +595,7 @@ "AMDNPU": "AMD NPU", "NvidiaGPU": "NVIDIA GPU", "NvidiaTRTRTX": "NVIDIA TensorRT for RTX", - "DML": "DirectML" + "DML": "DirectML", + "WebGpu": "WebGpu" } } diff --git a/.aitk/requirements/requirements-WCR.txt b/.aitk/requirements/requirements-WCR.txt index 0fcd89e0..7e3a9ce9 100644 --- a/.aitk/requirements/requirements-WCR.txt +++ b/.aitk/requirements/requirements-WCR.txt @@ -48,11 +48,13 @@ widgetsnbextension==4.0.13 # torchvision==0.22.0 torchvision==0.22.0 pillow==11.2.1 -# uvpip:install onnxruntime-winml==1.22.0.post2 --extra-index-url https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/ORT-Nightly/pypi/simple --no-deps;post +./onnxruntime_winml-1.23.0.dev20250811-cp312-cp312-win_amd64.whl +# download:onnxruntime_winml-1.23.0.dev20250811-cp312-cp312-win_amd64.whl coloredlogs==15.0.1 flatbuffers==25.2.10 sympy==1.14.0 -# uvpip:install onnxruntime-genai-winml==0.9.0.dev0 --extra-index-url https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/ORT-Nightly/pypi/simple --no-deps;post +./onnxruntime_genai_winml-0.10.0.dev20250814-cp312-cp312-win_amd64.whl +# download:onnxruntime_genai_winml-0.10.0.dev20250814-cp312-cp312-win_amd64.whl # evaluate==0.4.3 evaluate==0.4.3 # scikit-learn==1.6.1 @@ -71,7 +73,7 @@ typing-extensions==4.14.1 winrt-Windows.Foundation==3.2.1 # winrt-Windows.Foundation.Collections==3.2.1 winrt-Windows.Foundation.Collections==3.2.1 -# winui3-Microsoft.Windows.AI.MachineLearning==1!1.8.250702007.dev4 -winui3-Microsoft.Windows.AI.MachineLearning==1!1.8.250702007.dev4 -# 
winui3-Microsoft.Windows.ApplicationModel.DynamicDependency.Bootstrap==1!1.8.250702007.dev4 -winui3-Microsoft.Windows.ApplicationModel.DynamicDependency.Bootstrap==1!1.8.250702007.dev4 +./wasdk_microsoft_windows_ai_machinelearning-1.8.250812004.dev0-cp312-cp312-win_amd64.whl +# download:wasdk_microsoft_windows_ai_machinelearning-1.8.250812004.dev0-cp312-cp312-win_amd64.whl +./wasdk_microsoft_windows_applicationmodel_dynamicdependency_bootstrap-1.8.250812004.dev0-cp312-cp312-win_amd64.whl +# download:wasdk_microsoft_windows_applicationmodel_dynamicdependency_bootstrap-1.8.250812004.dev0-cp312-cp312-win_amd64.whl diff --git a/.aitk/requirements/requirements-WCR_CUDA.txt b/.aitk/requirements/requirements-WCR_CUDA.txt index 048a8121..ceb0a03d 100644 --- a/.aitk/requirements/requirements-WCR_CUDA.txt +++ b/.aitk/requirements/requirements-WCR_CUDA.txt @@ -55,10 +55,12 @@ widgetsnbextension==4.0.13 # torchvision==0.22.0+cu128 torchvision==0.22.0+cu128 pillow==11.2.1 -# uvpip:install onnxruntime-winml==1.22.0.post2 --extra-index-url https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/ORT-Nightly/pypi/simple --no-deps;post +./onnxruntime_winml-1.23.0.dev20250811-cp312-cp312-win_amd64.whl +# download:onnxruntime_winml-1.23.0.dev20250811-cp312-cp312-win_amd64.whl coloredlogs==15.0.1 flatbuffers==25.2.10 -# uvpip:install onnxruntime-genai-winml==0.9.0.dev0 --extra-index-url https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/ORT-Nightly/pypi/simple --no-deps;post +./onnxruntime_genai_winml-0.10.0.dev20250814-cp312-cp312-win_amd64.whl +# download:onnxruntime_genai_winml-0.10.0.dev20250814-cp312-cp312-win_amd64.whl # evaluate==0.4.3 evaluate==0.4.3 # scikit-learn==1.6.1 @@ -76,7 +78,7 @@ winrt-runtime==3.2.1 winrt-Windows.Foundation==3.2.1 # winrt-Windows.Foundation.Collections==3.2.1 winrt-Windows.Foundation.Collections==3.2.1 -# winui3-Microsoft.Windows.AI.MachineLearning==1!1.8.250702007.dev4 -winui3-Microsoft.Windows.AI.MachineLearning==1!1.8.250702007.dev4 -# winui3-Microsoft.Windows.ApplicationModel.DynamicDependency.Bootstrap==1!1.8.250702007.dev4 -winui3-Microsoft.Windows.ApplicationModel.DynamicDependency.Bootstrap==1!1.8.250702007.dev4 +./wasdk_microsoft_windows_ai_machinelearning-1.8.250812004.dev0-cp312-cp312-win_amd64.whl +# download:wasdk_microsoft_windows_ai_machinelearning-1.8.250812004.dev0-cp312-cp312-win_amd64.whl +./wasdk_microsoft_windows_applicationmodel_dynamicdependency_bootstrap-1.8.250812004.dev0-cp312-cp312-win_amd64.whl +# download:wasdk_microsoft_windows_applicationmodel_dynamicdependency_bootstrap-1.8.250812004.dev0-cp312-cp312-win_amd64.whl diff --git a/.aitk/scripts/model_lab/__init__.py b/.aitk/scripts/model_lab/__init__.py index acf74087..7d9c062d 100644 --- a/.aitk/scripts/model_lab/__init__.py +++ b/.aitk/scripts/model_lab/__init__.py @@ -11,6 +11,7 @@ class RuntimeEnum(Enum): IntelGPU = "IntelGPU" IntelNPU = "IntelNPU" DML = "DML" + WebGpu = "WebGpu" NvidiaGPU = "NvidiaGPU" WCR = "WCR" WCR_CUDA = "WCR_CUDA" diff --git a/.aitk/scripts/sanitize/constants.py b/.aitk/scripts/sanitize/constants.py index 8c46194e..6f5a49ab 100644 --- a/.aitk/scripts/sanitize/constants.py +++ b/.aitk/scripts/sanitize/constants.py @@ -92,6 +92,7 @@ class EPNames(Enum): VitisAIExecutionProvider = "VitisAIExecutionProvider" NvTensorRTRTXExecutionProvider = "NvTensorRTRTXExecutionProvider" DmlExecutionProvider = "DmlExecutionProvider" + WebGpuExecutionProvider = "WebGpuExecutionProvider" class OliveDeviceTypes(Enum): diff --git a/.aitk/scripts/sanitize/main.py 
b/.aitk/scripts/sanitize/main.py index 90331ac4..da1f7b69 100644 --- a/.aitk/scripts/sanitize/main.py +++ b/.aitk/scripts/sanitize/main.py @@ -53,6 +53,8 @@ def main(): # check parameter template parameterTemplate = readCheckParameterTemplate(os.path.join(configDir, "parameter_template.json")) + modelList.Check() + # check each model for model in modelList.allModels(): modelDir = shouldCheckModel(str(rootDir), configDir, model) @@ -171,7 +173,6 @@ def main(): # Write back to file newContent = json.dumps(inferenceModelData, indent=4, ensure_ascii=False) BaseModelClass.writeJsonIfChanged(newContent, inferenceModelFile, fileContent) - modelList.Check() if GlobalVars.olivePath: printWarning(f"Total {GlobalVars.oliveCheck} config files checked against olive json files") diff --git a/.aitk/scripts/sanitize/utils.py b/.aitk/scripts/sanitize/utils.py index 633b1b87..8640b814 100644 --- a/.aitk/scripts/sanitize/utils.py +++ b/.aitk/scripts/sanitize/utils.py @@ -43,6 +43,7 @@ class GlobalVars: RuntimeEnum.NvidiaGPU: EPNames.CUDAExecutionProvider, RuntimeEnum.NvidiaTRTRTX: EPNames.NvTensorRTRTXExecutionProvider, RuntimeEnum.DML: EPNames.DmlExecutionProvider, + RuntimeEnum.WebGpu: EPNames.WebGpuExecutionProvider, } RuntimeToOliveDeviceType = { RuntimeEnum.CPU: OliveDeviceTypes.CPU, @@ -54,6 +55,7 @@ class GlobalVars: RuntimeEnum.AMDNPU: OliveDeviceTypes.NPU, RuntimeEnum.NvidiaGPU: OliveDeviceTypes.GPU, RuntimeEnum.DML: OliveDeviceTypes.GPU, + RuntimeEnum.WebGpu: OliveDeviceTypes.GPU, } RuntimeToDisplayName = { RuntimeEnum.CPU: "CPU", @@ -66,6 +68,7 @@ class GlobalVars: RuntimeEnum.NvidiaGPU: "NVIDIA GPU", RuntimeEnum.NvidiaTRTRTX: "NVIDIA TensorRT for RTX", RuntimeEnum.DML: "DirectML", + RuntimeEnum.WebGpu: "WebGpu", } @classmethod diff --git a/Qwen-Qwen2.5-1.5B-Instruct/aitk/_copy.json.config b/Qwen-Qwen2.5-1.5B-Instruct/aitk/_copy.json.config index 999a03d6..474439f6 100644 --- a/Qwen-Qwen2.5-1.5B-Instruct/aitk/_copy.json.config +++ b/Qwen-Qwen2.5-1.5B-Instruct/aitk/_copy.json.config @@ -27,6 +27,10 @@ { "find": "deepseek_dml_config", "replace": "qwen2_5_dml_config" + }, + { + "find": "deepseek_webgpu_config", + "replace": "qwen2_5_webgpu_config" } ] }, @@ -106,6 +110,11 @@ } ] }, + { + "src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_trtrtx_config.json.config", + "dst": "qwen2_5_trtrtx_config.json.config", + "replacements": [] + }, { "src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_dml_config.json", "dst": "qwen2_5_dml_config.json", @@ -121,13 +130,27 @@ ] }, { - "src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_trtrtx_config.json.config", - "dst": "qwen2_5_trtrtx_config.json.config", + "src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_dml_config.json.config", + "dst": "qwen2_5_dml_config.json.config", "replacements": [] }, { - "src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_dml_config.json.config", - "dst": "qwen2_5_dml_config.json.config", + "src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_webgpu_config.json", + "dst": "qwen2_5_webgpu_config.json", + "replacements": [ + { + "find": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", + "replace": "Qwen/Qwen2.5-1.5B-Instruct" + }, + { + "find": "model/deepseek", + "replace": "model/qwen2_5" + } + ] + }, + { + "src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_webgpu_config.json.config", + "dst": "qwen2_5_webgpu_config.json.config", "replacements": [] }, { diff --git 
a/Qwen-Qwen2.5-1.5B-Instruct/aitk/model_project.config b/Qwen-Qwen2.5-1.5B-Instruct/aitk/model_project.config index 61cb1603..8f5a8378 100644 --- a/Qwen-Qwen2.5-1.5B-Instruct/aitk/model_project.config +++ b/Qwen-Qwen2.5-1.5B-Instruct/aitk/model_project.config @@ -19,6 +19,10 @@ { "file": "qwen2_5_dml_config.json", "templateName": "qwen2_5_dml_config" + }, + { + "file": "qwen2_5_webgpu_config.json", + "templateName": "qwen2_5_webgpu_config" } ], "modelInfo": { diff --git a/Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_webgpu_config.json b/Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_webgpu_config.json new file mode 100644 index 00000000..1ed86300 --- /dev/null +++ b/Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_webgpu_config.json @@ -0,0 +1,38 @@ +{ + "input_model": { + "type": "HfModel", + "model_path": "Qwen/Qwen2.5-1.5B-Instruct" + }, + "systems": { + "local_system": { + "type": "LocalSystem", + "accelerators": [ { "device": "gpu", "execution_providers": [ "WebGpuExecutionProvider" ] } ] + } + }, + "data_configs": [ + { + "name": "wikitext2_train", + "type": "HuggingfaceContainer", + "load_dataset_config": { + "data_name": "wikitext", + "subset": "wikitext-2-raw-v1", + "split": "train" + }, + "pre_process_data_config": { + "strategy": "line-by-line", + "add_special_tokens": false, + "max_samples": 128, + "max_seq_len": 512 + } + } + ], + "passes": { + "builder": { "type": "ModelBuilder", "precision": "fp16" } + }, + "target": "local_system", + "log_severity_level": 1, + "output_dir": "model/qwen2_5", + "cache_dir": "cache", + "no_artifacts": true, + "evaluate_input_model": false +} \ No newline at end of file diff --git a/Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_webgpu_config.json.config b/Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_webgpu_config.json.config new file mode 100644 index 00000000..183017fd --- /dev/null +++ b/Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_webgpu_config.json.config @@ -0,0 +1,42 @@ +{ + "name": "Convert to WebGpu", + "oliveFile": "", + "isLLM": true, + "debugInfo": { + "autoGenerated": true, + "useModelBuilder": "builder" + }, + "addCpu": false, + "runtime": { + "autoGenerated": true, + "name": "Evaluate on", + "type": "enum", + "displayNames": [ + "WebGpu" + ], + "path": "systems.local_system.accelerators.0.execution_providers.0", + "values": [ + "WebGpuExecutionProvider" + ], + "readOnly": false + }, + "sections": [ + { + "autoGenerated": true, + "name": "Convert", + "phase": "Conversion", + "parameters": [], + "toggle": { + "autoGenerated": true, + "name": "Convert to ONNX format", + "type": "bool", + "path": "passes.builder", + "actions": [ + [], + [] + ], + "readOnly": true + } + } + ] +} diff --git a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_webgpu_config.json b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_webgpu_config.json new file mode 100644 index 00000000..c02096ff --- /dev/null +++ b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_webgpu_config.json @@ -0,0 +1,38 @@ +{ + "input_model": { + "type": "HfModel", + "model_path": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B" + }, + "systems": { + "local_system": { + "type": "LocalSystem", + "accelerators": [ { "device": "gpu", "execution_providers": [ "WebGpuExecutionProvider" ] } ] + } + }, + "data_configs": [ + { + "name": "wikitext2_train", + "type": "HuggingfaceContainer", + "load_dataset_config": { + "data_name": "wikitext", + "subset": "wikitext-2-raw-v1", + "split": "train" + }, + "pre_process_data_config": { + "strategy": "line-by-line", + "add_special_tokens": false, + "max_samples": 128, + 
"max_seq_len": 512 + } + } + ], + "passes": { + "builder": { "type": "ModelBuilder", "precision": "fp16" } + }, + "target": "local_system", + "log_severity_level": 1, + "output_dir": "model/deepseek", + "cache_dir": "cache", + "no_artifacts": true, + "evaluate_input_model": false +} \ No newline at end of file diff --git a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_webgpu_config.json.config b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_webgpu_config.json.config new file mode 100644 index 00000000..183017fd --- /dev/null +++ b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_webgpu_config.json.config @@ -0,0 +1,42 @@ +{ + "name": "Convert to WebGpu", + "oliveFile": "", + "isLLM": true, + "debugInfo": { + "autoGenerated": true, + "useModelBuilder": "builder" + }, + "addCpu": false, + "runtime": { + "autoGenerated": true, + "name": "Evaluate on", + "type": "enum", + "displayNames": [ + "WebGpu" + ], + "path": "systems.local_system.accelerators.0.execution_providers.0", + "values": [ + "WebGpuExecutionProvider" + ], + "readOnly": false + }, + "sections": [ + { + "autoGenerated": true, + "name": "Convert", + "phase": "Conversion", + "parameters": [], + "toggle": { + "autoGenerated": true, + "name": "Convert to ONNX format", + "type": "bool", + "path": "passes.builder", + "actions": [ + [], + [] + ], + "readOnly": true + } + } + ] +} diff --git a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/info.yml b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/info.yml index 0a43310f..e430a860 100644 --- a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/info.yml +++ b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/info.yml @@ -22,6 +22,9 @@ recipes: - file: "deepseek_dml_config.json" device: gpu ep: DmlExecutionProvider + - file: "deepseek_webgpu_config.json" + device: gpu + ep: WebGpuExecutionProvider aitk: modelInfo: id: "huggingface/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B" diff --git a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/model_project.config b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/model_project.config index d78581fe..b34c8b3a 100644 --- a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/model_project.config +++ b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/model_project.config @@ -19,6 +19,10 @@ { "file": "deepseek_dml_config.json", "templateName": "deepseek_dml_config" + }, + { + "file": "deepseek_webgpu_config.json", + "templateName": "deepseek_webgpu_config" } ], "modelInfo": { diff --git a/intel-bert-base-uncased-mrpc/aitk/bert_webgpu.json b/intel-bert-base-uncased-mrpc/aitk/bert_webgpu.json new file mode 100644 index 00000000..ebbca861 --- /dev/null +++ b/intel-bert-base-uncased-mrpc/aitk/bert_webgpu.json @@ -0,0 +1,130 @@ +{ + "input_model": { + "type": "HfModel", + "model_path": "Intel/bert-base-uncased-mrpc", + "task": "text-classification", + "load_kwargs": { + "attn_implementation": "eager" + } + }, + "systems": { + "host_system": { + "type": "LocalSystem", + "accelerators": [ + { + "device": "cpu", + "execution_providers": [ + "CPUExecutionProvider" + ] + } + ] + }, + "target_system": { + "type": "LocalSystem", + "accelerators": [ + { + "device": "gpu", + "execution_providers": [ + "WebGpuExecutionProvider" + ] + } + ] + } + }, + "data_configs": [ + { + "name": "glue_mrpc_eval", + "type": "HuggingfaceContainer", + "load_dataset_config": { + "data_name": "glue", + "subset": "mrpc", + "split": "validation" + }, + "pre_process_data_config": { + "max_length": 128, + "padding": "max_length", + "input_cols": [ + "sentence1", + "sentence2" + ], + 
"max_samples": 100 + }, + "dataloader_config": { + "batch_size": 1 + } + } + ], + "evaluators": { + "common_evaluator": { + "metrics": [ + { + "name": "accuracy", + "type": "accuracy", + "data_config": "glue_mrpc_eval", + "sub_types": [ + { + "name": "accuracy_score", + "priority": 1 + }, + { + "name": "f1_score" + } + ] + }, + { + "name": "latency", + "type": "latency", + "data_config": "glue_mrpc_eval", + "sub_types": [ + { + "name": "avg", + "priority": 2 + } + ] + } + ] + } + }, + "passes": { + "conversion": { + "type": "OnnxConversion", + "target_opset": 20, + "save_as_external_data": true + }, + "transformer_optimizer": { + "type": "OrtTransformersOptimization", + "model_type": "bert", + "opt_level": 0, + "float16": true, + "use_gpu": true, + "keep_io_types": false, + "optimization_options": { + "enable_gelu": true, + "enable_layer_norm": true, + "enable_attention": true, + "enable_skip_layer_norm": false, + "enable_embed_layer_norm": false, + "enable_bias_skip_layer_norm": false, + "enable_bias_gelu": false, + "enable_gelu_approximation": false, + "enable_qordered_matmul": false, + "enable_shape_inference": true, + "enable_gemm_fast_gelu": false, + "enable_nhwc_conv": false, + "enable_group_norm": false, + "enable_bias_splitgelu": false, + "enable_packed_qkv": true, + "enable_packed_kv": true, + "enable_bias_add": false, + "enable_rotary_embeddings": true + }, + "save_as_external_data": true + } + }, + "evaluator": "common_evaluator", + "evaluate_input_model": false, + "host": "host_system", + "target": "target_system", + "cache_dir": "cache", + "output_dir": "model/bert_webgpu" +} \ No newline at end of file diff --git a/intel-bert-base-uncased-mrpc/aitk/bert_webgpu.json.config b/intel-bert-base-uncased-mrpc/aitk/bert_webgpu.json.config new file mode 100644 index 00000000..eadfcfce --- /dev/null +++ b/intel-bert-base-uncased-mrpc/aitk/bert_webgpu.json.config @@ -0,0 +1,102 @@ +{ + "name": "Convert to WebGpu", + "addCpu": false, + "runtime": { + "autoGenerated": true, + "name": "Evaluate on", + "type": "enum", + "displayNames": [ + "WebGpu" + ], + "path": "systems.target_system.accelerators.0.execution_providers.0", + "values": [ + "WebGpuExecutionProvider" + ], + "readOnly": false + }, + "sections": [ + { + "autoGenerated": true, + "name": "Convert", + "phase": "Conversion", + "parameters": [], + "toggle": { + "autoGenerated": true, + "name": "Convert to ONNX format", + "type": "bool", + "path": "passes.conversion", + "actions": [ + [], + [] + ], + "readOnly": true + } + }, + { + "name": "Evaluate", + "phase": "Evaluation", + "parameters": [ + { + "name": "Evaluation Dataset", + "tags": [ + "EvaluationDataset" + ], + "type": "enum", + "path": "data_configs[0].load_dataset_config.data_name", + "values": [ + "glue" + ], + "template": { + "path": "data_configs[0].load_dataset_config.data_name", + "values": [ + "glue" + ], + "template": "EvaluationDataset" + } + }, + { + "name": "Evaluation Dataset Split", + "tags": [ + "EvaluationDatasetSplit", + "DependsOnDataset" + ], + "type": "enum", + "path": "data_configs[0].load_dataset_config.split", + "values": [ + "train", + "validation", + "test" + ], + "template": { + "path": "data_configs[0].load_dataset_config.split", + "template": "EvaluationDatasetSplit" + } + }, + { + "name": "Evaluation Dataset Size", + "type": "int", + "path": "data_configs[0].pre_process_data_config.max_samples", + "template": { + "path": "data_configs[0].pre_process_data_config.max_samples", + "template": "EvaluationDatasetSize" + } + } + ], + "toggle": { + 
"autoGenerated": true, + "name": "Evaluate model performance", + "type": "bool", + "path": "evaluator", + "actions": [ + [], + [ + { + "type": "delete", + "path": "evaluator" + } + ] + ] + } + } + ] +} diff --git a/intel-bert-base-uncased-mrpc/aitk/info.yml b/intel-bert-base-uncased-mrpc/aitk/info.yml index 6655a388..c6b22743 100644 --- a/intel-bert-base-uncased-mrpc/aitk/info.yml +++ b/intel-bert-base-uncased-mrpc/aitk/info.yml @@ -22,6 +22,9 @@ recipes: - file: "bert_dml.json" device: gpu ep: DmlExecutionProvider + - file: "bert_webgpu.json" + device: gpu + ep: WebGpuExecutionProvider aitk: modelInfo: id: "huggingface/Intel/bert-base-uncased-mrpc" @@ -32,3 +35,4 @@ aitk: - file: "bert_ov.json" - file: "bert_trtrtx.json" - file: "bert_dml.json" + - file: "bert_webgpu.json" diff --git a/intel-bert-base-uncased-mrpc/aitk/model_project.config b/intel-bert-base-uncased-mrpc/aitk/model_project.config index ca302634..42b55101 100644 --- a/intel-bert-base-uncased-mrpc/aitk/model_project.config +++ b/intel-bert-base-uncased-mrpc/aitk/model_project.config @@ -19,6 +19,10 @@ { "file": "bert_dml.json", "templateName": "bert_dml" + }, + { + "file": "bert_webgpu.json", + "templateName": "bert_webgpu" } ], "modelInfo": { diff --git a/meta-llama-Llama-3.2-1B-Instruct/aitk/_copy.json.config b/meta-llama-Llama-3.2-1B-Instruct/aitk/_copy.json.config index 87aead8c..0865b5d0 100644 --- a/meta-llama-Llama-3.2-1B-Instruct/aitk/_copy.json.config +++ b/meta-llama-Llama-3.2-1B-Instruct/aitk/_copy.json.config @@ -27,6 +27,10 @@ { "find": "deepseek_dml_config", "replace": "llama3_2_dml_config" + }, + { + "find": "deepseek_webgpu_config", + "replace": "llama3_2_webgpu_config" } ] }, @@ -122,6 +126,11 @@ } ] }, + { + "src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_trtrtx_config.json.config", + "dst": "llama3_2_trtrtx_config.json.config", + "replacements": [] + }, { "src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_dml_config.json", "dst": "llama3_2_dml_config.json", @@ -137,13 +146,27 @@ ] }, { - "src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_trtrtx_config.json.config", - "dst": "llama3_2_trtrtx_config.json.config", + "src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_dml_config.json.config", + "dst": "llama3_2_dml_config.json.config", "replacements": [] }, { - "src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_dml_config.json.config", - "dst": "llama3_2_dml_config.json.config", + "src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_webgpu_config.json", + "dst": "llama3_2_webgpu_config.json", + "replacements": [ + { + "find": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", + "replace": "meta-llama/Llama-3.2-1B-Instruct" + }, + { + "find": "model/deepseek", + "replace": "model/llama3_2" + } + ] + }, + { + "src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_webgpu_config.json.config", + "dst": "llama3_2_webgpu_config.json.config", "replacements": [] }, { diff --git a/meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_webgpu_config.json b/meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_webgpu_config.json new file mode 100644 index 00000000..a9466a7d --- /dev/null +++ b/meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_webgpu_config.json @@ -0,0 +1,38 @@ +{ + "input_model": { + "type": "HfModel", + "model_path": "meta-llama/Llama-3.2-1B-Instruct" + }, + "systems": { + "local_system": { + "type": "LocalSystem", + "accelerators": [ { "device": "gpu", "execution_providers": [ 
"WebGpuExecutionProvider" ] } ] + } + }, + "data_configs": [ + { + "name": "wikitext2_train", + "type": "HuggingfaceContainer", + "load_dataset_config": { + "data_name": "wikitext", + "subset": "wikitext-2-raw-v1", + "split": "train" + }, + "pre_process_data_config": { + "strategy": "line-by-line", + "add_special_tokens": false, + "max_samples": 128, + "max_seq_len": 512 + } + } + ], + "passes": { + "builder": { "type": "ModelBuilder", "precision": "fp16" } + }, + "target": "local_system", + "log_severity_level": 1, + "output_dir": "model/llama3_2", + "cache_dir": "cache", + "no_artifacts": true, + "evaluate_input_model": false +} \ No newline at end of file diff --git a/meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_webgpu_config.json.config b/meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_webgpu_config.json.config new file mode 100644 index 00000000..183017fd --- /dev/null +++ b/meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_webgpu_config.json.config @@ -0,0 +1,42 @@ +{ + "name": "Convert to WebGpu", + "oliveFile": "", + "isLLM": true, + "debugInfo": { + "autoGenerated": true, + "useModelBuilder": "builder" + }, + "addCpu": false, + "runtime": { + "autoGenerated": true, + "name": "Evaluate on", + "type": "enum", + "displayNames": [ + "WebGpu" + ], + "path": "systems.local_system.accelerators.0.execution_providers.0", + "values": [ + "WebGpuExecutionProvider" + ], + "readOnly": false + }, + "sections": [ + { + "autoGenerated": true, + "name": "Convert", + "phase": "Conversion", + "parameters": [], + "toggle": { + "autoGenerated": true, + "name": "Convert to ONNX format", + "type": "bool", + "path": "passes.builder", + "actions": [ + [], + [] + ], + "readOnly": true + } + } + ] +} diff --git a/meta-llama-Llama-3.2-1B-Instruct/aitk/model_project.config b/meta-llama-Llama-3.2-1B-Instruct/aitk/model_project.config index e800ea2d..eeb5c936 100644 --- a/meta-llama-Llama-3.2-1B-Instruct/aitk/model_project.config +++ b/meta-llama-Llama-3.2-1B-Instruct/aitk/model_project.config @@ -19,6 +19,10 @@ { "file": "llama3_2_dml_config.json", "templateName": "llama3_2_dml_config" + }, + { + "file": "llama3_2_webgpu_config.json", + "templateName": "llama3_2_webgpu_config" } ], "modelInfo": { diff --git a/microsoft-Phi-3.5-mini-instruct/aitk/_copy.json.config b/microsoft-Phi-3.5-mini-instruct/aitk/_copy.json.config index e78f3b56..49a12afa 100644 --- a/microsoft-Phi-3.5-mini-instruct/aitk/_copy.json.config +++ b/microsoft-Phi-3.5-mini-instruct/aitk/_copy.json.config @@ -27,6 +27,10 @@ { "find": "deepseek_dml_config", "replace": "phi3_5_dml_config" + }, + { + "find": "deepseek_webgpu_config", + "replace": "phi3_5_webgpu_config" } ] }, @@ -106,6 +110,11 @@ } ] }, + { + "src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_trtrtx_config.json.config", + "dst": "phi3_5_trtrtx_config.json.config", + "replacements": [] + }, { "src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_dml_config.json", "dst": "phi3_5_dml_config.json", @@ -121,13 +130,27 @@ ] }, { - "src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_trtrtx_config.json.config", - "dst": "phi3_5_trtrtx_config.json.config", + "src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_dml_config.json.config", + "dst": "phi3_5_dml_config.json.config", "replacements": [] }, { - "src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_dml_config.json.config", - "dst": "phi3_5_dml_config.json.config", + "src": 
"../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_webgpu_config.json", + "dst": "phi3_5_webgpu_config.json", + "replacements": [ + { + "find": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", + "replace": "microsoft/Phi-3.5-mini-instruct" + }, + { + "find": "model/deepseek", + "replace": "model/phi3_5" + } + ] + }, + { + "src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_webgpu_config.json.config", + "dst": "phi3_5_webgpu_config.json.config", "replacements": [] }, { diff --git a/microsoft-Phi-3.5-mini-instruct/aitk/model_project.config b/microsoft-Phi-3.5-mini-instruct/aitk/model_project.config index 358e2c6c..91eba57f 100644 --- a/microsoft-Phi-3.5-mini-instruct/aitk/model_project.config +++ b/microsoft-Phi-3.5-mini-instruct/aitk/model_project.config @@ -19,6 +19,10 @@ { "file": "phi3_5_dml_config.json", "templateName": "phi3_5_dml_config" + }, + { + "file": "phi3_5_webgpu_config.json", + "templateName": "phi3_5_webgpu_config" } ], "modelInfo": { diff --git a/microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_webgpu_config.json b/microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_webgpu_config.json new file mode 100644 index 00000000..9c15cd02 --- /dev/null +++ b/microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_webgpu_config.json @@ -0,0 +1,38 @@ +{ + "input_model": { + "type": "HfModel", + "model_path": "microsoft/Phi-3.5-mini-instruct" + }, + "systems": { + "local_system": { + "type": "LocalSystem", + "accelerators": [ { "device": "gpu", "execution_providers": [ "WebGpuExecutionProvider" ] } ] + } + }, + "data_configs": [ + { + "name": "wikitext2_train", + "type": "HuggingfaceContainer", + "load_dataset_config": { + "data_name": "wikitext", + "subset": "wikitext-2-raw-v1", + "split": "train" + }, + "pre_process_data_config": { + "strategy": "line-by-line", + "add_special_tokens": false, + "max_samples": 128, + "max_seq_len": 512 + } + } + ], + "passes": { + "builder": { "type": "ModelBuilder", "precision": "fp16" } + }, + "target": "local_system", + "log_severity_level": 1, + "output_dir": "model/phi3_5", + "cache_dir": "cache", + "no_artifacts": true, + "evaluate_input_model": false +} \ No newline at end of file diff --git a/microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_webgpu_config.json.config b/microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_webgpu_config.json.config new file mode 100644 index 00000000..183017fd --- /dev/null +++ b/microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_webgpu_config.json.config @@ -0,0 +1,42 @@ +{ + "name": "Convert to WebGpu", + "oliveFile": "", + "isLLM": true, + "debugInfo": { + "autoGenerated": true, + "useModelBuilder": "builder" + }, + "addCpu": false, + "runtime": { + "autoGenerated": true, + "name": "Evaluate on", + "type": "enum", + "displayNames": [ + "WebGpu" + ], + "path": "systems.local_system.accelerators.0.execution_providers.0", + "values": [ + "WebGpuExecutionProvider" + ], + "readOnly": false + }, + "sections": [ + { + "autoGenerated": true, + "name": "Convert", + "phase": "Conversion", + "parameters": [], + "toggle": { + "autoGenerated": true, + "name": "Convert to ONNX format", + "type": "bool", + "path": "passes.builder", + "actions": [ + [], + [] + ], + "readOnly": true + } + } + ] +} From 38d6ff4ee9d11be27e311b5639e73f5be7c6c444 Mon Sep 17 00:00:00 2001 From: Chao Zhang Date: Tue, 19 Aug 2025 16:29:22 +0800 Subject: [PATCH 05/14] align with AITK --- .aitk/configs/model_list.json | 8 ++++---- .aitk/scripts/model_lab/__init__.py | 2 +- .aitk/scripts/sanitize/utils.py | 6 +++--- .../aitk/qwen2_5_webgpu_config.json.config | 4 ++-- 
.../aitk/deepseek_webgpu_config.json.config | 4 ++-- intel-bert-base-uncased-mrpc/aitk/bert_webgpu.json.config | 4 ++-- .../aitk/llama3_2_webgpu_config.json.config | 4 ++-- .../aitk/phi3_5_webgpu_config.json.config | 4 ++-- 8 files changed, 18 insertions(+), 18 deletions(-) diff --git a/.aitk/configs/model_list.json b/.aitk/configs/model_list.json index 2acec4fe..c5c96f36 100644 --- a/.aitk/configs/model_list.json +++ b/.aitk/configs/model_list.json @@ -28,7 +28,7 @@ "IntelGPU", "IntelNPU", "DML", - "WebGpu" + "WEBGPU" ], "architecture": "Transformer", "status": "Ready", @@ -116,7 +116,7 @@ "IntelGPU", "IntelNPU", "DML", - "WebGpu" + "WEBGPU" ], "architecture": "Transformer", "status": "Ready", @@ -583,7 +583,7 @@ "NVIDIA GPU": "NvidiaGPU", "NVIDIA TensorRT for RTX": "NvidiaTRTRTX", "DirectML": "DML", - "WebGpu": "WebGpu" + "WebGPU": "WEBGPU" }, "RuntimeToDisplayName": { "CPU": "CPU", @@ -596,6 +596,6 @@ "NvidiaGPU": "NVIDIA GPU", "NvidiaTRTRTX": "NVIDIA TensorRT for RTX", "DML": "DirectML", - "WebGpu": "WebGpu" + "WEBGPU": "WebGPU" } } diff --git a/.aitk/scripts/model_lab/__init__.py b/.aitk/scripts/model_lab/__init__.py index 7d9c062d..7ff8cf8b 100644 --- a/.aitk/scripts/model_lab/__init__.py +++ b/.aitk/scripts/model_lab/__init__.py @@ -11,7 +11,7 @@ class RuntimeEnum(Enum): IntelGPU = "IntelGPU" IntelNPU = "IntelNPU" DML = "DML" - WebGpu = "WebGpu" + WEBGPU = "WEBGPU" NvidiaGPU = "NvidiaGPU" WCR = "WCR" WCR_CUDA = "WCR_CUDA" diff --git a/.aitk/scripts/sanitize/utils.py b/.aitk/scripts/sanitize/utils.py index 8640b814..97806f90 100644 --- a/.aitk/scripts/sanitize/utils.py +++ b/.aitk/scripts/sanitize/utils.py @@ -43,7 +43,7 @@ class GlobalVars: RuntimeEnum.NvidiaGPU: EPNames.CUDAExecutionProvider, RuntimeEnum.NvidiaTRTRTX: EPNames.NvTensorRTRTXExecutionProvider, RuntimeEnum.DML: EPNames.DmlExecutionProvider, - RuntimeEnum.WebGpu: EPNames.WebGpuExecutionProvider, + RuntimeEnum.WEBGPU: EPNames.WebGpuExecutionProvider, } RuntimeToOliveDeviceType = { RuntimeEnum.CPU: OliveDeviceTypes.CPU, @@ -55,7 +55,7 @@ class GlobalVars: RuntimeEnum.AMDNPU: OliveDeviceTypes.NPU, RuntimeEnum.NvidiaGPU: OliveDeviceTypes.GPU, RuntimeEnum.DML: OliveDeviceTypes.GPU, - RuntimeEnum.WebGpu: OliveDeviceTypes.GPU, + RuntimeEnum.WEBGPU: OliveDeviceTypes.GPU, } RuntimeToDisplayName = { RuntimeEnum.CPU: "CPU", @@ -68,7 +68,7 @@ class GlobalVars: RuntimeEnum.NvidiaGPU: "NVIDIA GPU", RuntimeEnum.NvidiaTRTRTX: "NVIDIA TensorRT for RTX", RuntimeEnum.DML: "DirectML", - RuntimeEnum.WebGpu: "WebGpu", + RuntimeEnum.WEBGPU: "WebGPU", } @classmethod diff --git a/Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_webgpu_config.json.config b/Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_webgpu_config.json.config index 183017fd..60a17277 100644 --- a/Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_webgpu_config.json.config +++ b/Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_webgpu_config.json.config @@ -1,5 +1,5 @@ { - "name": "Convert to WebGpu", + "name": "Convert to WebGPU", "oliveFile": "", "isLLM": true, "debugInfo": { @@ -12,7 +12,7 @@ "name": "Evaluate on", "type": "enum", "displayNames": [ - "WebGpu" + "WebGPU" ], "path": "systems.local_system.accelerators.0.execution_providers.0", "values": [ diff --git a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_webgpu_config.json.config b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_webgpu_config.json.config index 183017fd..60a17277 100644 --- a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_webgpu_config.json.config +++ 
b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_webgpu_config.json.config @@ -1,5 +1,5 @@ { - "name": "Convert to WebGpu", + "name": "Convert to WebGPU", "oliveFile": "", "isLLM": true, "debugInfo": { @@ -12,7 +12,7 @@ "name": "Evaluate on", "type": "enum", "displayNames": [ - "WebGpu" + "WebGPU" ], "path": "systems.local_system.accelerators.0.execution_providers.0", "values": [ diff --git a/intel-bert-base-uncased-mrpc/aitk/bert_webgpu.json.config b/intel-bert-base-uncased-mrpc/aitk/bert_webgpu.json.config index eadfcfce..b0f0ed25 100644 --- a/intel-bert-base-uncased-mrpc/aitk/bert_webgpu.json.config +++ b/intel-bert-base-uncased-mrpc/aitk/bert_webgpu.json.config @@ -1,12 +1,12 @@ { - "name": "Convert to WebGpu", + "name": "Convert to WebGPU", "addCpu": false, "runtime": { "autoGenerated": true, "name": "Evaluate on", "type": "enum", "displayNames": [ - "WebGpu" + "WebGPU" ], "path": "systems.target_system.accelerators.0.execution_providers.0", "values": [ diff --git a/meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_webgpu_config.json.config b/meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_webgpu_config.json.config index 183017fd..60a17277 100644 --- a/meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_webgpu_config.json.config +++ b/meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_webgpu_config.json.config @@ -1,5 +1,5 @@ { - "name": "Convert to WebGpu", + "name": "Convert to WebGPU", "oliveFile": "", "isLLM": true, "debugInfo": { @@ -12,7 +12,7 @@ "name": "Evaluate on", "type": "enum", "displayNames": [ - "WebGpu" + "WebGPU" ], "path": "systems.local_system.accelerators.0.execution_providers.0", "values": [ diff --git a/microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_webgpu_config.json.config b/microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_webgpu_config.json.config index 183017fd..60a17277 100644 --- a/microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_webgpu_config.json.config +++ b/microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_webgpu_config.json.config @@ -1,5 +1,5 @@ { - "name": "Convert to WebGpu", + "name": "Convert to WebGPU", "oliveFile": "", "isLLM": true, "debugInfo": { @@ -12,7 +12,7 @@ "name": "Evaluate on", "type": "enum", "displayNames": [ - "WebGpu" + "WebGPU" ], "path": "systems.local_system.accelerators.0.execution_providers.0", "values": [ From 60caa99312b8376acc201251f94b5e121f99bdcb Mon Sep 17 00:00:00 2001 From: Chao Zhang Date: Tue, 19 Aug 2025 17:31:59 +0800 Subject: [PATCH 06/14] fix llm webgpu precision --- .aitk/configs/checks.json | 6 +- .aitk/configs/model_list.json | 6 +- .../aitk/qwen2_5_webgpu_config.json | 2 +- .../aitk/deepseek_webgpu_config.json | 2 +- .../bert-base-multilingual-cased_webgpu.json | 138 +++++++++++++++++ ...base-multilingual-cased_webgpu.json.config | 123 +++++++++++++++ .../aitk/info.yml | 3 + .../aitk/model_project.config | 4 + google-vit-base-patch16-224/aitk/info.yml | 3 + .../aitk/model_project.config | 4 + .../aitk/vit-base-patch16-224_webgpu.json | 142 ++++++++++++++++++ .../vit-base-patch16-224_webgpu.json.config | 104 +++++++++++++ .../aitk/llama3_2_webgpu_config.json | 2 +- .../aitk/phi3_5_webgpu_config.json | 2 +- 14 files changed, 532 insertions(+), 9 deletions(-) create mode 100644 google-bert-bert-base-multilingual-cased/aitk/bert-base-multilingual-cased_webgpu.json create mode 100644 google-bert-bert-base-multilingual-cased/aitk/bert-base-multilingual-cased_webgpu.json.config create mode 100644 google-vit-base-patch16-224/aitk/vit-base-patch16-224_webgpu.json create mode 100644 
google-vit-base-patch16-224/aitk/vit-base-patch16-224_webgpu.json.config diff --git a/.aitk/configs/checks.json b/.aitk/configs/checks.json index 532ef433..e88bd74f 100644 --- a/.aitk/configs/checks.json +++ b/.aitk/configs/checks.json @@ -1,13 +1,13 @@ { - "configCheck": 85, + "configCheck": 87, "extensionCheck": 1, "gitignoreCheck": 32, "inferenceModelCheck": 22, "ipynbCheck": 51, "modelProjectCheck": 33, "oliveCheck": 0, - "oliveJsonCheck": 85, - "pathCheck": 772, + "oliveJsonCheck": 87, + "pathCheck": 789, "requirementsCheck": 32, "venvRequirementsCheck": 12 } diff --git a/.aitk/configs/model_list.json b/.aitk/configs/model_list.json index c5c96f36..724e1117 100644 --- a/.aitk/configs/model_list.json +++ b/.aitk/configs/model_list.json @@ -77,7 +77,8 @@ "IntelCPU", "IntelGPU", "IntelNPU", - "DML" + "DML", + "WEBGPU" ], "architecture": "Transformer", "status": "Ready", @@ -96,7 +97,8 @@ "IntelCPU", "IntelGPU", "IntelNPU", - "DML" + "DML", + "WEBGPU" ], "architecture": "Transformer", "status": "Ready", diff --git a/Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_webgpu_config.json b/Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_webgpu_config.json index 1ed86300..8c4c0f8c 100644 --- a/Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_webgpu_config.json +++ b/Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_webgpu_config.json @@ -27,7 +27,7 @@ } ], "passes": { - "builder": { "type": "ModelBuilder", "precision": "fp16" } + "builder": { "type": "ModelBuilder", "precision": "int4" } }, "target": "local_system", "log_severity_level": 1, diff --git a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_webgpu_config.json b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_webgpu_config.json index c02096ff..34cd324f 100644 --- a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_webgpu_config.json +++ b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_webgpu_config.json @@ -27,7 +27,7 @@ } ], "passes": { - "builder": { "type": "ModelBuilder", "precision": "fp16" } + "builder": { "type": "ModelBuilder", "precision": "int4" } }, "target": "local_system", "log_severity_level": 1, diff --git a/google-bert-bert-base-multilingual-cased/aitk/bert-base-multilingual-cased_webgpu.json b/google-bert-bert-base-multilingual-cased/aitk/bert-base-multilingual-cased_webgpu.json new file mode 100644 index 00000000..962aba68 --- /dev/null +++ b/google-bert-bert-base-multilingual-cased/aitk/bert-base-multilingual-cased_webgpu.json @@ -0,0 +1,138 @@ +{ + "input_model": { + "type": "HfModel", + "model_path": "google-bert/bert-base-multilingual-cased", + "task": "feature-extraction" + }, + "systems": { + "host_system": { + "type": "LocalSystem", + "accelerators": [ + { + "device": "cpu", + "execution_providers": [ + "CPUExecutionProvider" + ] + } + ] + }, + "target_system": { + "type": "LocalSystem", + "accelerators": [ + { + "device": "gpu", + "execution_providers": [ + "WebGpuExecutionProvider" + ] + } + ] + } + }, + "data_configs": [ + { + "name": "evaluation_data_config", + "type": "HuggingfaceContainer", + "load_dataset_config": { + "data_name": "facebook/xnli", + "subset": "en", + "split": "validation" + }, + "pre_process_data_config": { + "input_cols": [ + "premise" + ], + "padding": "max_length", + "max_length": 128, + "max_samples": 10 + }, + "dataloader_config": { + "batch_size": 1 + } + } + ], + "evaluators": { + "common_evaluator": { + "metrics": [ + { + "name": "latency", + "type": "latency", + "data_config": "evaluation_data_config", + "sub_types": [ + { + "name": "avg", + "priority": 1, + "goal": { + "type": 
"percent-min-improvement", + "value": 0.1 + } + }, + { + "name": "max" + }, + { + "name": "min" + } + ] + }, + { + "name": "throughput", + "type": "throughput", + "data_config": "evaluation_data_config", + "sub_types": [ + { + "name": "avg" + }, + { + "name": "max" + }, + { + "name": "min" + } + ] + } + ] + } + }, + "passes": { + "conversion": { + "type": "OnnxConversion", + "target_opset": 20, + "save_as_external_data": true + }, + "transformer_optimizer": { + "type": "OrtTransformersOptimization", + "model_type": "bert", + "opt_level": 0, + "float16": true, + "use_gpu": true, + "keep_io_types": false, + "optimization_options": { + "enable_gelu": true, + "enable_layer_norm": true, + "enable_attention": true, + "enable_skip_layer_norm": false, + "enable_embed_layer_norm": false, + "enable_bias_skip_layer_norm": false, + "enable_bias_gelu": false, + "enable_gelu_approximation": false, + "enable_qordered_matmul": false, + "enable_shape_inference": true, + "enable_gemm_fast_gelu": false, + "enable_nhwc_conv": false, + "enable_group_norm": false, + "enable_bias_splitgelu": false, + "enable_packed_qkv": true, + "enable_packed_kv": true, + "enable_bias_add": false, + "enable_rotary_embeddings": true + }, + "save_as_external_data": true + } + }, + "host": "host_system", + "target": "target_system", + "evaluator": "common_evaluator", + "cache_dir": "cache", + "output_dir": "model/google_bert", + "evaluate_input_model": false +} diff --git a/google-bert-bert-base-multilingual-cased/aitk/bert-base-multilingual-cased_webgpu.json.config b/google-bert-bert-base-multilingual-cased/aitk/bert-base-multilingual-cased_webgpu.json.config new file mode 100644 index 00000000..16d4d9bd --- /dev/null +++ b/google-bert-bert-base-multilingual-cased/aitk/bert-base-multilingual-cased_webgpu.json.config @@ -0,0 +1,123 @@ +{ + "name": "Convert to WebGPU", + "addCpu": false, + "runtime": { + "autoGenerated": true, + "name": "Evaluate on", + "type": "enum", + "displayNames": [ + "WebGPU" + ], + "path": "systems.target_system.accelerators.0.execution_providers.0", + "values": [ + "WebGpuExecutionProvider" + ], + "readOnly": false + }, + "sections": [ + { + "autoGenerated": true, + "name": "Convert", + "phase": "Conversion", + "parameters": [], + "toggle": { + "autoGenerated": true, + "name": "Convert to ONNX format", + "type": "bool", + "path": "passes.conversion", + "actions": [ + [], + [] + ], + "readOnly": true + } + }, + { + "name": "Evaluate", + "phase": "Evaluation", + "parameters": [ + { + "name": "Evaluation Dataset", + "tags": [ + "EvaluationDataset" + ], + "type": "enum", + "path": "data_configs[0].load_dataset_config.data_name", + "values": [ + "facebook/xnli" + ], + "template": { + "path": "data_configs[0].load_dataset_config.data_name", + "values": [ + "facebook/xnli" + ], + "template": "EvaluationDataset" + } + }, + { + "name": "Evaluation Dataset Subset", + "tags": [ + "EvaluationDatasetSubset", + "DependsOnDataset" + ], + "type": "enum", + "path": "data_configs[0].load_dataset_config.subset", + "values": [ + "en", + "all_languages" + ], + "template": { + "path": "data_configs[0].load_dataset_config.subset", + "values": [ + "en", + "all_languages" + ], + "template": "EvaluationDatasetSubset" + } + }, + { + "name": "Evaluation Dataset Split", + "tags": [ + "EvaluationDatasetSplit", + "DependsOnDataset" + ], + "type": "enum", + "path": "data_configs[0].load_dataset_config.split", + "values": [ + "train", + "validation", + "test" + ], + "template": { + "path": "data_configs[0].load_dataset_config.split", + 
"template": "EvaluationDatasetSplit" + } + }, + { + "name": "Quantization Dataset Size", + "type": "int", + "path": "data_configs[0].pre_process_data_config.max_samples", + "template": { + "path": "data_configs[0].pre_process_data_config.max_samples", + "template": "QuantizationDatasetSize" + } + } + ], + "toggle": { + "autoGenerated": true, + "name": "Evaluate model performance", + "type": "bool", + "path": "evaluator", + "actions": [ + [], + [ + { + "type": "delete", + "path": "evaluator" + } + ] + ] + } + } + ] +} diff --git a/google-bert-bert-base-multilingual-cased/aitk/info.yml b/google-bert-bert-base-multilingual-cased/aitk/info.yml index b87cb62d..dad37666 100644 --- a/google-bert-bert-base-multilingual-cased/aitk/info.yml +++ b/google-bert-bert-base-multilingual-cased/aitk/info.yml @@ -20,6 +20,9 @@ recipes: - file: "bert-base-multilingual-cased_dml.json" device: gpu ep: DmlExecutionProvider + - file: "bert-base-multilingual-cased_webgpu.json" + device: gpu + ep: WebGpuExecutionProvider aitk: modelInfo: id: "huggingface/google-bert/bert-base-multilingual-cased" diff --git a/google-bert-bert-base-multilingual-cased/aitk/model_project.config b/google-bert-bert-base-multilingual-cased/aitk/model_project.config index 41846e12..1272f822 100644 --- a/google-bert-bert-base-multilingual-cased/aitk/model_project.config +++ b/google-bert-bert-base-multilingual-cased/aitk/model_project.config @@ -19,6 +19,10 @@ { "file": "bert-base-multilingual-cased_dml.json", "templateName": "bert-base-multilingual-cased_dml" + }, + { + "file": "bert-base-multilingual-cased_webgpu.json", + "templateName": "bert-base-multilingual-cased_webgpu" } ], "modelInfo": { diff --git a/google-vit-base-patch16-224/aitk/info.yml b/google-vit-base-patch16-224/aitk/info.yml index cdc2474e..2ca96f51 100644 --- a/google-vit-base-patch16-224/aitk/info.yml +++ b/google-vit-base-patch16-224/aitk/info.yml @@ -20,6 +20,9 @@ recipes: - file: "vit-base-patch16-224_dml.json" device: gpu ep: DmlExecutionProvider + - file: "vit-base-patch16-224_webgpu.json" + device: gpu + ep: WebGpuExecutionProvider aitk: modelInfo: id: "huggingface/google/vit-base-patch16-224" diff --git a/google-vit-base-patch16-224/aitk/model_project.config b/google-vit-base-patch16-224/aitk/model_project.config index 7ec62cd3..41f5cd4e 100644 --- a/google-vit-base-patch16-224/aitk/model_project.config +++ b/google-vit-base-patch16-224/aitk/model_project.config @@ -19,6 +19,10 @@ { "file": "vit-base-patch16-224_dml.json", "templateName": "vit-base-patch16-224_dml" + }, + { + "file": "vit-base-patch16-224_webgpu.json", + "templateName": "vit-base-patch16-224_webgpu" } ], "modelInfo": { diff --git a/google-vit-base-patch16-224/aitk/vit-base-patch16-224_webgpu.json b/google-vit-base-patch16-224/aitk/vit-base-patch16-224_webgpu.json new file mode 100644 index 00000000..c1b6866e --- /dev/null +++ b/google-vit-base-patch16-224/aitk/vit-base-patch16-224_webgpu.json @@ -0,0 +1,142 @@ +{ + "input_model": { + "type": "HfModel", + "model_path": "google/vit-base-patch16-224", + "task": "image-classification", + "io_config": { + "input_names": [ + "pixel_values" + ], + "input_shapes": [ + [ + 1, + 3, + 224, + 224 + ] + ], + "output_names": [ + "output" + ] + } + }, + "systems": { + "host_system": { + "type": "LocalSystem", + "accelerators": [ + { + "device": "cpu", + "execution_providers": [ + "CPUExecutionProvider" + ] + } + ] + }, + "target_system": { + "type": "LocalSystem", + "accelerators": [ + { + "device": "gpu", + "execution_providers": [ + "WebGpuExecutionProvider" 
+ ] + } + ] + } + }, + "data_configs": [ + { + "name": "evaluation_data_config", + "type": "HuggingfaceContainer", + "user_script": "vit-base-patch16-224.py", + "load_dataset_config": { + "data_name": "timm/mini-imagenet", + "split": "validation", + "streaming": true, + "trust_remote_code": true + }, + "pre_process_data_config": { + "type": "dataset_pre_process", + "size": 1000, + "cache_key": "imagedata_evaluation" + }, + "post_process_data_config": { + "type": "dataset_post_process" + } + } + ], + "evaluators": { + "common_evaluator": { + "metrics": [ + { + "name": "accuracy", + "type": "accuracy", + "data_config": "evaluation_data_config", + "sub_types": [ + { + "name": "accuracy_score", + "priority": 1, + "metric_config": { + "task": "multiclass", + "num_classes": 1000 + } + } + ] + }, + { + "name": "latency", + "type": "latency", + "data_config": "evaluation_data_config", + "sub_types": [ + { + "name": "avg", + "priority": 2 + } + ] + } + ] + } + }, + "passes": { + "conversion": { + "type": "OnnxConversion", + "target_opset": 20, + "save_as_external_data": true + }, + "transformer_optimizer": { + "type": "OrtTransformersOptimization", + "model_type": "vit", + "opt_level": 0, + "float16": true, + "use_gpu": true, + "keep_io_types": false, + "optimization_options": { + "enable_gelu": true, + "enable_layer_norm": true, + "enable_attention": true, + "enable_skip_layer_norm": false, + "enable_embed_layer_norm": false, + "enable_bias_skip_layer_norm": false, + "enable_bias_gelu": false, + "enable_gelu_approximation": false, + "enable_qordered_matmul": false, + "enable_shape_inference": true, + "enable_gemm_fast_gelu": false, + "enable_nhwc_conv": false, + "enable_group_norm": false, + "enable_bias_splitgelu": false, + "enable_packed_qkv": true, + "enable_packed_kv": true, + "enable_bias_add": false, + "enable_rotary_embeddings": true + }, + "save_as_external_data": true + } + }, + "evaluator": "common_evaluator", + "evaluate_input_model": false, + "host": "host_system", + "target": "target_system", + "cache_dir": "cache", + "output_dir": "model/vit" +} diff --git a/google-vit-base-patch16-224/aitk/vit-base-patch16-224_webgpu.json.config b/google-vit-base-patch16-224/aitk/vit-base-patch16-224_webgpu.json.config new file mode 100644 index 00000000..23c3f074 --- /dev/null +++ b/google-vit-base-patch16-224/aitk/vit-base-patch16-224_webgpu.json.config @@ -0,0 +1,104 @@ +{ + "name": "Convert to WebGPU", + "addCpu": false, + "runtime": { + "autoGenerated": true, + "name": "Evaluate on", + "type": "enum", + "displayNames": [ + "WebGPU" + ], + "path": "systems.target_system.accelerators.0.execution_providers.0", + "values": [ + "WebGpuExecutionProvider" + ], + "readOnly": false + }, + "sections": [ + { + "autoGenerated": true, + "name": "Convert", + "phase": "Conversion", + "parameters": [], + "toggle": { + "autoGenerated": true, + "name": "Convert to ONNX format", + "type": "bool", + "path": "passes.conversion", + "actions": [ + [], + [] + ], + "readOnly": true + } + }, + { + "name": "Evaluate", + "phase": "Evaluation", + "parameters": [ + { + "name": "Evaluation Dataset", + "tags": [ + "EvaluationDataset" + ], + "type": "enum", + "path": "data_configs[0].load_dataset_config.data_name", + "values": [ + "timm/mini-imagenet", + "imagenet-1k" + ], + "template": { + "path": "data_configs[0].load_dataset_config.data_name", + "values": [ + "timm/mini-imagenet", + "imagenet-1k" + ], + "template": "EvaluationDataset" + } + }, + { + "name": "Evaluation Dataset Split", + "tags": [ + "EvaluationDatasetSplit", 
+ "DependsOnDataset" + ], + "type": "enum", + "path": "data_configs[0].load_dataset_config.split", + "values": [ + "train", + "validation", + "test" + ], + "template": { + "path": "data_configs[0].load_dataset_config.split", + "template": "EvaluationDatasetSplit" + } + }, + { + "name": "Evaluation Dataset Size", + "type": "int", + "path": "data_configs[0].pre_process_data_config.size", + "template": { + "path": "data_configs[0].pre_process_data_config.size", + "template": "EvaluationDatasetSize" + } + } + ], + "toggle": { + "autoGenerated": true, + "name": "Evaluate model performance", + "type": "bool", + "path": "evaluator", + "actions": [ + [], + [ + { + "type": "delete", + "path": "evaluator" + } + ] + ] + } + } + ] +} diff --git a/meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_webgpu_config.json b/meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_webgpu_config.json index a9466a7d..c27b5153 100644 --- a/meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_webgpu_config.json +++ b/meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_webgpu_config.json @@ -27,7 +27,7 @@ } ], "passes": { - "builder": { "type": "ModelBuilder", "precision": "fp16" } + "builder": { "type": "ModelBuilder", "precision": "int4" } }, "target": "local_system", "log_severity_level": 1, diff --git a/microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_webgpu_config.json b/microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_webgpu_config.json index 9c15cd02..71037506 100644 --- a/microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_webgpu_config.json +++ b/microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_webgpu_config.json @@ -27,7 +27,7 @@ } ], "passes": { - "builder": { "type": "ModelBuilder", "precision": "fp16" } + "builder": { "type": "ModelBuilder", "precision": "int4" } }, "target": "local_system", "log_severity_level": 1, From ecc64cfaff9a864ee8dedb00a90ed086e5a09ae3 Mon Sep 17 00:00:00 2001 From: Chao Zhang Date: Wed, 20 Aug 2025 13:10:11 +0800 Subject: [PATCH 07/14] add all webgpu recipes --- .aitk/configs/checks.json | 8 +- .aitk/configs/model_list.json | 21 +- Qwen-Qwen2.5-1.5B-Instruct/aitk/info.yml | 3 + .../aitk/_copy.json.config | 10 + ...-patch16-224_webgpu_inference_sample.ipynb | 233 ++++++++++++++++++ intel-bert-base-uncased-mrpc/aitk/info.yml | 7 - .../aitk/_copy.json.config | 25 ++ .../aitk/info.yml | 3 + .../aitk/laion_clip_webgpu.json | 206 ++++++++++++++++ .../aitk/laion_clip_webgpu.json.config | 84 +++++++ .../laion_clip_webgpu_inference_sample.ipynb | 115 +++++++++ .../aitk/model_project.config | 4 + .../aitk/info.yml | 3 + microsoft-Phi-3.5-mini-instruct/aitk/info.yml | 3 + microsoft-resnet-50/aitk/_copy.json.config | 10 + microsoft-resnet-50/aitk/info.yml | 3 + microsoft-resnet-50/aitk/model_project.config | 4 + microsoft-resnet-50/aitk/resnet_webgpu.json | 121 +++++++++ .../aitk/resnet_webgpu.json.config | 104 ++++++++ .../aitk/resnet_webgpu_inference_sample.ipynb | 145 +++++++++++ microsoft-resnet-50/aitk/winml.py | 21 ++ .../aitk/_copy.json.config | 10 + openai-clip-vit-base-patch16/aitk/info.yml | 3 + .../aitk/model_project.config | 4 + .../aitk/openai_clip_webgpu.json | 206 ++++++++++++++++ .../aitk/openai_clip_webgpu.json.config | 84 +++++++ .../openai_clip_webgpu_inference_sample.ipynb | 115 +++++++++ .../aitk/_copy.json.config | 25 ++ openai-clip-vit-base-patch32/aitk/info.yml | 3 + .../aitk/model_project.config | 4 + .../aitk/openai_clip_webgpu.json | 206 ++++++++++++++++ .../aitk/openai_clip_webgpu.json.config | 84 +++++++ .../openai_clip_webgpu_inference_sample.ipynb | 115 +++++++++ 33 files changed, 1974 insertions(+), 18 
deletions(-) create mode 100644 google-vit-base-patch16-224/aitk/vit-base-patch16-224_webgpu_inference_sample.ipynb create mode 100644 laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_webgpu.json create mode 100644 laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_webgpu.json.config create mode 100644 laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_webgpu_inference_sample.ipynb create mode 100644 microsoft-resnet-50/aitk/resnet_webgpu.json create mode 100644 microsoft-resnet-50/aitk/resnet_webgpu.json.config create mode 100644 microsoft-resnet-50/aitk/resnet_webgpu_inference_sample.ipynb create mode 100644 microsoft-resnet-50/aitk/winml.py create mode 100644 openai-clip-vit-base-patch16/aitk/openai_clip_webgpu.json create mode 100644 openai-clip-vit-base-patch16/aitk/openai_clip_webgpu.json.config create mode 100644 openai-clip-vit-base-patch16/aitk/openai_clip_webgpu_inference_sample.ipynb create mode 100644 openai-clip-vit-base-patch32/aitk/openai_clip_webgpu.json create mode 100644 openai-clip-vit-base-patch32/aitk/openai_clip_webgpu.json.config create mode 100644 openai-clip-vit-base-patch32/aitk/openai_clip_webgpu_inference_sample.ipynb diff --git a/.aitk/configs/checks.json b/.aitk/configs/checks.json index e88bd74f..6fcea59f 100644 --- a/.aitk/configs/checks.json +++ b/.aitk/configs/checks.json @@ -1,13 +1,13 @@ { - "configCheck": 87, + "configCheck": 91, "extensionCheck": 1, "gitignoreCheck": 32, "inferenceModelCheck": 22, - "ipynbCheck": 51, + "ipynbCheck": 56, "modelProjectCheck": 33, "oliveCheck": 0, - "oliveJsonCheck": 87, - "pathCheck": 789, + "oliveJsonCheck": 91, + "pathCheck": 818, "requirementsCheck": 32, "venvRequirementsCheck": 12 } diff --git a/.aitk/configs/model_list.json b/.aitk/configs/model_list.json index 724e1117..eb795b84 100644 --- a/.aitk/configs/model_list.json +++ b/.aitk/configs/model_list.json @@ -137,7 +137,8 @@ "IntelCPU", "IntelGPU", "IntelNPU", - "DML" + "DML", + "WEBGPU" ], "architecture": "Transformer", "status": "Ready", @@ -156,7 +157,8 @@ "IntelCPU", "IntelGPU", "IntelNPU", - "DML" + "DML", + "WEBGPU" ], "architecture": "Transformer", "status": "Ready", @@ -205,7 +207,8 @@ "IntelCPU", "IntelGPU", "IntelNPU", - "DML" + "DML", + "WEBGPU" ], "architecture": "Transformer", "status": "Ready", @@ -278,7 +281,8 @@ "IntelCPU", "IntelGPU", "IntelNPU", - "DML" + "DML", + "WEBGPU" ], "architecture": "CNN", "status": "Ready", @@ -310,7 +314,8 @@ "IntelCPU", "IntelGPU", "IntelNPU", - "DML" + "DML", + "WEBGPU" ], "architecture": "Transformer", "status": "Ready", @@ -329,7 +334,8 @@ "IntelCPU", "IntelGPU", "IntelNPU", - "DML" + "DML", + "WEBGPU" ], "architecture": "Transformer", "status": "Ready", @@ -378,7 +384,8 @@ "IntelCPU", "IntelGPU", "IntelNPU", - "DML" + "DML", + "WEBGPU" ], "architecture": "Transformer", "status": "Ready", diff --git a/Qwen-Qwen2.5-1.5B-Instruct/aitk/info.yml b/Qwen-Qwen2.5-1.5B-Instruct/aitk/info.yml index e7bacd4c..efe47e27 100644 --- a/Qwen-Qwen2.5-1.5B-Instruct/aitk/info.yml +++ b/Qwen-Qwen2.5-1.5B-Instruct/aitk/info.yml @@ -20,6 +20,9 @@ recipes: - file: "qwen2_5_dml_config.json" device: gpu ep: DmlExecutionProvider + - file: "qwen2_5_webgpu_config.json" + device: gpu + ep: WebGpuExecutionProvider aitk: modelInfo: id: "huggingface/Qwen/Qwen2.5-1.5B-Instruct" diff --git a/google-vit-base-patch16-224/aitk/_copy.json.config b/google-vit-base-patch16-224/aitk/_copy.json.config index f924a60d..9f13da90 100644 --- a/google-vit-base-patch16-224/aitk/_copy.json.config +++ b/google-vit-base-patch16-224/aitk/_copy.json.config 
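The _copy.json.config hunks that follow describe how derived files are generated: a source file, a destination, and literal find/replace rules. Going by the "src"/"dst"/"replacements" schema visible in these diffs, the generation step amounts to something like the sketch below; the Toolkit's actual generator script is not included here, so treat this as an assumed illustration:

    from pathlib import Path

    def apply_copy(entry: dict, base_dir: Path) -> None:
        # Read the source, apply each literal replacement, write the target.
        text = (base_dir / entry["src"]).read_text(encoding="utf-8")
        for rule in entry.get("replacements", []):
            text = text.replace(rule["find"], rule["replace"])
        (base_dir / entry["dst"]).write_text(text, encoding="utf-8")

    # For example, regenerating the WebGPU notebook from the DML sample:
    apply_copy(
        {
            "src": "vit-base-patch16-224_dml_inference_sample.ipynb",
            "dst": "vit-base-patch16-224_webgpu_inference_sample.ipynb",
            "replacements": [
                {"find": "DmlExecutionProvider", "replace": "WebGpuExecutionProvider"}
            ],
        },
        Path("google-vit-base-patch16-224/aitk"),
    )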
@@ -38,6 +38,16 @@ } ] }, + { + "src": "vit-base-patch16-224_dml_inference_sample.ipynb", + "dst": "vit-base-patch16-224_webgpu_inference_sample.ipynb", + "replacements": [ + { + "find": "DmlExecutionProvider", + "replace": "WebGpuExecutionProvider" + } + ] + }, { "src": "../../intel-bert-base-uncased-mrpc/aitk/winml.py", "dst": "winml.py" diff --git a/google-vit-base-patch16-224/aitk/vit-base-patch16-224_webgpu_inference_sample.ipynb b/google-vit-base-patch16-224/aitk/vit-base-patch16-224_webgpu_inference_sample.ipynb new file mode 100644 index 00000000..fa55eb61 --- /dev/null +++ b/google-vit-base-patch16-224/aitk/vit-base-patch16-224_webgpu_inference_sample.ipynb @@ -0,0 +1,233 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "onnx_model_path = \"./model/model.onnx\"\n", + "\n", + "ExecutionProvider=\"WebGpuExecutionProvider\"\n", + "if ExecutionProvider == \"OpenVINOExecutionProvider\":\n", + " onnx_model_path = \"./model/ov_model_st_quant.onnx\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# reference: https://learn.microsoft.com/en-us/windows/ai/new-windows-ml/tutorial?tabs=python#acquiring-the-model-and-preprocessing\n", + "import subprocess\n", + "import json\n", + "import sys\n", + "import os\n", + "import onnxruntime as ort\n", + "\n", + "def register_execution_providers():\n", + " worker_script = os.path.abspath('winml.py')\n", + " result = subprocess.check_output([sys.executable, worker_script], text=True)\n", + " paths = json.loads(result)\n", + " for item in paths.items():\n", + " ort.register_execution_provider_library(item[0], item[1])\n", + "\n", + "register_execution_providers()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import onnxruntime as ort\n", + "import time\n", + "import torch\n", + "import torchvision.transforms as transforms\n", + "from datasets import load_dataset\n", + "from transformers import ViTFeatureExtractor, ViTForImageClassification" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "num_samples = 256" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Load datasets\n", + "\n", + "feature_extractor = ViTFeatureExtractor.from_pretrained(\"google/vit-base-patch16-224\")\n", + "preprocess = transforms.Compose([\n", + " transforms.Lambda(lambda img: img.convert(\"RGB\")),\n", + " transforms.Resize((224, 224)),\n", + " transforms.ToTensor(),\n", + " transforms.Normalize(mean=feature_extractor.image_mean, std=feature_extractor.image_std),\n", + "])\n", + "\n", + "def imageTransform(example):\n", + " example[\"image\"] = preprocess(example[\"image\"])\n", + " return example\n", + "datasetStream = load_dataset(\"timm/mini-imagenet\", split=\"validation\", streaming=True, trust_remote_code=True)\n", + "iterable_dataset = iter(datasetStream)\n", + "selected_samples = [next(iterable_dataset) for _ in range(num_samples)]\n", + "selected_samples = list(map(imageTransform, selected_samples))\n", + "\n", + "def get_imagenet_label_map():\n", + " import json\n", + " from pathlib import Path\n", + " cache_file = Path(f\"../../cache/data/imagenet_class_index.json\")\n", + " if not cache_file.exists():\n", + " import requests \n", + " imagenet_class_index_url = (\n", + " 
\"https://raw.githubusercontent.com/pytorch/vision/main/gallery/assets/imagenet_class_index.json\"\n", + " )\n", + " response = requests.get(imagenet_class_index_url)\n", + " response.raise_for_status() # Ensure the request was successful\n", + " content = response.json()\n", + " cache_file.parent.resolve().mkdir(parents=True, exist_ok=True)\n", + " with open(cache_file, \"w\") as f:\n", + " json.dump(content, f)\n", + " else:\n", + " with open(cache_file) as f:\n", + " content = json.loads(f.read())\n", + "\n", + " return {v[0]: int(k) for k, v in content.items()}\n", + "\n", + "label_map = get_imagenet_label_map()\n", + "label_names = datasetStream.features[\"label\"].names\n", + "\n", + "def mini_to_imagenet_label(mini_label):\n", + " class_name = label_names[mini_label]\n", + " return label_map[class_name]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Original model metrics\n", + "\n", + "def evaluate_torch(model, selected_samples, device):\n", + " model.eval()\n", + " correct, total = 0, 0\n", + " latencies = []\n", + " with torch.no_grad():\n", + " for example in selected_samples:\n", + " image = example[\"image\"].unsqueeze(0).to(device)\n", + " label = torch.tensor(example[\"label\"]).to(device)\n", + " label = mini_to_imagenet_label(label.item())\n", + " \n", + " start_time = time.time()\n", + " output = model(image)\n", + " end_time = time.time()\n", + " \n", + " latencies.append((end_time - start_time))\n", + " pred = torch.argmax(output.logits, dim=1)\n", + " correct += (pred == label).sum().item()\n", + " total += 1\n", + " \n", + " accuracy = correct / total\n", + " avg_latency = np.mean(latencies)\n", + " return accuracy, avg_latency\n", + "\n", + "device = torch.device(\"cpu\")\n", + "model = ViTForImageClassification.from_pretrained(\"google/vit-base-patch16-224\").to(device)\n", + "accuracy, avg_latency = evaluate_torch(model, selected_samples, device)\n", + "\n", + "print(f\"Original Model Accuracy: {accuracy * 100:.2f}%\")\n", + "print(f\"Original Model Average Latency Per Image: {avg_latency * 1000:.2f} ms\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Quantized model metrics\n", + "\n", + "def evaluate_onnx(session, selected_samples):\n", + " correct, total = 0, 0\n", + " latencies = []\n", + " input_name = session.get_inputs()[0].name\n", + " output_name = session.get_outputs()[0].name\n", + "\n", + " for example in selected_samples:\n", + " image = np.expand_dims(example[\"image\"], axis=0)\n", + " label = example[\"label\"]\n", + " label = mini_to_imagenet_label(label)\n", + " \n", + " start_time = time.time()\n", + " output = session.run([output_name], {input_name: image.astype(np.float16)})[0]\n", + " end_time = time.time()\n", + " \n", + " latencies.append((end_time - start_time))\n", + " pred = np.argmax(output, axis=1)[0]\n", + " correct += (pred == label)\n", + " total += 1\n", + " \n", + " accuracy = correct / total\n", + " avg_latency = np.mean(latencies)\n", + " return accuracy, avg_latency\n", + "\n", + "def add_ep_for_device(session_options, ep_name, device_type, ep_options=None):\n", + " ep_devices = ort.get_ep_devices()\n", + " for ep_device in ep_devices:\n", + " if ep_device.ep_name == ep_name and ep_device.device.type == device_type:\n", + " print(f\"Adding {ep_name} for {device_type}\")\n", + " session_options.add_provider_for_devices([ep_device], {} if ep_options is None else ep_options)\n", + " 
break\n", + "\n", + "\n", + "session_options = ort.SessionOptions()\n", + "\n", + "add_ep_for_device(session_options, ExecutionProvider, ort.OrtHardwareDeviceType.NPU)\n", + "\n", + "session = ort.InferenceSession(\n", + " onnx_model_path, # a model with QNN EPContext nodes\n", + " sess_options=session_options,\n", + ")\n", + "\n", + "accuracy, avg_latency = evaluate_onnx(session, selected_samples)\n", + "\n", + "print(f\"Quantized Model Accuracy: {accuracy * 100:.2f}%\")\n", + "print(f\"Quantized Model Average Latency Per Image: {avg_latency * 1000:.2f} ms\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python-WCR-win32-x64-3.12.9", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.9" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/intel-bert-base-uncased-mrpc/aitk/info.yml b/intel-bert-base-uncased-mrpc/aitk/info.yml index c6b22743..36c8d26b 100644 --- a/intel-bert-base-uncased-mrpc/aitk/info.yml +++ b/intel-bert-base-uncased-mrpc/aitk/info.yml @@ -29,10 +29,3 @@ aitk: modelInfo: id: "huggingface/Intel/bert-base-uncased-mrpc" version: 1 - workflows: - - file: "bert_qdq_qnn.json" - - file: "bert_qdq_amd.json" - - file: "bert_ov.json" - - file: "bert_trtrtx.json" - - file: "bert_dml.json" - - file: "bert_webgpu.json" diff --git a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/_copy.json.config b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/_copy.json.config index f99b7656..aabcb60f 100644 --- a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/_copy.json.config +++ b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/_copy.json.config @@ -193,6 +193,31 @@ } ] }, + { + "src": "../../openai-clip-vit-base-patch16/aitk/openai_clip_webgpu.json", + "dst": "laion_clip_webgpu.json", + "replacements": [ + { + "find": "openai/clip-vit-base-patch16", + "replace": "laion/CLIP-ViT-B-32-laion2B-s34B-b79K" + } + ] + }, + { + "src": "../../openai-clip-vit-base-patch16/aitk/openai_clip_webgpu.json.config", + "dst": "laion_clip_webgpu.json.config", + "replacements": [] + }, + { + "src": "../../openai-clip-vit-base-patch16/aitk/openai_clip_webgpu_inference_sample.ipynb", + "dst": "laion_clip_webgpu_inference_sample.ipynb", + "replacements": [ + { + "find": "openai/clip-vit-base-patch16", + "replace": "laion/CLIP-ViT-B-32-laion2B-s34B-b79K" + } + ] + }, { "src": "../../openai-clip-vit-base-patch16/aitk/clip_script.py", "dst": "clip_script.py" diff --git a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/info.yml b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/info.yml index e13c2a8d..22924ead 100644 --- a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/info.yml +++ b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/info.yml @@ -25,6 +25,9 @@ recipes: - file: "laion_clip_dml.json" device: gpu ep: DmlExecutionProvider + - file: "laion_clip_webgpu.json" + device: gpu + ep: WebGpuExecutionProvider aitk: modelInfo: id: "huggingface/laion/CLIP-ViT-B-32-laion2B-s34B-b79K" diff --git a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_webgpu.json b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_webgpu.json new file mode 100644 index 00000000..947df9d3 --- /dev/null +++ b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_webgpu.json @@ -0,0 +1,206 @@ +{ + "input_model": { + "type": "HfModel", + "model_path": "laion/CLIP-ViT-B-32-laion2B-s34B-b79K", + "task": 
"zero-shot-image-classification", + "load_kwargs": { + "attn_implementation": "eager" + }, + "io_config": { + "input_names": [ + "input_ids", + "pixel_values", + "attention_mask" + ], + "input_shapes": [ + [ + 10, + 77 + ], + [ + 1, + 3, + 224, + 224 + ], + [ + 10, + 77 + ] + ], + "input_types": [ + "int64", + "float32", + "int64" + ], + "output_names": [ + "logits_per_image", + "logits_per_text", + "text_embeds", + "image_embeds" + ], + "output_shapes": [ + [ + 1, + 10 + ], + [ + 10, + 1 + ], + [ + 10, + 512 + ], + [ + 1, + 512 + ] + ] + } + }, + "systems": { + "host_system": { + "type": "LocalSystem", + "accelerators": [ + { + "device": "cpu", + "execution_providers": [ + "CPUExecutionProvider" + ] + } + ] + }, + "target_system": { + "type": "LocalSystem", + "accelerators": [ + { + "device": "gpu", + "execution_providers": [ + "WebGpuExecutionProvider" + ] + } + ] + } + }, + "data_configs": [ + { + "name": "metric_data_config", + "user_script": "user_script.py", + "load_dataset_config": { + "type": "clip_dataset", + "model_name": "laion/CLIP-ViT-B-32-laion2B-s34B-b79K", + "dataset_name": "nlphuji/flickr30k", + "start": 0, + "end": 10 + }, + "dataloader_config": { + "type": "no_auto_batch_dataloader" + }, + "post_process_data_config": { + "type": "clip_post_process" + } + } + ], + "evaluators": { + "common_evaluator": { + "metrics": [ + { + "name": "accuracy", + "type": "accuracy", + "backend": "huggingface_metrics", + "data_config": "metric_data_config", + "sub_types": [ + { + "name": "accuracy", + "priority": 1, + "goal": { + "type": "max-degradation", + "value": 0.05 + } + } + ] + }, + { + "name": "latency", + "type": "latency", + "data_config": "metric_data_config", + "sub_types": [ + { + "name": "avg", + "goal": { + "type": "percent-min-improvement", + "value": 0.1 + } + }, + { + "name": "max" + }, + { + "name": "min" + } + ] + }, + { + "name": "throughput", + "type": "throughput", + "data_config": "metric_data_config", + "sub_types": [ + { + "name": "avg" + }, + { + "name": "max" + }, + { + "name": "min" + } + ] + } + ] + } + }, + "passes": { + "conversion": { + "type": "OnnxConversion", + "target_opset": 17, + "save_as_external_data": true + }, + "transformer_optimizer": { + "type": "orttransformersoptimization", + "model_type": "clip", + "opt_level": 0, + "float16": true, + "use_gpu": true, + "keep_io_types": false, + "optimization_options": { + "enable_gelu": true, + "enable_layer_norm": true, + "enable_attention": true, + "enable_skip_layer_norm": false, + "enable_embed_layer_norm": false, + "enable_bias_skip_layer_norm": false, + "enable_bias_gelu": false, + "enable_gelu_approximation": false, + "enable_qordered_matmul": false, + "enable_shape_inference": true, + "enable_gemm_fast_gelu": false, + "enable_nhwc_conv": false, + "enable_group_norm": false, + "enable_bias_splitgelu": false, + "enable_packed_qkv": true, + "enable_packed_kv": true, + "enable_bias_add": false, + "enable_rotary_embeddings": true + }, + "save_as_external_data": true + } + }, + "search_strategy": false, + "host": "host_system", + "target": "target_system", + "cache_dir": "cache", + "evaluator": "common_evaluator", + "evaluate_input_model": false, + "output_dir": "model/clip" +} diff --git a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_webgpu.json.config b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_webgpu.json.config new file mode 100644 index 00000000..d17c25fa --- /dev/null +++ b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_webgpu.json.config @@ -0,0 +1,84 @@ +{ + "name": 
"Convert to WebGPU", + "addCpu": false, + "runtime": { + "autoGenerated": true, + "name": "Evaluate on", + "type": "enum", + "displayNames": [ + "WebGPU" + ], + "path": "systems.target_system.accelerators.0.execution_providers.0", + "values": [ + "WebGpuExecutionProvider" + ], + "readOnly": false + }, + "sections": [ + { + "autoGenerated": true, + "name": "Convert", + "phase": "Conversion", + "parameters": [], + "toggle": { + "autoGenerated": true, + "name": "Convert to ONNX format", + "type": "bool", + "path": "passes.conversion", + "actions": [ + [], + [] + ], + "readOnly": true + } + }, + { + "name": "Evaluate", + "phase": "Evaluation", + "parameters": [ + { + "name": "Evaluation Dataset", + "tags": [ + "EvaluationDataset" + ], + "type": "enum", + "path": "data_configs[0].load_dataset_config.dataset_name", + "values": [ + "nlphuji/flickr30k" + ], + "template": { + "path": "data_configs[0].load_dataset_config.dataset_name", + "values": [ + "nlphuji/flickr30k" + ], + "template": "EvaluationDataset" + } + }, + { + "name": "Evaluation Dataset Size", + "type": "int", + "path": "data_configs[0].load_dataset_config.end", + "template": { + "path": "data_configs[0].load_dataset_config.end", + "template": "EvaluationDatasetSize" + } + } + ], + "toggle": { + "autoGenerated": true, + "name": "Evaluate model performance", + "type": "bool", + "path": "evaluator", + "actions": [ + [], + [ + { + "type": "delete", + "path": "evaluator" + } + ] + ] + } + } + ] +} diff --git a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_webgpu_inference_sample.ipynb b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_webgpu_inference_sample.ipynb new file mode 100644 index 00000000..6938c9bb --- /dev/null +++ b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_webgpu_inference_sample.ipynb @@ -0,0 +1,115 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "aeb33f1a", + "metadata": {}, + "outputs": [], + "source": [ + "onnx_model_path = \"./model/model.onnx\"\n", + "ExecutionProvider=\"WebGpuExecutionProvider\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "22477669", + "metadata": {}, + "outputs": [], + "source": [ + "# reference: https://learn.microsoft.com/en-us/windows/ai/new-windows-ml/tutorial?tabs=python#acquiring-the-model-and-preprocessing\n", + "import subprocess\n", + "import json\n", + "import sys\n", + "import os\n", + "import onnxruntime as ort\n", + "\n", + "def register_execution_providers():\n", + " worker_script = os.path.abspath('winml.py')\n", + " result = subprocess.check_output([sys.executable, worker_script], text=True)\n", + " paths = json.loads(result)\n", + " for item in paths.items():\n", + " ort.register_execution_provider_library(item[0], item[1])\n", + "\n", + "register_execution_providers()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "307fcca8", + "metadata": {}, + "outputs": [], + "source": [ + "from PIL import Image\n", + "import requests\n", + " \n", + "from transformers import CLIPProcessor\n", + "import onnxruntime as ort\n", + "import numpy as np\n", + "import torch\n", + " \n", + "processor = CLIPProcessor.from_pretrained(\"laion/CLIP-ViT-B-32-laion2B-s34B-b79K\", use_fast=False)\n", + " \n", + "url = \"http://images.cocodataset.org/val2017/000000039769.jpg\"\n", + "image = Image.open(requests.get(url, stream=True).raw)\n", + " \n", + "inputs = processor(text=[\"a photo of a cat\", \"a photo of a dog\", \"a photo of a cat\", \"a photo of a dog\", \"a photo of a cat\", \"a photo of a 
dog\", \"a photo of a cat\", \"a photo of a dog\", \"a photo of a cat\", \"a photo of a dog\"],\n", + " images=image, return_tensors=\"np\", padding=\"max_length\",\n", + " max_length= 77, truncation=True)\n", + " \n", + "\n", + "def add_ep_for_device(session_options, ep_name, device_type, ep_options=None):\n", + " ep_devices = ort.get_ep_devices()\n", + " for ep_device in ep_devices:\n", + " if ep_device.ep_name == ep_name and ep_device.device.type == device_type:\n", + " print(f\"Adding {ep_name} for {device_type}\")\n", + " session_options.add_provider_for_devices([ep_device], {} if ep_options is None else ep_options)\n", + " break\n", + " \n", + "opts = ort.SessionOptions()\n", + " \n", + "add_ep_for_device(opts, ExecutionProvider, ort.OrtHardwareDeviceType.GPU)\n", + "assert opts.has_providers()\n", + "\n", + "# options = ort.SessionOptions()\n", + "session = ort.InferenceSession(onnx_model_path,\n", + " sess_options=opts,\n", + " # providers=[ExecutionProvider],\n", + " # provider_options=[provider_options]\n", + ")\n", + "logits_per_image = session.run([\"logits_per_image\"],\n", + " {\n", + " \"input_ids\": inputs['input_ids'].astype(np.int64),\n", + " \"attention_mask\": inputs['attention_mask'].astype(np.int64),\n", + " \"pixel_values\": inputs['pixel_values'].astype(np.float16)\n", + " })\n", + " \n", + "probs = torch.tensor(logits_per_image[0]).softmax(dim=1)\n", + "print(\"Label probs:\", probs)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "winml", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/model_project.config b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/model_project.config index 311c217d..d429eafb 100644 --- a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/model_project.config +++ b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/model_project.config @@ -23,6 +23,10 @@ { "file": "laion_clip_dml.json", "templateName": "laion_clip_dml" + }, + { + "file": "laion_clip_webgpu.json", + "templateName": "laion_clip_webgpu" } ], "modelInfo": { diff --git a/meta-llama-Llama-3.2-1B-Instruct/aitk/info.yml b/meta-llama-Llama-3.2-1B-Instruct/aitk/info.yml index 46b40c82..635bc818 100644 --- a/meta-llama-Llama-3.2-1B-Instruct/aitk/info.yml +++ b/meta-llama-Llama-3.2-1B-Instruct/aitk/info.yml @@ -20,6 +20,9 @@ recipes: - file: "llama3_2_dml_config.json" device: gpu ep: DmlExecutionProvider + - file: "llama3_2_webgpu_config.json" + device: gpu + ep: WebGpuExecutionProvider aitk: modelInfo: id: "huggingface/meta-llama/Llama-3.2-1B-Instruct" diff --git a/microsoft-Phi-3.5-mini-instruct/aitk/info.yml b/microsoft-Phi-3.5-mini-instruct/aitk/info.yml index 57b16388..2ad80323 100644 --- a/microsoft-Phi-3.5-mini-instruct/aitk/info.yml +++ b/microsoft-Phi-3.5-mini-instruct/aitk/info.yml @@ -20,6 +20,9 @@ recipes: - file: "phi3_5_dml_config.json" device: gpu ep: DmlExecutionProvider + - file: "phi3_5_webgpu_config.json" + device: gpu + ep: WebGpuExecutionProvider aitk: modelInfo: id: "huggingface/microsoft/Phi-3.5-mini-instruct" diff --git a/microsoft-resnet-50/aitk/_copy.json.config b/microsoft-resnet-50/aitk/_copy.json.config index 959fd3ba..d94f9329 100644 --- a/microsoft-resnet-50/aitk/_copy.json.config +++ 
b/microsoft-resnet-50/aitk/_copy.json.config @@ -23,6 +23,16 @@ "replace": "DmlExecutionProvider" } ] + }, + { + "src": "resnet_trtrtx_inference_sample.ipynb", + "dst": "resnet_webgpu_inference_sample.ipynb", + "replacements": [ + { + "find": "NvTensorRTRTXExecutionProvider", + "replace": "WebGpuExecutionProvider" + } + ] } ] } diff --git a/microsoft-resnet-50/aitk/info.yml b/microsoft-resnet-50/aitk/info.yml index 038d7eb6..af6ef72f 100644 --- a/microsoft-resnet-50/aitk/info.yml +++ b/microsoft-resnet-50/aitk/info.yml @@ -20,6 +20,9 @@ recipes: - file: "resnet_dml.json" device: gpu ep: DmlExecutionProvider + - file: "resnet_webgpu.json" + device: gpu + ep: WebGpuExecutionProvider aitk: modelInfo: id: "huggingface/microsoft/resnet-50" diff --git a/microsoft-resnet-50/aitk/model_project.config b/microsoft-resnet-50/aitk/model_project.config index 2a944b44..554360ed 100644 --- a/microsoft-resnet-50/aitk/model_project.config +++ b/microsoft-resnet-50/aitk/model_project.config @@ -19,6 +19,10 @@ { "file": "resnet_dml.json", "templateName": "resnet_dml" + }, + { + "file": "resnet_webgpu.json", + "templateName": "resnet_webgpu" } ], "modelInfo": { diff --git a/microsoft-resnet-50/aitk/resnet_webgpu.json b/microsoft-resnet-50/aitk/resnet_webgpu.json new file mode 100644 index 00000000..e64119cf --- /dev/null +++ b/microsoft-resnet-50/aitk/resnet_webgpu.json @@ -0,0 +1,121 @@ +{ + "input_model": { + "type": "HfModel", + "model_path": "microsoft/resnet-50", + "task": "image-classification", + "io_config": { + "input_names": [ + "pixel_values" + ], + "input_shapes": [ + [ + 1, + 3, + 224, + 224 + ] + ], + "output_names": [ + "logits" + ] + } + }, + "systems": { + "host_system": { + "type": "LocalSystem", + "accelerators": [ + { + "device": "cpu", + "execution_providers": [ + "CPUExecutionProvider" + ] + } + ] + }, + "target_system": { + "type": "LocalSystem", + "accelerators": [ + { + "device": "gpu", + "execution_providers": [ + "WebGpuExecutionProvider" + ] + } + ] + } + }, + "data_configs": [ + { + "name": "evaluation_data_config", + "type": "HuggingfaceContainer", + "user_script": "imagenet.py", + "load_dataset_config": { + "data_name": "timm/mini-imagenet", + "split": "validation", + "streaming": true, + "trust_remote_code": true + }, + "pre_process_data_config": { + "type": "dataset_pre_process", + "size": 1000, + "cache_key": "imagedata_evaluation" + }, + "post_process_data_config": { + "type": "dataset_post_process" + } + } + ], + "evaluators": { + "common_evaluator": { + "metrics": [ + { + "name": "accuracy", + "type": "accuracy", + "data_config": "evaluation_data_config", + "sub_types": [ + { + "name": "accuracy_score", + "priority": 1, + "metric_config": { + "task": "multiclass", + "num_classes": 1001 + } + } + ] + }, + { + "name": "latency", + "type": "latency", + "data_config": "evaluation_data_config", + "sub_types": [ + { + "name": "avg", + "priority": 2 + } + ] + } + ] + } + }, + "passes": { + "conversion": { + "device": "cpu", + "type": "OnnxConversion", + "target_opset": 17, + "save_as_external_data": true, + "all_tensors_to_one_file": true, + "dynamic": false, + "use_dynamo_exporter": false + }, + "onnx_float_to_float16": { + "type": "OnnxFloatToFloat16", + "save_as_external_data": true + } + }, + "host": "host_system", + "target": "target_system", + "evaluator": "common_evaluator", + "cache_dir": "cache", + "output_dir": "model/resnet_webgpu", + "evaluate_input_model": false +} diff --git a/microsoft-resnet-50/aitk/resnet_webgpu.json.config 
b/microsoft-resnet-50/aitk/resnet_webgpu.json.config new file mode 100644 index 00000000..23c3f074 --- /dev/null +++ b/microsoft-resnet-50/aitk/resnet_webgpu.json.config @@ -0,0 +1,104 @@ +{ + "name": "Convert to WebGPU", + "addCpu": false, + "runtime": { + "autoGenerated": true, + "name": "Evaluate on", + "type": "enum", + "displayNames": [ + "WebGPU" + ], + "path": "systems.target_system.accelerators.0.execution_providers.0", + "values": [ + "WebGpuExecutionProvider" + ], + "readOnly": false + }, + "sections": [ + { + "autoGenerated": true, + "name": "Convert", + "phase": "Conversion", + "parameters": [], + "toggle": { + "autoGenerated": true, + "name": "Convert to ONNX format", + "type": "bool", + "path": "passes.conversion", + "actions": [ + [], + [] + ], + "readOnly": true + } + }, + { + "name": "Evaluate", + "phase": "Evaluation", + "parameters": [ + { + "name": "Evaluation Dataset", + "tags": [ + "EvaluationDataset" + ], + "type": "enum", + "path": "data_configs[0].load_dataset_config.data_name", + "values": [ + "timm/mini-imagenet", + "imagenet-1k" + ], + "template": { + "path": "data_configs[0].load_dataset_config.data_name", + "values": [ + "timm/mini-imagenet", + "imagenet-1k" + ], + "template": "EvaluationDataset" + } + }, + { + "name": "Evaluation Dataset Split", + "tags": [ + "EvaluationDatasetSplit", + "DependsOnDataset" + ], + "type": "enum", + "path": "data_configs[0].load_dataset_config.split", + "values": [ + "train", + "validation", + "test" + ], + "template": { + "path": "data_configs[0].load_dataset_config.split", + "template": "EvaluationDatasetSplit" + } + }, + { + "name": "Evaluation Dataset Size", + "type": "int", + "path": "data_configs[0].pre_process_data_config.size", + "template": { + "path": "data_configs[0].pre_process_data_config.size", + "template": "EvaluationDatasetSize" + } + } + ], + "toggle": { + "autoGenerated": true, + "name": "Evaluate model performance", + "type": "bool", + "path": "evaluator", + "actions": [ + [], + [ + { + "type": "delete", + "path": "evaluator" + } + ] + ] + } + } + ] +} diff --git a/microsoft-resnet-50/aitk/resnet_webgpu_inference_sample.ipynb b/microsoft-resnet-50/aitk/resnet_webgpu_inference_sample.ipynb new file mode 100644 index 00000000..7cc8ad1c --- /dev/null +++ b/microsoft-resnet-50/aitk/resnet_webgpu_inference_sample.ipynb @@ -0,0 +1,145 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "onnx_model_path = \"./model/model.onnx\"\n", + "ExecutionProvider=\"WebGpuExecutionProvider\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# reference: https://learn.microsoft.com/en-us/windows/ai/new-windows-ml/tutorial?tabs=python#acquiring-the-model-and-preprocessing\n", + "import subprocess\n", + "import json\n", + "import sys\n", + "import os\n", + "import onnxruntime as ort\n", + "\n", + "def register_execution_providers():\n", + " worker_script = os.path.abspath('winml.py')\n", + " result = subprocess.check_output([sys.executable, worker_script], text=True)\n", + " paths = json.loads(result)\n", + " for item in paths.items():\n", + " ort.register_execution_provider_library(item[0], item[1])\n", + "\n", + "register_execution_providers()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import requests\n", + "from PIL import Image\n", + "url = \"https://onnxruntime.ai/images/dog.jpeg\"\n", + "response = 
requests.get(url)\n", + "# Save the image to a file\n", + "with open(\"dog.jpeg\", \"wb\") as file:\n", + " file.write(response.content)\n", + "img = Image.open(\"dog.jpeg\")\n", + "img" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import onnxruntime as ort\n", + "from PIL import Image\n", + "import torch\n", + "import torchvision.transforms as transforms\n", + "from torchvision.models.resnet import ResNet50_Weights\n", + "import numpy as np\n", + "\n", + "image_file_path = \"dog.jpeg\"\n", + "\n", + "# Create ONNX Runtime session\n", + "def add_ep_for_device(session_options, ep_name, device_type, ep_options=None):\n", + " ep_devices = ort.get_ep_devices()\n", + " for ep_device in ep_devices:\n", + " if ep_device.ep_name == ep_name and ep_device.device.type == device_type:\n", + " print(f\"Adding {ep_name} for {device_type}\")\n", + " session_options.add_provider_for_devices([ep_device], {} if ep_options is None else ep_options)\n", + " break\n", + "\n", + "\n", + "session_options = ort.SessionOptions()\n", + "\n", + "add_ep_for_device(session_options, ExecutionProvider, ort.OrtHardwareDeviceType.GPU)\n", + "\n", + "session = ort.InferenceSession(\n", + " onnx_model_path, # the float16 model produced by the WebGPU workflow\n", + " sess_options=session_options,\n", + ")\n", + "\n", + "print(\"Available providers:\", session.get_providers())\n", + "print(\"Provider options:\", session.get_provider_options())\n", + "\n", + "# Read and preprocess image\n", + "image = Image.open(image_file_path)\n", + "preprocess = transforms.Compose([\n", + " transforms.Resize(256),\n", + " transforms.CenterCrop(224),\n", + " transforms.ToTensor(),\n", + " transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),\n", + "])\n", + "input_tensor = preprocess(image)\n", + "input_batch = input_tensor.unsqueeze(0)\n", + "\n", + "# Run inference\n", + "ort_inputs = {session.get_inputs()[0].name: input_batch.numpy().astype(np.float16)}\n", + "ort_outputs = session.run(None, ort_inputs)\n", + "\n", + "# Postprocess to get softmax vector\n", + "output = ort_outputs[0]\n", + "softmax = torch.nn.functional.softmax(torch.tensor(output), dim=1)\n", + "\n", + "# Extract top 10 predicted classes\n", + "top10 = torch.topk(softmax, 10)\n", + "\n", + "# Get label mapping\n", + "weights = ResNet50_Weights.DEFAULT\n", + "labels = weights.meta[\"categories\"]\n", + "\n", + "# Print results to console\n", + "print(\"Top 10 predictions for ResNet-50...\")\n", + "print(\"--------------------------------------------------------------\")\n", + "for i in range(10):\n", + " print(f\"Label: {labels[top10.indices[0][i]]}, Confidence: {top10.values[0][i].item():.4f}\")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "cpu", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.11" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/microsoft-resnet-50/aitk/winml.py b/microsoft-resnet-50/aitk/winml.py new file mode 100644 index 00000000..74a12c53 --- /dev/null +++ b/microsoft-resnet-50/aitk/winml.py @@ -0,0 +1,21 @@ +import json + +def _get_ep_paths() -> dict[str, str]: + from winui3.microsoft.windows.applicationmodel.dynamicdependency.bootstrap import ( + InitializeOptions, + initialize + ) + import
winui3.microsoft.windows.ai.machinelearning as winml + eps = {} + with initialize(options = InitializeOptions.ON_NO_MATCH_SHOW_UI): + catalog = winml.ExecutionProviderCatalog.get_default() + providers = catalog.find_all_providers() + for provider in providers: + provider.ensure_ready_async().get() + eps[provider.name] = provider.library_path + # DO NOT call provider.try_register in python. That will register to the native env. + return eps + +if __name__ == "__main__": + eps = _get_ep_paths() + print(json.dumps(eps)) diff --git a/openai-clip-vit-base-patch16/aitk/_copy.json.config b/openai-clip-vit-base-patch16/aitk/_copy.json.config index 2f6d2216..4d2ae5d3 100644 --- a/openai-clip-vit-base-patch16/aitk/_copy.json.config +++ b/openai-clip-vit-base-patch16/aitk/_copy.json.config @@ -24,6 +24,16 @@ } ] }, + { + "src": "openai_clip_trtrtx_inference_sample.ipynb", + "dst": "openai_clip_webgpu_inference_sample.ipynb", + "replacements": [ + { + "find": "NvTensorRTRTXExecutionProvider", + "replace": "WebGpuExecutionProvider" + } + ] + }, { "src": "../../intel-bert-base-uncased-mrpc/aitk/winml.py", "dst": "winml.py" diff --git a/openai-clip-vit-base-patch16/aitk/info.yml b/openai-clip-vit-base-patch16/aitk/info.yml index 15f8f493..46122f8a 100644 --- a/openai-clip-vit-base-patch16/aitk/info.yml +++ b/openai-clip-vit-base-patch16/aitk/info.yml @@ -25,6 +25,9 @@ recipes: - file: "openai_clip_dml.json" device: gpu ep: DmlExecutionProvider + - file: "openai_clip_webgpu.json" + device: gpu + ep: WebGpuExecutionProvider aitk: modelInfo: id: "huggingface/openai/clip-vit-base-patch16" diff --git a/openai-clip-vit-base-patch16/aitk/model_project.config b/openai-clip-vit-base-patch16/aitk/model_project.config index c89ea0ed..13e1c771 100644 --- a/openai-clip-vit-base-patch16/aitk/model_project.config +++ b/openai-clip-vit-base-patch16/aitk/model_project.config @@ -23,6 +23,10 @@ { "file": "openai_clip_dml.json", "templateName": "openai_clip_dml" + }, + { + "file": "openai_clip_webgpu.json", + "templateName": "openai_clip_webgpu" } ], "modelInfo": { diff --git a/openai-clip-vit-base-patch16/aitk/openai_clip_webgpu.json b/openai-clip-vit-base-patch16/aitk/openai_clip_webgpu.json new file mode 100644 index 00000000..97493fc9 --- /dev/null +++ b/openai-clip-vit-base-patch16/aitk/openai_clip_webgpu.json @@ -0,0 +1,206 @@ +{ + "input_model": { + "type": "HfModel", + "model_path": "openai/clip-vit-base-patch16", + "task": "zero-shot-image-classification", + "load_kwargs": { + "attn_implementation": "eager" + }, + "io_config": { + "input_names": [ + "input_ids", + "pixel_values", + "attention_mask" + ], + "input_shapes": [ + [ + 10, + 77 + ], + [ + 1, + 3, + 224, + 224 + ], + [ + 10, + 77 + ] + ], + "input_types": [ + "int64", + "float32", + "int64" + ], + "output_names": [ + "logits_per_image", + "logits_per_text", + "text_embeds", + "image_embeds" + ], + "output_shapes": [ + [ + 1, + 10 + ], + [ + 10, + 1 + ], + [ + 10, + 512 + ], + [ + 1, + 512 + ] + ] + } + }, + "systems": { + "host_system": { + "type": "LocalSystem", + "accelerators": [ + { + "device": "cpu", + "execution_providers": [ + "CPUExecutionProvider" + ] + } + ] + }, + "target_system": { + "type": "LocalSystem", + "accelerators": [ + { + "device": "gpu", + "execution_providers": [ + "WebGpuExecutionProvider" + ] + } + ] + } + }, + "data_configs": [ + { + "name": "metric_data_config", + "user_script": "user_script.py", + "load_dataset_config": { + "type": "clip_dataset", + "model_name": "openai/clip-vit-base-patch16", + "dataset_name": 
"nlphuji/flickr30k", + "start": 0, + "end": 10 + }, + "dataloader_config": { + "type": "no_auto_batch_dataloader" + }, + "post_process_data_config": { + "type": "clip_post_process" + } + } + ], + "evaluators": { + "common_evaluator": { + "metrics": [ + { + "name": "accuracy", + "type": "accuracy", + "backend": "huggingface_metrics", + "data_config": "metric_data_config", + "sub_types": [ + { + "name": "accuracy", + "priority": 1, + "goal": { + "type": "max-degradation", + "value": 0.05 + } + } + ] + }, + { + "name": "latency", + "type": "latency", + "data_config": "metric_data_config", + "sub_types": [ + { + "name": "avg", + "goal": { + "type": "percent-min-improvement", + "value": 0.1 + } + }, + { + "name": "max" + }, + { + "name": "min" + } + ] + }, + { + "name": "throughput", + "type": "throughput", + "data_config": "metric_data_config", + "sub_types": [ + { + "name": "avg" + }, + { + "name": "max" + }, + { + "name": "min" + } + ] + } + ] + } + }, + "passes": { + "conversion": { + "type": "OnnxConversion", + "target_opset": 17, + "save_as_external_data": true + }, + "transformer_optimizer": { + "type": "orttransformersoptimization", + "model_type": "clip", + "opt_level": 0, + "float16": true, + "use_gpu": true, + "keep_io_types": false, + "optimization_options": { + "enable_gelu": true, + "enable_layer_norm": true, + "enable_attention": true, + "enable_skip_layer_norm": false, + "enable_embed_layer_norm": false, + "enable_bias_skip_layer_norm": false, + "enable_bias_gelu": false, + "enable_gelu_approximation": false, + "enable_qordered_matmul": false, + "enable_shape_inference": true, + "enable_gemm_fast_gelu": false, + "enable_nhwc_conv": false, + "enable_group_norm": false, + "enable_bias_splitgelu": false, + "enable_packed_qkv": true, + "enable_packed_kv": true, + "enable_bias_add": false, + "enable_rotary_embeddings": true + }, + "save_as_external_data": true + } + }, + "search_strategy": false, + "host": "host_system", + "target": "target_system", + "cache_dir": "cache", + "evaluator": "common_evaluator", + "evaluate_input_model": false, + "output_dir": "model/clip" +} diff --git a/openai-clip-vit-base-patch16/aitk/openai_clip_webgpu.json.config b/openai-clip-vit-base-patch16/aitk/openai_clip_webgpu.json.config new file mode 100644 index 00000000..d17c25fa --- /dev/null +++ b/openai-clip-vit-base-patch16/aitk/openai_clip_webgpu.json.config @@ -0,0 +1,84 @@ +{ + "name": "Convert to WebGPU", + "addCpu": false, + "runtime": { + "autoGenerated": true, + "name": "Evaluate on", + "type": "enum", + "displayNames": [ + "WebGPU" + ], + "path": "systems.target_system.accelerators.0.execution_providers.0", + "values": [ + "WebGpuExecutionProvider" + ], + "readOnly": false + }, + "sections": [ + { + "autoGenerated": true, + "name": "Convert", + "phase": "Conversion", + "parameters": [], + "toggle": { + "autoGenerated": true, + "name": "Convert to ONNX format", + "type": "bool", + "path": "passes.conversion", + "actions": [ + [], + [] + ], + "readOnly": true + } + }, + { + "name": "Evaluate", + "phase": "Evaluation", + "parameters": [ + { + "name": "Evaluation Dataset", + "tags": [ + "EvaluationDataset" + ], + "type": "enum", + "path": "data_configs[0].load_dataset_config.dataset_name", + "values": [ + "nlphuji/flickr30k" + ], + "template": { + "path": "data_configs[0].load_dataset_config.dataset_name", + "values": [ + "nlphuji/flickr30k" + ], + "template": "EvaluationDataset" + } + }, + { + "name": "Evaluation Dataset Size", + "type": "int", + "path": "data_configs[0].load_dataset_config.end", 
+ "template": { + "path": "data_configs[0].load_dataset_config.end", + "template": "EvaluationDatasetSize" + } + } + ], + "toggle": { + "autoGenerated": true, + "name": "Evaluate model performance", + "type": "bool", + "path": "evaluator", + "actions": [ + [], + [ + { + "type": "delete", + "path": "evaluator" + } + ] + ] + } + } + ] +} diff --git a/openai-clip-vit-base-patch16/aitk/openai_clip_webgpu_inference_sample.ipynb b/openai-clip-vit-base-patch16/aitk/openai_clip_webgpu_inference_sample.ipynb new file mode 100644 index 00000000..9e2d7c1c --- /dev/null +++ b/openai-clip-vit-base-patch16/aitk/openai_clip_webgpu_inference_sample.ipynb @@ -0,0 +1,115 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "aeb33f1a", + "metadata": {}, + "outputs": [], + "source": [ + "onnx_model_path = \"./model/model.onnx\"\n", + "ExecutionProvider=\"WebGpuExecutionProvider\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "22477669", + "metadata": {}, + "outputs": [], + "source": [ + "# reference: https://learn.microsoft.com/en-us/windows/ai/new-windows-ml/tutorial?tabs=python#acquiring-the-model-and-preprocessing\n", + "import subprocess\n", + "import json\n", + "import sys\n", + "import os\n", + "import onnxruntime as ort\n", + "\n", + "def register_execution_providers():\n", + " worker_script = os.path.abspath('winml.py')\n", + " result = subprocess.check_output([sys.executable, worker_script], text=True)\n", + " paths = json.loads(result)\n", + " for item in paths.items():\n", + " ort.register_execution_provider_library(item[0], item[1])\n", + "\n", + "register_execution_providers()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "307fcca8", + "metadata": {}, + "outputs": [], + "source": [ + "from PIL import Image\n", + "import requests\n", + " \n", + "from transformers import CLIPProcessor\n", + "import onnxruntime as ort\n", + "import numpy as np\n", + "import torch\n", + " \n", + "processor = CLIPProcessor.from_pretrained(\"openai/clip-vit-base-patch16\", use_fast=False)\n", + " \n", + "url = \"http://images.cocodataset.org/val2017/000000039769.jpg\"\n", + "image = Image.open(requests.get(url, stream=True).raw)\n", + " \n", + "inputs = processor(text=[\"a photo of a cat\", \"a photo of a dog\", \"a photo of a cat\", \"a photo of a dog\", \"a photo of a cat\", \"a photo of a dog\", \"a photo of a cat\", \"a photo of a dog\", \"a photo of a cat\", \"a photo of a dog\"],\n", + " images=image, return_tensors=\"np\", padding=\"max_length\",\n", + " max_length= 77, truncation=True)\n", + " \n", + "\n", + "def add_ep_for_device(session_options, ep_name, device_type, ep_options=None):\n", + " ep_devices = ort.get_ep_devices()\n", + " for ep_device in ep_devices:\n", + " if ep_device.ep_name == ep_name and ep_device.device.type == device_type:\n", + " print(f\"Adding {ep_name} for {device_type}\")\n", + " session_options.add_provider_for_devices([ep_device], {} if ep_options is None else ep_options)\n", + " break\n", + " \n", + "opts = ort.SessionOptions()\n", + " \n", + "add_ep_for_device(opts, ExecutionProvider, ort.OrtHardwareDeviceType.GPU)\n", + "assert opts.has_providers()\n", + "\n", + "# options = ort.SessionOptions()\n", + "session = ort.InferenceSession(onnx_model_path,\n", + " sess_options=opts,\n", + " # providers=[ExecutionProvider],\n", + " # provider_options=[provider_options]\n", + ")\n", + "logits_per_image = session.run([\"logits_per_image\"],\n", + " {\n", + " \"input_ids\": 
inputs['input_ids'].astype(np.int64),\n", + " \"attention_mask\": inputs['attention_mask'].astype(np.int64),\n", + " \"pixel_values\": inputs['pixel_values'].astype(np.float16)\n", + " })\n", + " \n", + "probs = torch.tensor(logits_per_image[0]).softmax(dim=1)\n", + "print(\"Label probs:\", probs)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "winml", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/openai-clip-vit-base-patch32/aitk/_copy.json.config b/openai-clip-vit-base-patch32/aitk/_copy.json.config index a771d852..8c3918b3 100644 --- a/openai-clip-vit-base-patch32/aitk/_copy.json.config +++ b/openai-clip-vit-base-patch32/aitk/_copy.json.config @@ -180,6 +180,31 @@ } ] }, + { + "src": "../../openai-clip-vit-base-patch16/aitk/openai_clip_webgpu.json", + "dst": "openai_clip_webgpu.json", + "replacements": [ + { + "find": "openai/clip-vit-base-patch16", + "replace": "openai/clip-vit-base-patch32" + } + ] + }, + { + "src": "../../openai-clip-vit-base-patch16/aitk/openai_clip_webgpu.json.config", + "dst": "openai_clip_webgpu.json.config", + "replacements": [] + }, + { + "src": "../../openai-clip-vit-base-patch16/aitk/openai_clip_webgpu_inference_sample.ipynb", + "dst": "openai_clip_webgpu_inference_sample.ipynb", + "replacements": [ + { + "find": "openai/clip-vit-base-patch16", + "replace": "openai/clip-vit-base-patch32" + } + ] + }, { "src": "../../openai-clip-vit-base-patch16/aitk/clip_script.py", "dst": "clip_script.py" diff --git a/openai-clip-vit-base-patch32/aitk/info.yml b/openai-clip-vit-base-patch32/aitk/info.yml index 86f4fee9..4e8c7593 100644 --- a/openai-clip-vit-base-patch32/aitk/info.yml +++ b/openai-clip-vit-base-patch32/aitk/info.yml @@ -25,6 +25,9 @@ recipes: - file: "openai_clip_dml.json" device: gpu ep: DmlExecutionProvider + - file: "openai_clip_webgpu.json" + device: gpu + ep: WebGpuExecutionProvider aitk: modelInfo: id: "huggingface/openai/clip-vit-base-patch32" diff --git a/openai-clip-vit-base-patch32/aitk/model_project.config b/openai-clip-vit-base-patch32/aitk/model_project.config index f2bd423e..234e99a8 100644 --- a/openai-clip-vit-base-patch32/aitk/model_project.config +++ b/openai-clip-vit-base-patch32/aitk/model_project.config @@ -23,6 +23,10 @@ { "file": "openai_clip_dml.json", "templateName": "openai_clip_dml" + }, + { + "file": "openai_clip_webgpu.json", + "templateName": "openai_clip_webgpu" } ], "modelInfo": { diff --git a/openai-clip-vit-base-patch32/aitk/openai_clip_webgpu.json b/openai-clip-vit-base-patch32/aitk/openai_clip_webgpu.json new file mode 100644 index 00000000..205dd9fa --- /dev/null +++ b/openai-clip-vit-base-patch32/aitk/openai_clip_webgpu.json @@ -0,0 +1,206 @@ +{ + "input_model": { + "type": "HfModel", + "model_path": "openai/clip-vit-base-patch32", + "task": "zero-shot-image-classification", + "load_kwargs": { + "attn_implementation": "eager" + }, + "io_config": { + "input_names": [ + "input_ids", + "pixel_values", + "attention_mask" + ], + "input_shapes": [ + [ + 10, + 77 + ], + [ + 1, + 3, + 224, + 224 + ], + [ + 10, + 77 + ] + ], + "input_types": [ + "int64", + "float32", + "int64" + ], + "output_names": [ + "logits_per_image", + "logits_per_text", + "text_embeds", + "image_embeds" + ], + "output_shapes": 
[ + [ + 1, + 10 + ], + [ + 10, + 1 + ], + [ + 10, + 512 + ], + [ + 1, + 512 + ] + ] + } + }, + "systems": { + "host_system": { + "type": "LocalSystem", + "accelerators": [ + { + "device": "cpu", + "execution_providers": [ + "CPUExecutionProvider" + ] + } + ] + }, + "target_system": { + "type": "LocalSystem", + "accelerators": [ + { + "device": "gpu", + "execution_providers": [ + "WebGpuExecutionProvider" + ] + } + ] + } + }, + "data_configs": [ + { + "name": "metric_data_config", + "user_script": "user_script.py", + "load_dataset_config": { + "type": "clip_dataset", + "model_name": "openai/clip-vit-base-patch32", + "dataset_name": "nlphuji/flickr30k", + "start": 0, + "end": 10 + }, + "dataloader_config": { + "type": "no_auto_batch_dataloader" + }, + "post_process_data_config": { + "type": "clip_post_process" + } + } + ], + "evaluators": { + "common_evaluator": { + "metrics": [ + { + "name": "accuracy", + "type": "accuracy", + "backend": "huggingface_metrics", + "data_config": "metric_data_config", + "sub_types": [ + { + "name": "accuracy", + "priority": 1, + "goal": { + "type": "max-degradation", + "value": 0.05 + } + } + ] + }, + { + "name": "latency", + "type": "latency", + "data_config": "metric_data_config", + "sub_types": [ + { + "name": "avg", + "goal": { + "type": "percent-min-improvement", + "value": 0.1 + } + }, + { + "name": "max" + }, + { + "name": "min" + } + ] + }, + { + "name": "throughput", + "type": "throughput", + "data_config": "metric_data_config", + "sub_types": [ + { + "name": "avg" + }, + { + "name": "max" + }, + { + "name": "min" + } + ] + } + ] + } + }, + "passes": { + "conversion": { + "type": "OnnxConversion", + "target_opset": 17, + "save_as_external_data": true + }, + "transformer_optimizer": { + "type": "orttransformersoptimization", + "model_type": "clip", + "opt_level": 0, + "float16": true, + "use_gpu": true, + "keep_io_types": false, + "optimization_options": { + "enable_gelu": true, + "enable_layer_norm": true, + "enable_attention": true, + "enable_skip_layer_norm": false, + "enable_embed_layer_norm": false, + "enable_bias_skip_layer_norm": false, + "enable_bias_gelu": false, + "enable_gelu_approximation": false, + "enable_qordered_matmul": false, + "enable_shape_inference": true, + "enable_gemm_fast_gelu": false, + "enable_nhwc_conv": false, + "enable_group_norm": false, + "enable_bias_splitgelu": false, + "enable_packed_qkv": true, + "enable_packed_kv": true, + "enable_bias_add": false, + "enable_rotary_embeddings": true + }, + "save_as_external_data": true + } + }, + "search_strategy": false, + "host": "host_system", + "target": "target_system", + "cache_dir": "cache", + "evaluator": "common_evaluator", + "evaluate_input_model": false, + "output_dir": "model/clip" +} diff --git a/openai-clip-vit-base-patch32/aitk/openai_clip_webgpu.json.config b/openai-clip-vit-base-patch32/aitk/openai_clip_webgpu.json.config new file mode 100644 index 00000000..d17c25fa --- /dev/null +++ b/openai-clip-vit-base-patch32/aitk/openai_clip_webgpu.json.config @@ -0,0 +1,84 @@ +{ + "name": "Convert to WebGPU", + "addCpu": false, + "runtime": { + "autoGenerated": true, + "name": "Evaluate on", + "type": "enum", + "displayNames": [ + "WebGPU" + ], + "path": "systems.target_system.accelerators.0.execution_providers.0", + "values": [ + "WebGpuExecutionProvider" + ], + "readOnly": false + }, + "sections": [ + { + "autoGenerated": true, + "name": "Convert", + "phase": "Conversion", + "parameters": [], + "toggle": { + "autoGenerated": true, + "name": "Convert to ONNX format", + 
"type": "bool", + "path": "passes.conversion", + "actions": [ + [], + [] + ], + "readOnly": true + } + }, + { + "name": "Evaluate", + "phase": "Evaluation", + "parameters": [ + { + "name": "Evaluation Dataset", + "tags": [ + "EvaluationDataset" + ], + "type": "enum", + "path": "data_configs[0].load_dataset_config.dataset_name", + "values": [ + "nlphuji/flickr30k" + ], + "template": { + "path": "data_configs[0].load_dataset_config.dataset_name", + "values": [ + "nlphuji/flickr30k" + ], + "template": "EvaluationDataset" + } + }, + { + "name": "Evaluation Dataset Size", + "type": "int", + "path": "data_configs[0].load_dataset_config.end", + "template": { + "path": "data_configs[0].load_dataset_config.end", + "template": "EvaluationDatasetSize" + } + } + ], + "toggle": { + "autoGenerated": true, + "name": "Evaluate model performance", + "type": "bool", + "path": "evaluator", + "actions": [ + [], + [ + { + "type": "delete", + "path": "evaluator" + } + ] + ] + } + } + ] +} diff --git a/openai-clip-vit-base-patch32/aitk/openai_clip_webgpu_inference_sample.ipynb b/openai-clip-vit-base-patch32/aitk/openai_clip_webgpu_inference_sample.ipynb new file mode 100644 index 00000000..015cd8a1 --- /dev/null +++ b/openai-clip-vit-base-patch32/aitk/openai_clip_webgpu_inference_sample.ipynb @@ -0,0 +1,115 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "aeb33f1a", + "metadata": {}, + "outputs": [], + "source": [ + "onnx_model_path = \"./model/model.onnx\"\n", + "ExecutionProvider=\"WebGpuExecutionProvider\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "22477669", + "metadata": {}, + "outputs": [], + "source": [ + "# reference: https://learn.microsoft.com/en-us/windows/ai/new-windows-ml/tutorial?tabs=python#acquiring-the-model-and-preprocessing\n", + "import subprocess\n", + "import json\n", + "import sys\n", + "import os\n", + "import onnxruntime as ort\n", + "\n", + "def register_execution_providers():\n", + " worker_script = os.path.abspath('winml.py')\n", + " result = subprocess.check_output([sys.executable, worker_script], text=True)\n", + " paths = json.loads(result)\n", + " for item in paths.items():\n", + " ort.register_execution_provider_library(item[0], item[1])\n", + "\n", + "register_execution_providers()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "307fcca8", + "metadata": {}, + "outputs": [], + "source": [ + "from PIL import Image\n", + "import requests\n", + " \n", + "from transformers import CLIPProcessor\n", + "import onnxruntime as ort\n", + "import numpy as np\n", + "import torch\n", + " \n", + "processor = CLIPProcessor.from_pretrained(\"openai/clip-vit-base-patch32\", use_fast=False)\n", + " \n", + "url = \"http://images.cocodataset.org/val2017/000000039769.jpg\"\n", + "image = Image.open(requests.get(url, stream=True).raw)\n", + " \n", + "inputs = processor(text=[\"a photo of a cat\", \"a photo of a dog\", \"a photo of a cat\", \"a photo of a dog\", \"a photo of a cat\", \"a photo of a dog\", \"a photo of a cat\", \"a photo of a dog\", \"a photo of a cat\", \"a photo of a dog\"],\n", + " images=image, return_tensors=\"np\", padding=\"max_length\",\n", + " max_length= 77, truncation=True)\n", + " \n", + "\n", + "def add_ep_for_device(session_options, ep_name, device_type, ep_options=None):\n", + " ep_devices = ort.get_ep_devices()\n", + " for ep_device in ep_devices:\n", + " if ep_device.ep_name == ep_name and ep_device.device.type == device_type:\n", + " print(f\"Adding {ep_name} for 
{device_type}\")\n", + " session_options.add_provider_for_devices([ep_device], {} if ep_options is None else ep_options)\n", + " break\n", + " \n", + "opts = ort.SessionOptions()\n", + " \n", + "add_ep_for_device(opts, ExecutionProvider, ort.OrtHardwareDeviceType.GPU)\n", + "assert opts.has_providers()\n", + "\n", + "# options = ort.SessionOptions()\n", + "session = ort.InferenceSession(onnx_model_path,\n", + " sess_options=opts,\n", + " # providers=[ExecutionProvider],\n", + " # provider_options=[provider_options]\n", + ")\n", + "logits_per_image = session.run([\"logits_per_image\"],\n", + " {\n", + " \"input_ids\": inputs['input_ids'].astype(np.int64),\n", + " \"attention_mask\": inputs['attention_mask'].astype(np.int64),\n", + " \"pixel_values\": inputs['pixel_values'].astype(np.float16)\n", + " })\n", + " \n", + "probs = torch.tensor(logits_per_image[0]).softmax(dim=1)\n", + "print(\"Label probs:\", probs)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "winml", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From 355b2fe96b1a45b97948a3973dfbda2560179c08 Mon Sep 17 00:00:00 2001 From: Chao Zhang Date: Wed, 20 Aug 2025 14:54:21 +0800 Subject: [PATCH 08/14] update olive, fix clip --- .aitk/requirements/Intel/Test_py3.12.9.txt | 4 +-- .../requirements-IntelNPU-Nightly.txt | 2 +- .aitk/requirements/requirements-IntelNPU.txt | 4 +-- .aitk/requirements/requirements-NvidiaGPU.txt | 4 +-- .aitk/requirements/requirements-QNN.txt | 4 +-- .aitk/requirements/requirements-WCR.txt | 4 +-- .aitk/requirements/requirements-WCR_CUDA.txt | 4 +-- .aitk/scripts/install_freeze.py | 2 +- .../aitk/laion_clip_webgpu.json | 29 ++----------------- .../aitk/openai_clip_webgpu.json | 29 ++----------------- .../aitk/openai_clip_webgpu.json | 29 ++----------------- 11 files changed, 20 insertions(+), 95 deletions(-) diff --git a/.aitk/requirements/Intel/Test_py3.12.9.txt b/.aitk/requirements/Intel/Test_py3.12.9.txt index 8f922797..127a389c 100644 --- a/.aitk/requirements/Intel/Test_py3.12.9.txt +++ b/.aitk/requirements/Intel/Test_py3.12.9.txt @@ -11,8 +11,8 @@ typing-extensions==4.13.2 onnx==1.17.0 numpy==1.26.4 protobuf==6.31.1 -# olive-ai@git+https://github.com/microsoft/Olive.git@8ff071c0ae9b1c38c0619ee72e8cb031957c63c4#egg=olive-ai -olive-ai@git+https://github.com/microsoft/Olive.git@8ff071c0ae9b1c38c0619ee72e8cb031957c63c4#egg=olive-ai +# olive-ai@git+https://github.com/microsoft/Olive.git@19abbd99463db9f608e3124237c1ecc74ac6e92e#egg=olive-ai +olive-ai@git+https://github.com/microsoft/Olive.git@19abbd99463db9f608e3124237c1ecc74ac6e92e#egg=olive-ai numpy==1.26.4 onnx==1.17.0 onnx-ir==0.1.5 diff --git a/.aitk/requirements/requirements-IntelNPU-Nightly.txt b/.aitk/requirements/requirements-IntelNPU-Nightly.txt index e41a4bf5..a9cef2cc 100644 --- a/.aitk/requirements/requirements-IntelNPU-Nightly.txt +++ b/.aitk/requirements/requirements-IntelNPU-Nightly.txt @@ -1,4 +1,4 @@ -# olive-ai@git+https://github.com/microsoft/Olive.git@8ff071c0ae9b1c38c0619ee72e8cb031957c63c4#egg=olive-ai depends on optimum[openvino]<=1.24 +# olive-ai@git+https://github.com/microsoft/Olive.git@19abbd99463db9f608e3124237c1ecc74ac6e92e#egg=olive-ai depends on optimum[openvino]<=1.24 # optimum[openvino]==1.24.0 depends on 
transformers<4.49.0 # phi4 depends on transformers>=4.49.0 transformers==4.49.0 diff --git a/.aitk/requirements/requirements-IntelNPU.txt b/.aitk/requirements/requirements-IntelNPU.txt index 3276a394..b5ce8251 100644 --- a/.aitk/requirements/requirements-IntelNPU.txt +++ b/.aitk/requirements/requirements-IntelNPU.txt @@ -11,8 +11,8 @@ typing-extensions==4.13.2 onnx==1.17.0 numpy==1.26.4 protobuf==6.30.2 -# olive-ai@git+https://github.com/microsoft/Olive.git@8ff071c0ae9b1c38c0619ee72e8cb031957c63c4#egg=olive-ai -olive-ai@git+https://github.com/microsoft/Olive.git@8ff071c0ae9b1c38c0619ee72e8cb031957c63c4#egg=olive-ai +# olive-ai@git+https://github.com/microsoft/Olive.git@19abbd99463db9f608e3124237c1ecc74ac6e92e#egg=olive-ai +olive-ai@git+https://github.com/microsoft/Olive.git@19abbd99463db9f608e3124237c1ecc74ac6e92e#egg=olive-ai numpy==1.26.4 onnx==1.17.0 onnx-ir==0.1.5 diff --git a/.aitk/requirements/requirements-NvidiaGPU.txt b/.aitk/requirements/requirements-NvidiaGPU.txt index 74924d54..7e63e65f 100644 --- a/.aitk/requirements/requirements-NvidiaGPU.txt +++ b/.aitk/requirements/requirements-NvidiaGPU.txt @@ -11,8 +11,8 @@ typing-extensions==4.13.2 onnx==1.17.0 numpy==2.2.4 protobuf==3.20.3 -# olive-ai@git+https://github.com/microsoft/Olive.git@8ff071c0ae9b1c38c0619ee72e8cb031957c63c4#egg=olive-ai -olive-ai@git+https://github.com/microsoft/Olive.git@8ff071c0ae9b1c38c0619ee72e8cb031957c63c4#egg=olive-ai +# olive-ai@git+https://github.com/microsoft/Olive.git@19abbd99463db9f608e3124237c1ecc74ac6e92e#egg=olive-ai +olive-ai@git+https://github.com/microsoft/Olive.git@19abbd99463db9f608e3124237c1ecc74ac6e92e#egg=olive-ai onnx-ir==0.1.5 onnxscript==0.3.2 optuna==4.3.0 diff --git a/.aitk/requirements/requirements-QNN.txt b/.aitk/requirements/requirements-QNN.txt index 25c93b54..79d145c5 100644 --- a/.aitk/requirements/requirements-QNN.txt +++ b/.aitk/requirements/requirements-QNN.txt @@ -2,8 +2,8 @@ onnx==1.17.0 numpy==2.2.4 protobuf==3.20.3 -# olive-ai@git+https://github.com/microsoft/Olive.git@8ff071c0ae9b1c38c0619ee72e8cb031957c63c4#egg=olive-ai -olive-ai@git+https://github.com/microsoft/Olive.git@8ff071c0ae9b1c38c0619ee72e8cb031957c63c4#egg=olive-ai +# olive-ai@git+https://github.com/microsoft/Olive.git@19abbd99463db9f608e3124237c1ecc74ac6e92e#egg=olive-ai +olive-ai@git+https://github.com/microsoft/Olive.git@19abbd99463db9f608e3124237c1ecc74ac6e92e#egg=olive-ai numpy==2.2.4 onnx==1.17.0 onnx-ir==0.1.5 diff --git a/.aitk/requirements/requirements-WCR.txt b/.aitk/requirements/requirements-WCR.txt index 7e3a9ce9..8c5ef6af 100644 --- a/.aitk/requirements/requirements-WCR.txt +++ b/.aitk/requirements/requirements-WCR.txt @@ -2,8 +2,8 @@ onnx==1.17.0 numpy==2.2.4 protobuf==6.30.2 -# olive-ai@git+https://github.com/microsoft/Olive.git@8ff071c0ae9b1c38c0619ee72e8cb031957c63c4#egg=olive-ai -olive-ai@git+https://github.com/microsoft/Olive.git@8ff071c0ae9b1c38c0619ee72e8cb031957c63c4#egg=olive-ai +# olive-ai@git+https://github.com/microsoft/Olive.git@19abbd99463db9f608e3124237c1ecc74ac6e92e#egg=olive-ai +olive-ai@git+https://github.com/microsoft/Olive.git@19abbd99463db9f608e3124237c1ecc74ac6e92e#egg=olive-ai onnx-ir==0.1.5 onnxscript==0.3.2 optuna==4.2.1 diff --git a/.aitk/requirements/requirements-WCR_CUDA.txt b/.aitk/requirements/requirements-WCR_CUDA.txt index ceb0a03d..6564f7c3 100644 --- a/.aitk/requirements/requirements-WCR_CUDA.txt +++ b/.aitk/requirements/requirements-WCR_CUDA.txt @@ -12,8 +12,8 @@ typing-extensions==4.14.0 onnx==1.17.0 numpy==2.2.4 protobuf==6.30.2 -# 
olive-ai@git+https://github.com/microsoft/Olive.git@8ff071c0ae9b1c38c0619ee72e8cb031957c63c4#egg=olive-ai -olive-ai@git+https://github.com/microsoft/Olive.git@8ff071c0ae9b1c38c0619ee72e8cb031957c63c4#egg=olive-ai +# olive-ai@git+https://github.com/microsoft/Olive.git@19abbd99463db9f608e3124237c1ecc74ac6e92e#egg=olive-ai +olive-ai@git+https://github.com/microsoft/Olive.git@19abbd99463db9f608e3124237c1ecc74ac6e92e#egg=olive-ai onnx-ir==0.1.5 onnxscript==0.3.2 optuna==4.2.1 diff --git a/.aitk/scripts/install_freeze.py b/.aitk/scripts/install_freeze.py index 3f4bea20..b328824f 100644 --- a/.aitk/scripts/install_freeze.py +++ b/.aitk/scripts/install_freeze.py @@ -60,7 +60,7 @@ def get_requires(name: str, args): def main(): # Constants # if from git: "git+https://github.com/microsoft/Olive.git@COMMIT_ID#egg=olive_ai - oliveAi = "olive-ai@git+https://github.com/microsoft/Olive.git@8ff071c0ae9b1c38c0619ee72e8cb031957c63c4#egg=olive-ai" + oliveAi = "olive-ai@git+https://github.com/microsoft/Olive.git@19abbd99463db9f608e3124237c1ecc74ac6e92e#egg=olive-ai" torchVision = "torchvision==0.22.0" pre = { RuntimeEnum.NvidiaGPU: [ diff --git a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_webgpu.json b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_webgpu.json index 947df9d3..bb76bded 100644 --- a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_webgpu.json +++ b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_webgpu.json @@ -166,33 +166,8 @@ "target_opset": 17, "save_as_external_data": true }, - "transformer_optimizer": { - "type": "orttransformersoptimization", - "model_type": "clip", - "opt_level": 0, - "float16": true, - "use_gpu": true, - "keep_io_types": false, - "optimization_options": { - "enable_gelu": true, - "enable_layer_norm": true, - "enable_attention": true, - "enable_skip_layer_norm": false, - "enable_embed_layer_norm": false, - "enable_bias_skip_layer_norm": false, - "enable_bias_gelu": false, - "enable_gelu_approximation": false, - "enable_qordered_matmul": false, - "enable_shape_inference": true, - "enable_gemm_fast_gelu": false, - "enable_nhwc_conv": false, - "enable_group_norm": false, - "enable_bias_splitgelu": false, - "enable_packed_qkv": true, - "enable_packed_kv": true, - "enable_bias_add": false, - "enable_rotary_embeddings": true - }, + "onnx_float_to_float16": { + "type": "OnnxFloatToFloat16", "save_as_external_data": true } }, diff --git a/openai-clip-vit-base-patch16/aitk/openai_clip_webgpu.json b/openai-clip-vit-base-patch16/aitk/openai_clip_webgpu.json index 97493fc9..39b77871 100644 --- a/openai-clip-vit-base-patch16/aitk/openai_clip_webgpu.json +++ b/openai-clip-vit-base-patch16/aitk/openai_clip_webgpu.json @@ -166,33 +166,8 @@ "target_opset": 17, "save_as_external_data": true }, - "transformer_optimizer": { - "type": "orttransformersoptimization", - "model_type": "clip", - "opt_level": 0, - "float16": true, - "use_gpu": true, - "keep_io_types": false, - "optimization_options": { - "enable_gelu": true, - "enable_layer_norm": true, - "enable_attention": true, - "enable_skip_layer_norm": false, - "enable_embed_layer_norm": false, - "enable_bias_skip_layer_norm": false, - "enable_bias_gelu": false, - "enable_gelu_approximation": false, - "enable_qordered_matmul": false, - "enable_shape_inference": true, - "enable_gemm_fast_gelu": false, - "enable_nhwc_conv": false, - "enable_group_norm": false, - "enable_bias_splitgelu": false, - "enable_packed_qkv": true, - "enable_packed_kv": true, - "enable_bias_add": false, - "enable_rotary_embeddings": true - }, 
+ "onnx_float_to_float16": { + "type": "OnnxFloatToFloat16", "save_as_external_data": true } }, diff --git a/openai-clip-vit-base-patch32/aitk/openai_clip_webgpu.json b/openai-clip-vit-base-patch32/aitk/openai_clip_webgpu.json index 205dd9fa..6198b710 100644 --- a/openai-clip-vit-base-patch32/aitk/openai_clip_webgpu.json +++ b/openai-clip-vit-base-patch32/aitk/openai_clip_webgpu.json @@ -166,33 +166,8 @@ "target_opset": 17, "save_as_external_data": true }, - "transformer_optimizer": { - "type": "orttransformersoptimization", - "model_type": "clip", - "opt_level": 0, - "float16": true, - "use_gpu": true, - "keep_io_types": false, - "optimization_options": { - "enable_gelu": true, - "enable_layer_norm": true, - "enable_attention": true, - "enable_skip_layer_norm": false, - "enable_embed_layer_norm": false, - "enable_bias_skip_layer_norm": false, - "enable_bias_gelu": false, - "enable_gelu_approximation": false, - "enable_qordered_matmul": false, - "enable_shape_inference": true, - "enable_gemm_fast_gelu": false, - "enable_nhwc_conv": false, - "enable_group_norm": false, - "enable_bias_splitgelu": false, - "enable_packed_qkv": true, - "enable_packed_kv": true, - "enable_bias_add": false, - "enable_rotary_embeddings": true - }, + "onnx_float_to_float16": { + "type": "OnnxFloatToFloat16", "save_as_external_data": true } }, From 0a9345f0c50cdf6496bb0886b02aa7f6a95731ff Mon Sep 17 00:00:00 2001 From: Chao Zhang Date: Wed, 20 Aug 2025 15:58:25 +0800 Subject: [PATCH 09/14] fix comments --- .aitk/configs/model_list.json | 26 +++++++++---------- .aitk/scripts/model_lab/__init__.py | 2 +- .aitk/scripts/sanitize/utils.py | 6 ++--- .../aitk/qwen2_5_webgpu_config.json | 2 +- .../aitk/deepseek_webgpu_config.json | 2 +- .../aitk/inference_sample.ipynb | 20 +------------- ...ase-patch16-224_dml_inference_sample.ipynb | 20 +------------- ...-patch16-224_trtrtx_inference_sample.ipynb | 20 +------------- ...-patch16-224_webgpu_inference_sample.ipynb | 20 +------------- .../aitk/llama3_2_webgpu_config.json | 2 +- .../aitk/phi3_5_webgpu_config.json | 2 +- 11 files changed, 25 insertions(+), 97 deletions(-) diff --git a/.aitk/configs/model_list.json b/.aitk/configs/model_list.json index eb795b84..f46f7170 100644 --- a/.aitk/configs/model_list.json +++ b/.aitk/configs/model_list.json @@ -28,7 +28,7 @@ "IntelGPU", "IntelNPU", "DML", - "WEBGPU" + "WebGPU" ], "architecture": "Transformer", "status": "Ready", @@ -78,7 +78,7 @@ "IntelGPU", "IntelNPU", "DML", - "WEBGPU" + "WebGPU" ], "architecture": "Transformer", "status": "Ready", @@ -98,7 +98,7 @@ "IntelGPU", "IntelNPU", "DML", - "WEBGPU" + "WebGPU" ], "architecture": "Transformer", "status": "Ready", @@ -118,7 +118,7 @@ "IntelGPU", "IntelNPU", "DML", - "WEBGPU" + "WebGPU" ], "architecture": "Transformer", "status": "Ready", @@ -138,7 +138,7 @@ "IntelGPU", "IntelNPU", "DML", - "WEBGPU" + "WebGPU" ], "architecture": "Transformer", "status": "Ready", @@ -158,7 +158,7 @@ "IntelGPU", "IntelNPU", "DML", - "WEBGPU" + "WebGPU" ], "architecture": "Transformer", "status": "Ready", @@ -208,7 +208,7 @@ "IntelGPU", "IntelNPU", "DML", - "WEBGPU" + "WebGPU" ], "architecture": "Transformer", "status": "Ready", @@ -282,7 +282,7 @@ "IntelGPU", "IntelNPU", "DML", - "WEBGPU" + "WebGPU" ], "architecture": "CNN", "status": "Ready", @@ -315,7 +315,7 @@ "IntelGPU", "IntelNPU", "DML", - "WEBGPU" + "WebGPU" ], "architecture": "Transformer", "status": "Ready", @@ -335,7 +335,7 @@ "IntelGPU", "IntelNPU", "DML", - "WEBGPU" + "WebGPU" ], "architecture": "Transformer", "status": "Ready", @@ 
-385,7 +385,7 @@ "IntelGPU", "IntelNPU", "DML", - "WEBGPU" + "WebGPU" ], "architecture": "Transformer", "status": "Ready", @@ -592,7 +592,7 @@ "NVIDIA GPU": "NvidiaGPU", "NVIDIA TensorRT for RTX": "NvidiaTRTRTX", "DirectML": "DML", - "WebGPU": "WEBGPU" + "WebGPU": "WebGPU" }, "RuntimeToDisplayName": { "CPU": "CPU", @@ -605,6 +605,6 @@ "NvidiaGPU": "NVIDIA GPU", "NvidiaTRTRTX": "NVIDIA TensorRT for RTX", "DML": "DirectML", - "WEBGPU": "WebGPU" + "WebGPU": "WebGPU" } } diff --git a/.aitk/scripts/model_lab/__init__.py b/.aitk/scripts/model_lab/__init__.py index 7ff8cf8b..d0b95633 100644 --- a/.aitk/scripts/model_lab/__init__.py +++ b/.aitk/scripts/model_lab/__init__.py @@ -11,7 +11,7 @@ class RuntimeEnum(Enum): IntelGPU = "IntelGPU" IntelNPU = "IntelNPU" DML = "DML" - WEBGPU = "WEBGPU" + WebGPU = "WebGPU" NvidiaGPU = "NvidiaGPU" WCR = "WCR" WCR_CUDA = "WCR_CUDA" diff --git a/.aitk/scripts/sanitize/utils.py b/.aitk/scripts/sanitize/utils.py index 97806f90..4fa746f7 100644 --- a/.aitk/scripts/sanitize/utils.py +++ b/.aitk/scripts/sanitize/utils.py @@ -43,7 +43,7 @@ class GlobalVars: RuntimeEnum.NvidiaGPU: EPNames.CUDAExecutionProvider, RuntimeEnum.NvidiaTRTRTX: EPNames.NvTensorRTRTXExecutionProvider, RuntimeEnum.DML: EPNames.DmlExecutionProvider, - RuntimeEnum.WEBGPU: EPNames.WebGpuExecutionProvider, + RuntimeEnum.WebGPU: EPNames.WebGpuExecutionProvider, } RuntimeToOliveDeviceType = { RuntimeEnum.CPU: OliveDeviceTypes.CPU, @@ -55,7 +55,7 @@ class GlobalVars: RuntimeEnum.AMDNPU: OliveDeviceTypes.NPU, RuntimeEnum.NvidiaGPU: OliveDeviceTypes.GPU, RuntimeEnum.DML: OliveDeviceTypes.GPU, - RuntimeEnum.WEBGPU: OliveDeviceTypes.GPU, + RuntimeEnum.WebGPU: OliveDeviceTypes.GPU, } RuntimeToDisplayName = { RuntimeEnum.CPU: "CPU", @@ -68,7 +68,7 @@ class GlobalVars: RuntimeEnum.NvidiaGPU: "NVIDIA GPU", RuntimeEnum.NvidiaTRTRTX: "NVIDIA TensorRT for RTX", RuntimeEnum.DML: "DirectML", - RuntimeEnum.WEBGPU: "WebGPU", + RuntimeEnum.WebGPU: "WebGPU", } @classmethod diff --git a/Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_webgpu_config.json b/Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_webgpu_config.json index 8c4c0f8c..d4ae439c 100644 --- a/Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_webgpu_config.json +++ b/Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_webgpu_config.json @@ -35,4 +35,4 @@ "cache_dir": "cache", "no_artifacts": true, "evaluate_input_model": false -} \ No newline at end of file +} diff --git a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_webgpu_config.json b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_webgpu_config.json index 34cd324f..fd193ded 100644 --- a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_webgpu_config.json +++ b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_webgpu_config.json @@ -35,4 +35,4 @@ "cache_dir": "cache", "no_artifacts": true, "evaluate_input_model": false -} \ No newline at end of file +} diff --git a/google-vit-base-patch16-224/aitk/inference_sample.ipynb b/google-vit-base-patch16-224/aitk/inference_sample.ipynb index ddcf70b1..f2dee65a 100644 --- a/google-vit-base-patch16-224/aitk/inference_sample.ipynb +++ b/google-vit-base-patch16-224/aitk/inference_sample.ipynb @@ -209,25 +209,7 @@ ] } ], - "metadata": { - "kernelspec": { - "display_name": "Python-WCR-win32-x64-3.12.9", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", 
- "version": "3.12.9" - } - }, + "metadata": {}, "nbformat": 4, "nbformat_minor": 4 } diff --git a/google-vit-base-patch16-224/aitk/vit-base-patch16-224_dml_inference_sample.ipynb b/google-vit-base-patch16-224/aitk/vit-base-patch16-224_dml_inference_sample.ipynb index e7c6d91e..19adf2a8 100644 --- a/google-vit-base-patch16-224/aitk/vit-base-patch16-224_dml_inference_sample.ipynb +++ b/google-vit-base-patch16-224/aitk/vit-base-patch16-224_dml_inference_sample.ipynb @@ -209,25 +209,7 @@ ] } ], - "metadata": { - "kernelspec": { - "display_name": "Python-WCR-win32-x64-3.12.9", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.9" - } - }, + "metadata": {}, "nbformat": 4, "nbformat_minor": 4 } diff --git a/google-vit-base-patch16-224/aitk/vit-base-patch16-224_trtrtx_inference_sample.ipynb b/google-vit-base-patch16-224/aitk/vit-base-patch16-224_trtrtx_inference_sample.ipynb index c7bb23c5..7b3c0075 100644 --- a/google-vit-base-patch16-224/aitk/vit-base-patch16-224_trtrtx_inference_sample.ipynb +++ b/google-vit-base-patch16-224/aitk/vit-base-patch16-224_trtrtx_inference_sample.ipynb @@ -209,25 +209,7 @@ ] } ], - "metadata": { - "kernelspec": { - "display_name": "Python-WCR-win32-x64-3.12.9", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.9" - } - }, + "metadata": {}, "nbformat": 4, "nbformat_minor": 4 } diff --git a/google-vit-base-patch16-224/aitk/vit-base-patch16-224_webgpu_inference_sample.ipynb b/google-vit-base-patch16-224/aitk/vit-base-patch16-224_webgpu_inference_sample.ipynb index fa55eb61..4f3ab189 100644 --- a/google-vit-base-patch16-224/aitk/vit-base-patch16-224_webgpu_inference_sample.ipynb +++ b/google-vit-base-patch16-224/aitk/vit-base-patch16-224_webgpu_inference_sample.ipynb @@ -209,25 +209,7 @@ ] } ], - "metadata": { - "kernelspec": { - "display_name": "Python-WCR-win32-x64-3.12.9", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.9" - } - }, + "metadata": {}, "nbformat": 4, "nbformat_minor": 4 } diff --git a/meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_webgpu_config.json b/meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_webgpu_config.json index c27b5153..4a2e2a3f 100644 --- a/meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_webgpu_config.json +++ b/meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_webgpu_config.json @@ -35,4 +35,4 @@ "cache_dir": "cache", "no_artifacts": true, "evaluate_input_model": false -} \ No newline at end of file +} diff --git a/microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_webgpu_config.json b/microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_webgpu_config.json index 71037506..3f1a4b36 100644 --- a/microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_webgpu_config.json +++ b/microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_webgpu_config.json @@ -35,4 +35,4 @@ "cache_dir": "cache", "no_artifacts": true, "evaluate_input_model": false -} \ No newline at 
end of file +} From ccbac486caa11e4d9bc8c83457f02bbb2aee6029 Mon Sep 17 00:00:00 2001 From: Chao Zhang Date: Tue, 26 Aug 2025 11:18:54 +0800 Subject: [PATCH 10/14] update readme --- Qwen-Qwen2.5-1.5B-Instruct/aitk/README.md | 1 + deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/README.md | 1 + google-bert-bert-base-multilingual-cased/aitk/README.md | 2 +- google-vit-base-patch16-224/aitk/README.md | 2 +- intel-bert-base-uncased-mrpc/aitk/README.md | 2 +- laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/README.md | 4 ++-- meta-llama-Llama-3.2-1B-Instruct/aitk/README.md | 1 + microsoft-Phi-3.5-mini-instruct/aitk/README.md | 1 + microsoft-resnet-50/aitk/README.md | 2 +- openai-clip-vit-base-patch16/aitk/README.md | 4 ++-- openai-clip-vit-base-patch32/aitk/README.md | 4 ++-- 11 files changed, 14 insertions(+), 10 deletions(-) diff --git a/Qwen-Qwen2.5-1.5B-Instruct/aitk/README.md b/Qwen-Qwen2.5-1.5B-Instruct/aitk/README.md index 5eb0ccc1..1722fceb 100644 --- a/Qwen-Qwen2.5-1.5B-Instruct/aitk/README.md +++ b/Qwen-Qwen2.5-1.5B-Instruct/aitk/README.md @@ -7,6 +7,7 @@ This repository demonstrates the optimization of the [Qwen2.5-1.5B-Instruct](htt + This process extends the QDQ flow and compiling specifically for **Qualcomm NPUs** - OpenVINO for Intel NPU + This process uses OpenVINO specific passes like `OpenVINOOptimumConversion`, `OpenVINOIoUpdate` and `OpenVINOEncapsulation` +- Float downcasting for NVIDIA TRT for RTX GPU / WebGPU for general GPU - DML for general GPU + This process uses AutoAWQ and ModelBuilder diff --git a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/README.md b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/README.md index c34dd7c3..7977e246 100644 --- a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/README.md +++ b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/README.md @@ -7,6 +7,7 @@ This repository demonstrates the optimization of the [DeepSeek-R1-Distill-Qwen-1 + This process extends the QDQ flow and compiling specifically for **Qualcomm NPUs** - OpenVINO for Intel NPU + This process uses OpenVINO specific passes like `OpenVINOOptimumConversion`, `OpenVINOIoUpdate` and `OpenVINOEncapsulation` +- Float downcasting for NVIDIA TRT for RTX GPU / WebGPU for general GPU - DML for general GPU + This process uses AutoAWQ and ModelBuilder diff --git a/google-bert-bert-base-multilingual-cased/aitk/README.md b/google-bert-bert-base-multilingual-cased/aitk/README.md index 46ba8a03..70a8e739 100644 --- a/google-bert-bert-base-multilingual-cased/aitk/README.md +++ b/google-bert-bert-base-multilingual-cased/aitk/README.md @@ -4,7 +4,7 @@ This folder contains examples of BERT optimization using different workflows. - QDQ for Qualcomm NPU / AMD NPU - OpenVINO for Intel NPU -- Float downcasting for NVIDIA TRT for RTX GPU / DML for general GPU +- Float downcasting for NVIDIA TRT for RTX GPU / DML for general GPU / WebGPU for general GPU ## QDQ for Qualcomm NPU / AMD NPU diff --git a/google-vit-base-patch16-224/aitk/README.md b/google-vit-base-patch16-224/aitk/README.md index 328dea84..00cdce44 100644 --- a/google-vit-base-patch16-224/aitk/README.md +++ b/google-vit-base-patch16-224/aitk/README.md @@ -4,7 +4,7 @@ This folder contains examples of VIT optimization using different workflows. 
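The notebook hunks above collapse each sample's pinned kernelspec and language_info block down to an empty `"metadata": {}`. A minimal sketch of scripting that cleanup with nbformat; the package choice and the file path are assumptions for illustration, not part of this series:

```python
# Sketch: blank notebook-level metadata so samples stop pinning a local
# Python-WCR kernel. Assumes the nbformat package; the path is illustrative.
import nbformat

path = "vit-base-patch16-224_dml_inference_sample.ipynb"
nb = nbformat.read(path, as_version=4)  # load the notebook as nbformat v4
nb.metadata = {}                        # drop kernelspec/language_info pins
nbformat.write(nb, path)                # cell content is left untouched
```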
- QDQ for Qualcomm NPU / AMD NPU - OpenVINO for Intel NPU -- Float downcasting for NVIDIA TRT for RTX GPU / DML for general GPU +- Float downcasting for NVIDIA TRT for RTX GPU / DML for general GPU / WebGPU for general GPU ## Optimization Workflows diff --git a/intel-bert-base-uncased-mrpc/aitk/README.md b/intel-bert-base-uncased-mrpc/aitk/README.md index 04380886..6cedd50d 100644 --- a/intel-bert-base-uncased-mrpc/aitk/README.md +++ b/intel-bert-base-uncased-mrpc/aitk/README.md @@ -4,7 +4,7 @@ This folder contains examples of BERT optimization using different workflows. - QDQ for Qualcomm NPU / AMD NPU - OpenVINO for Intel NPU -- Float downcasting for NVIDIA TRT for RTX GPU / DML for general GPU +- Float downcasting for NVIDIA TRT for RTX GPU / DML for general GPU / WebGPU for general GPU ## QDQ for Qualcomm NPU / AMD NPU diff --git a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/README.md b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/README.md index 0704a091..b55283ed 100644 --- a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/README.md +++ b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/README.md @@ -5,7 +5,7 @@ This folder contains examples of Laion Clip optimization using different workflo - Text and vision model QDQ for Qualcomm NPU - QDQ for AMD NPU - OpenVINO for Intel NPU -- Float downcasting for NVIDIA TRT for RTX GPU / DML for general GPU +- Float downcasting for NVIDIA TRT for RTX GPU / DML for general GPU / WebGPU for general GPU ## Laion Clip text optimization with QDQ for Qualcomm NPU @@ -48,7 +48,7 @@ This example performs Laion Clip optimization with QDQ in one workflow. It perfo This example performs Laion Clip optimization with OpenVINO in one workflow for Intel NPU. -## Float downcasting for NVIDIA TRT for RTX GPU / DML for general GPU +## Float downcasting for NVIDIA TRT for RTX GPU / DML for general GPU / WebGPU for general GPU It performs the optimization pipeline: diff --git a/meta-llama-Llama-3.2-1B-Instruct/aitk/README.md b/meta-llama-Llama-3.2-1B-Instruct/aitk/README.md index 09ecd4bb..37c417c8 100644 --- a/meta-llama-Llama-3.2-1B-Instruct/aitk/README.md +++ b/meta-llama-Llama-3.2-1B-Instruct/aitk/README.md @@ -7,6 +7,7 @@ This repository demonstrates the optimization of the [Llama-3.2-1B-Instruct](htt + This process extends the QDQ flow and compiling specifically for **Qualcomm NPUs** - OpenVINO for Intel NPU + This process uses OpenVINO specific passes like `OpenVINOOptimumConversion`, `OpenVINOIoUpdate` and `OpenVINOEncapsulation` +- Float downcasting for NVIDIA TRT for RTX GPU / WebGPU for general GPU - DML for general GPU + This process uses AutoAWQ and ModelBuilder diff --git a/microsoft-Phi-3.5-mini-instruct/aitk/README.md b/microsoft-Phi-3.5-mini-instruct/aitk/README.md index d045b20c..2c1cf644 100644 --- a/microsoft-Phi-3.5-mini-instruct/aitk/README.md +++ b/microsoft-Phi-3.5-mini-instruct/aitk/README.md @@ -7,6 +7,7 @@ This repository demonstrates the optimization of the [Microsoft Phi-3.5 Mini Ins + This process extends the QDQ flow and compiling specifically for **Qualcomm NPUs** - OpenVINO for Intel NPU + This process uses OpenVINO specific passes like `OpenVINOOptimumConversion`, `OpenVINOIoUpdate` and `OpenVINOEncapsulation` +- Float downcasting for NVIDIA TRT for RTX GPU / WebGPU for general GPU - DML for general GPU + This process uses AutoAWQ and ModelBuilder diff --git a/microsoft-resnet-50/aitk/README.md b/microsoft-resnet-50/aitk/README.md index 1d38f19e..71044382 100644 --- a/microsoft-resnet-50/aitk/README.md +++ b/microsoft-resnet-50/aitk/README.md 
@@ -4,7 +4,7 @@ This folder contains examples of ResNet optimization using different workflows. - QDQ for Qualcomm NPU / AMD NPU - OpenVINO for Intel NPU -- Float downcasting for NVIDIA TRT for RTX GPU / DML for general GPU +- Float downcasting for NVIDIA TRT for RTX GPU / DML for general GPU / WebGPU for general GPU ## QDQ for Qualcomm NPU / AMD NPU diff --git a/openai-clip-vit-base-patch16/aitk/README.md b/openai-clip-vit-base-patch16/aitk/README.md index 84c588bc..036bf19a 100644 --- a/openai-clip-vit-base-patch16/aitk/README.md +++ b/openai-clip-vit-base-patch16/aitk/README.md @@ -5,7 +5,7 @@ This folder contains examples of Openai Clip optimization using different workfl - Text and vision model QDQ for Qualcomm NPU - QDQ for AMD NPU - OpenVINO for Intel NPU -- Float downcasting for NVIDIA TRT for RTX GPU / DML for general GPU +- Float downcasting for NVIDIA TRT for RTX GPU / DML for general GPU / WebGPU for general GPU ## Openai Clip text optimization with QDQ for Qualcomm NPU @@ -48,7 +48,7 @@ This example performs Openai Clip optimization with QDQ in one workflow. It perf This example performs Openai Clip optimization with OpenVINO in one workflow for Intel NPU. -## Float downcasting for NVIDIA TRT for RTX GPU / DML for general GPU +## Float downcasting for NVIDIA TRT for RTX GPU / DML for general GPU / WebGPU for general GPU It performs the optimization pipeline: diff --git a/openai-clip-vit-base-patch32/aitk/README.md b/openai-clip-vit-base-patch32/aitk/README.md index 84c588bc..036bf19a 100644 --- a/openai-clip-vit-base-patch32/aitk/README.md +++ b/openai-clip-vit-base-patch32/aitk/README.md @@ -5,7 +5,7 @@ This folder contains examples of Openai Clip optimization using different workfl - Text and vision model QDQ for Qualcomm NPU - QDQ for AMD NPU - OpenVINO for Intel NPU -- Float downcasting for NVIDIA TRT for RTX GPU / DML for general GPU +- Float downcasting for NVIDIA TRT for RTX GPU / DML for general GPU / WebGPU for general GPU ## Openai Clip text optimization with QDQ for Qualcomm NPU @@ -48,7 +48,7 @@ This example performs Openai Clip optimization with QDQ in one workflow. It perf This example performs Openai Clip optimization with OpenVINO in one workflow for Intel NPU. 
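Each of these README workflows is backed by an Olive JSON recipe in the same aitk folder (for example the `*_trtrtx_config.json` files added earlier in this series). A hedged sketch of running one programmatically; `olive_run` is Olive's documented entry point, and the working directory and config name here are assumptions:

```python
# Sketch: execute one of the float-downcast recipes with Olive.
# Assumes the olive-ai package is installed and that the JSON recipe
# sits in the current working directory, as in the aitk folders.
from olive.workflows import run as olive_run

# e.g. the NVIDIA TensorRT for RTX recipe added in PATCH 01
olive_run("qwen2_5_trtrtx_config.json")
```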
-## Float downcasting for NVIDIA TRT for RTX GPU / DML for general GPU +## Float downcasting for NVIDIA TRT for RTX GPU / DML for general GPU / WebGPU for general GPU It performs the optimization pipeline: From 025f9e969451f86f85f1544255570d6b551d5cb6 Mon Sep 17 00:00:00 2001 From: Chao Zhang Date: Tue, 14 Oct 2025 17:51:39 +0800 Subject: [PATCH 11/14] sanitize --- .aitk/configs/checks.json | 19 +- .aitk/configs/model_list.json | 174 +----------------- .aitk/docs/guide/ModelList.md | 22 +-- .../aitk/qwen2_5_webgpu_config.json.config | 1 + .../aitk/deepseek_webgpu_config.json.config | 1 + .../aitk/README.md | 2 +- .../aitk/llama3_2_webgpu_config.json.config | 1 + .../aitk/phi3_5_webgpu_config.json.config | 1 + .../aitk/inference_sample.ipynb | 25 --- .../aitk/inference_sample.ipynb | 25 --- .../aitk/inference_sample.ipynb | 25 --- 11 files changed, 34 insertions(+), 262 deletions(-) diff --git a/.aitk/configs/checks.json b/.aitk/configs/checks.json index d9b8e40d..6c79cbf4 100644 --- a/.aitk/configs/checks.json +++ b/.aitk/configs/checks.json @@ -1,13 +1,16 @@ { - "configCheck": 101, + "configCheck": 109, + "copyCheck": 169, "extensionCheck": 1, - "gitignoreCheck": 32, - "inferenceModelCheck": 22, - "ipynbCheck": 56, - "modelProjectCheck": 33, + "gitignoreCheck": 35, + "inferenceModelCheck": 25, + "ipynbCheck": 53, + "licenseCheck": 34, + "modelProjectCheck": 36, "oliveCheck": 0, - "oliveJsonCheck": 101, - "pathCheck": 792, - "requirementsCheck": 32, + "oliveJsonCheck": 109, + "pathCheck": 704, + "requirementsCheck": 35, + "templateCheck": 1, "venvRequirementsCheck": 12 } diff --git a/.aitk/configs/model_list.json b/.aitk/configs/model_list.json index 6c903a5a..2d0861a7 100644 --- a/.aitk/configs/model_list.json +++ b/.aitk/configs/model_list.json @@ -12,7 +12,8 @@ "IntelCPU", "IntelGPU", "IntelNPU", - "DML" + "DML", + "WebGPU" ], "architecture": "Transformer", "status": "Ready", @@ -32,7 +33,8 @@ "IntelCPU", "IntelGPU", "IntelNPU", - "DML" + "DML", + "WebGPU" ], "architecture": "CNN", "status": "Ready", @@ -168,146 +170,6 @@ "version": 3, "p0": true }, - { - "displayName": "microsoft/Phi-3-mini-128k-instruct", - "icon": "ms", - "modelLink": "https://huggingface.co/microsoft/Phi-3-mini-128k-instruct", - "id": "huggingface/microsoft/Phi-3-mini-128k-instruct", - "groupId": "huggingface/microsoft/Phi-3-mini-4k-instruct", - "groupItemName": "128K", - "runtimes": [ - "IntelGPU" - ], - "architecture": "Transformer", - "status": "Ready", - "relativePath": "microsoft-Phi-3-mini-128k-instruct/aitk", - "version": 2 - }, - { - "displayName": "microsoft/Phi-3-mini-4k-instruct", - "icon": "ms", - "modelLink": "https://huggingface.co/microsoft/Phi-3-mini-4k-instruct", - "id": "huggingface/microsoft/Phi-3-mini-4k-instruct", - "groupId": "huggingface/microsoft/Phi-3-mini-4k-instruct", - "groupItemName": "4K", - "runtimes": [ - "IntelGPU", - "IntelNPU" - ], - "architecture": "Transformer", - "status": "Ready", - "relativePath": "microsoft-Phi-3-mini-4k-instruct/aitk", - "version": 2 - }, - { - "displayName": "microsoft/Phi-3.5-mini-instruct", - "icon": "ms", - "modelLink": "https://huggingface.co/microsoft/Phi-3.5-mini-instruct", - "id": "huggingface/microsoft/Phi-3.5-mini-instruct", - "runtimes": [ - "QNN", - "AMDNPU", - "NvidiaTRTRTX", - "IntelCPU", - "IntelGPU", - "IntelNPU", - "DML", - "WebGPU" - ], - "architecture": "Transformer", - "status": "Ready", - "relativePath": "microsoft-Phi-3.5-mini-instruct/aitk", - "version": 1, - "p0": true - }, - { - "displayName": "microsoft/Phi-4-mini-instruct", - 
"icon": "ms", - "modelLink": "https://huggingface.co/microsoft/Phi-4-mini-instruct", - "id": "huggingface/microsoft/Phi-4-mini-instruct", - "runtimes": [ - "IntelGPU", - "IntelNPU" - ], - "architecture": "Transformer", - "status": "Ready", - "relativePath": "microsoft-Phi-4-mini-instruct/aitk", - "version": 2 - }, - { - "displayName": "microsoft/Phi-4-mini-reasoning", - "icon": "ms", - "modelLink": "https://huggingface.co/microsoft/Phi-4-mini-reasoning", - "id": "huggingface/microsoft/Phi-4-mini-reasoning", - "runtimes": [ - "IntelGPU", - "IntelNPU" - ], - "architecture": "Transformer", - "status": "Ready", - "relativePath": "microsoft-Phi-4-mini-reasoning/aitk", - "version": 2 - }, - { - "displayName": "microsoft/Phi-4-reasoning", - "icon": "ms", - "modelLink": "https://huggingface.co/microsoft/Phi-4-reasoning", - "id": "huggingface/microsoft/Phi-4-reasoning", - "runtimes": [ - "IntelNPU" - ], - "architecture": "Transformer", - "status": "Ready", - "relativePath": "microsoft-Phi-4-reasoning/aitk", - "version": 2 - }, - { - "displayName": "microsoft/Phi-4-reasoning-plus", - "icon": "ms", - "modelLink": "https://huggingface.co/microsoft/Phi-4-reasoning-plus", - "id": "huggingface/microsoft/Phi-4-reasoning-plus", - "runtimes": [ - "IntelNPU" - ], - "architecture": "Transformer", - "status": "Ready", - "relativePath": "microsoft-Phi-4-reasoning-plus/aitk", - "version": 2 - }, - { - "displayName": "Microsoft/ResNet-50", - "icon": "ms", - "modelLink": "https://huggingface.co/microsoft/resnet-50", - "id": "huggingface/microsoft/resnet-50", - "runtimes": [ - "QNN", - "AMDNPU", - "NvidiaTRTRTX", - "IntelCPU", - "IntelGPU", - "IntelNPU", - "DML", - "WebGPU" - ], - "architecture": "CNN", - "status": "Ready", - "relativePath": "microsoft-resnet-50/aitk", - "version": 1, - "p0": true - }, - { - "displayName": "mistralai/Mistral-7B-Instruct-v0.3", - "icon": "mistralai", - "modelLink": "https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.3", - "id": "huggingface/mistralai/Mistral-7B-Instruct-v0.3", - "runtimes": [ - "IntelGPU" - ], - "architecture": "Transformer", - "status": "Ready", - "relativePath": "mistralai-Mistral-7B-Instruct-v0.3/aitk", - "version": 2 - }, { "displayName": "openai/clip-vit-base-patch16", "icon": "OpenAI", @@ -364,7 +226,8 @@ "IntelCPU", "IntelGPU", "IntelNPU", - "DML" + "DML", + "WebGPU" ], "architecture": "Transformer", "status": "Ready", @@ -589,30 +452,7 @@ "architecture": "Transformer", "status": "Ready", "relativePath": "Qwen-Qwen2.5-0.5B-Instruct/aitk", - "version": 2 - }, - { - "displayName": "Qwen/Qwen2.5-1.5B-Instruct", - "icon": "qwen", - "modelLink": "https://huggingface.co/Qwen/Qwen2.5-1.5B-Instruct", - "id": "huggingface/Qwen/Qwen2.5-1.5B-Instruct", - "groupId": "huggingface/Qwen/Qwen2.5-1.5B-Instruct", - "groupItemName": "1.5B", - "runtimes": [ - "QNN", - "AMDNPU", - "NvidiaTRTRTX", - "IntelCPU", - "IntelGPU", - "IntelNPU", - "DML", - "WebGPU" - ], - "architecture": "Transformer", - "status": "Ready", - "relativePath": "Qwen-Qwen2.5-1.5B-Instruct/aitk", - "version": 1, - "p0": true + "version": 3 }, { "displayName": "Qwen/Qwen2.5-3B-Instruct", diff --git a/.aitk/docs/guide/ModelList.md b/.aitk/docs/guide/ModelList.md index e55a23d5..c7654b37 100644 --- a/.aitk/docs/guide/ModelList.md +++ b/.aitk/docs/guide/ModelList.md @@ -5,23 +5,23 @@ | Model Name | Supported Runtimes | |------------|--------------------| | [Deepseek R1 Distill Llama 8B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Llama-8B) | [Intel 
GPU](../../../deepseek-ai-DeepSeek-R1-Distill-Llama-8B/aitk/deepseek_ov_config.json), [Intel NPU](../../../deepseek-ai-DeepSeek-R1-Distill-Llama-8B/aitk/deepseek_ov_npu_config.json) | -| [Deepseek R1 Distill Qwen 1.5B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B) | [Qualcomm NPU](../../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_qnn_config.json), [AMD NPU](../../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_vitis_ai_config.json), [NVIDIA TensorRT for RTX](../../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_trtrtx_config.json), [Intel CPU](../../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_ov_gpu_config.json), [Intel GPU](../../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_ov_gpu_config.json), [Intel NPU](../../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_ov_config.json), [DirectML](../../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_dml_config.json) | +| [Deepseek R1 Distill Qwen 1.5B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B) | [Qualcomm NPU](../../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_qnn_config.json), [AMD NPU](../../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_vitis_ai_config.json), [NVIDIA TensorRT for RTX](../../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_trtrtx_config.json), [Intel CPU](../../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_ov_gpu_config.json), [Intel GPU](../../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_ov_gpu_config.json), [Intel NPU](../../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_ov_config.json), [DirectML](../../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_dml_config.json), [WebGPU](../../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_webgpu_config.json) | | [Deepseek R1 Distill Qwen 14B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B) | [Intel GPU](../../../deepseek-ai-DeepSeek-R1-Distill-Qwen-14B/aitk/deepseek_ov_config.json) | | [Deepseek R1 Distill Qwen 7B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B) | [Intel GPU](../../../deepseek-ai-DeepSeek-R1-Distill-Qwen-7B/aitk/deepseek_ov_config.json), [Intel NPU](../../../deepseek-ai-DeepSeek-R1-Distill-Qwen-7B/aitk/deepseek_ov_npu_config.json) | | [Llama 3.1 8B Instruct](https://huggingface.co/meta-llama/Llama-3.1-8B-Instruct) | [Qualcomm NPU](../../../meta-llama-Llama-3.1-8B-Instruct/aitk/llama3_1_qnn_config.json), [AMD NPU](../../../meta-llama-Llama-3.1-8B-Instruct/aitk/llama3_1_vitis_ai_config.json), [NVIDIA TensorRT for RTX](../../../meta-llama-Llama-3.1-8B-Instruct/aitk/llama3_1_trtrtx_config.json), [Intel CPU](../../../meta-llama-Llama-3.1-8B-Instruct/aitk/llama3_1_ov_config.json), [Intel GPU](../../../meta-llama-Llama-3.1-8B-Instruct/aitk/llama3_1_ov_config.json), [Intel NPU](../../../meta-llama-Llama-3.1-8B-Instruct/aitk/llama3_1_ov_config.json), [DirectML](../../../meta-llama-Llama-3.1-8B-Instruct/aitk/llama3_1_dml_config.json) | -| [Llama 3.2 1B Instruct](https://huggingface.co/meta-llama/Llama-3.2-1B-Instruct) | [Qualcomm NPU](../../../meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_qnn_config.json), [AMD NPU](../../../meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_vitis_ai_config.json), [NVIDIA TensorRT for RTX](../../../meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_trtrtx_config.json), [Intel CPU](../../../meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_ov_config.json), [Intel 
GPU](../../../meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_ov_config.json), [Intel NPU](../../../meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_ov_config.json), [DirectML](../../../meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_dml_config.json) | +| [Llama 3.2 1B Instruct](https://huggingface.co/meta-llama/Llama-3.2-1B-Instruct) | [Qualcomm NPU](../../../meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_qnn_config.json), [AMD NPU](../../../meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_vitis_ai_config.json), [NVIDIA TensorRT for RTX](../../../meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_trtrtx_config.json), [Intel CPU](../../../meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_ov_config.json), [Intel GPU](../../../meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_ov_config.json), [Intel NPU](../../../meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_ov_config.json), [DirectML](../../../meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_dml_config.json), [WebGPU](../../../meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_webgpu_config.json) | | [Mistral 7B Instruct V0.2](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2) | [Intel GPU](../../../mistralai-Mistral-7B-Instruct-v0.2/aitk/Mistral_7B_Instruct_v0.2_gpu_context_ov_dy.json), [Intel NPU](../../../mistralai-Mistral-7B-Instruct-v0.2/aitk/Mistral_7B_Instruct_v0.2_npu_context_ov_dy.json) | | [Mistral 7B Instruct V0.3](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.3) | [Intel GPU](../../../mistralai-Mistral-7B-Instruct-v0.3/aitk/mistral-7b-instruct-v0.3-ov.json) | | [Phi 3 Mini 128K Instruct](https://huggingface.co/microsoft/Phi-3-mini-128k-instruct) | [Intel GPU](../../../microsoft-Phi-3-mini-128k-instruct/aitk/phi3_ov_config.json) | | [Phi 3 Mini 4K Instruct](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct) | [Intel GPU](../../../microsoft-Phi-3-mini-4k-instruct/aitk/phi3_ov_config.json), [Intel NPU](../../../microsoft-Phi-3-mini-4k-instruct/aitk/phi3_ov_npu_config.json) | -| [Phi 3.5 Mini Instruct](https://huggingface.co/microsoft/Phi-3.5-mini-instruct) | [Qualcomm NPU](../../../microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_qnn_config.json), [AMD NPU](../../../microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_vitis_ai_config.json), [NVIDIA TensorRT for RTX](../../../microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_trtrtx_config.json), [Intel CPU](../../../microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_ov_gpu_config.json), [Intel GPU](../../../microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_ov_gpu_config.json), [Intel NPU](../../../microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_ov_config.json), [DirectML](../../../microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_dml_config.json) | +| [Phi 3.5 Mini Instruct](https://huggingface.co/microsoft/Phi-3.5-mini-instruct) | [Qualcomm NPU](../../../microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_qnn_config.json), [AMD NPU](../../../microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_vitis_ai_config.json), [NVIDIA TensorRT for RTX](../../../microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_trtrtx_config.json), [Intel CPU](../../../microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_ov_gpu_config.json), [Intel GPU](../../../microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_ov_gpu_config.json), [Intel NPU](../../../microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_ov_config.json), [DirectML](../../../microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_dml_config.json), [WebGPU](../../../microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_webgpu_config.json) | | [Phi 4](https://huggingface.co/microsoft/Phi-4) | [Intel 
GPU](../../../microsoft-Phi-4/aitk/phi4_ov_config.json) | | [Phi 4 Mini Instruct](https://huggingface.co/microsoft/Phi-4-mini-instruct) | [Intel GPU](../../../microsoft-Phi-4-mini-instruct/aitk/phi4_ov_config.json), [Intel NPU](../../../microsoft-Phi-4-mini-instruct/aitk/phi4_ov_npu_config.json) | | [Phi 4 Mini Reasoning](https://huggingface.co/microsoft/Phi-4-mini-reasoning) | [Intel GPU](../../../microsoft-Phi-4-mini-reasoning/aitk/phi4_ov_gpu_config.json), [Intel NPU](../../../microsoft-Phi-4-mini-reasoning/aitk/phi4_ov_config.json) | | [Phi 4 Reasoning](https://huggingface.co/microsoft/Phi-4-reasoning) | [Intel NPU](../../../microsoft-Phi-4-reasoning/aitk/phi4_ov_config.json) | | [Phi 4 Reasoning Plus](https://huggingface.co/microsoft/Phi-4-reasoning-plus) | [Intel NPU](../../../microsoft-Phi-4-reasoning-plus/aitk/phi4_ov_config.json) | | [Qwen2.5 0.5B Instruct](https://huggingface.co/Qwen/Qwen2.5-0.5B-Instruct) | [Intel GPU](../../../Qwen-Qwen2.5-0.5B-Instruct/aitk/qwen2_5_ov_config.json), [Intel NPU](../../../Qwen-Qwen2.5-0.5B-Instruct/aitk/qwen2_5_ov_npu_config.json) | -| [Qwen2.5 1.5B Instruct](https://huggingface.co/Qwen/Qwen2.5-1.5B-Instruct) | [Qualcomm NPU](../../../Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_qnn_config.json), [AMD NPU](../../../Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_vitis_ai_config.json), [NVIDIA TensorRT for RTX](../../../Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_trtrtx_config.json), [Intel CPU](../../../Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_ov_gpu_config.json), [Intel GPU](../../../Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_ov_gpu_config.json), [Intel NPU](../../../Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_ov_config.json), [DirectML](../../../Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_dml_config.json) | +| [Qwen2.5 1.5B Instruct](https://huggingface.co/Qwen/Qwen2.5-1.5B-Instruct) | [Qualcomm NPU](../../../Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_qnn_config.json), [AMD NPU](../../../Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_vitis_ai_config.json), [NVIDIA TensorRT for RTX](../../../Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_trtrtx_config.json), [Intel CPU](../../../Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_ov_gpu_config.json), [Intel GPU](../../../Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_ov_gpu_config.json), [Intel NPU](../../../Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_ov_config.json), [DirectML](../../../Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_dml_config.json), [WebGPU](../../../Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_webgpu_config.json) | | [Qwen2.5 14B Instruct](https://huggingface.co/Qwen/Qwen2.5-14B-Instruct) | [Intel GPU](../../../Qwen-Qwen2.5-14B-Instruct/aitk/qwen2_5_ov_config.json) | | [Qwen2.5 3B Instruct](https://huggingface.co/Qwen/Qwen2.5-3B-Instruct) | [Intel GPU](../../../Qwen-Qwen2.5-3B-Instruct/aitk/qwen2_5_ov_config.json) | | [Qwen2.5 7B Instruct](https://huggingface.co/Qwen/Qwen2.5-7B-Instruct) | [Intel GPU](../../../Qwen-Qwen2.5-7B-Instruct/aitk/qwen2_5_ov_config.json), [Intel NPU](../../../Qwen-Qwen2.5-7B-Instruct/aitk/qwen2_5_ov_npu_config.json) | @@ -34,10 +34,10 @@ | Model Name | Supported Runtimes | |------------|--------------------| -| [Bert Base Multilingual Cased](https://huggingface.co/google-bert/bert-base-multilingual-cased) | [Qualcomm NPU](../../../google-bert-bert-base-multilingual-cased/aitk/bert-base-multilingual-cased_qdq_qnn.json), [AMD NPU](../../../google-bert-bert-base-multilingual-cased/aitk/bert-base-multilingual-cased_qdq_amd.json), [NVIDIA TensorRT for 
RTX](../../../google-bert-bert-base-multilingual-cased/aitk/bert-base-multilingual-cased_trtrtx.json), [Intel CPU](../../../google-bert-bert-base-multilingual-cased/aitk/bert-base-multilingual-cased_context_ov_static.json), [Intel GPU](../../../google-bert-bert-base-multilingual-cased/aitk/bert-base-multilingual-cased_context_ov_static.json), [Intel NPU](../../../google-bert-bert-base-multilingual-cased/aitk/bert-base-multilingual-cased_context_ov_static.json), [DirectML](../../../google-bert-bert-base-multilingual-cased/aitk/bert-base-multilingual-cased_dml.json) | -| [Bert Base Uncased Mrpc](https://huggingface.co/Intel/bert-base-uncased-mrpc) | [Qualcomm NPU](../../../intel-bert-base-uncased-mrpc/aitk/bert_qdq_qnn.json), [AMD NPU](../../../intel-bert-base-uncased-mrpc/aitk/bert_qdq_amd.json), [NVIDIA TensorRT for RTX](../../../intel-bert-base-uncased-mrpc/aitk/bert_trtrtx.json), [Intel CPU](../../../intel-bert-base-uncased-mrpc/aitk/bert_ov.json), [Intel GPU](../../../intel-bert-base-uncased-mrpc/aitk/bert_ov.json), [Intel NPU](../../../intel-bert-base-uncased-mrpc/aitk/bert_ov.json), [DirectML](../../../intel-bert-base-uncased-mrpc/aitk/bert_dml.json) | -| [Clip Vit B 32 Laion2B S34B B79K](https://huggingface.co/laion/CLIP-ViT-B-32-laion2B-s34B-b79K) | [Qualcomm NPU](../../../laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_qnn.json), [AMD NPU](../../../laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_qdq_amd.json), [NVIDIA TensorRT for RTX](../../../laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_trtrtx.json), [Intel CPU](../../../laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_ov.json), [Intel GPU](../../../laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_ov.json), [Intel NPU](../../../laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_ov.json), [DirectML](../../../laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_dml.json) | -| [Clip Vit Base Patch16](https://huggingface.co/openai/clip-vit-base-patch16) | [Qualcomm NPU](../../../openai-clip-vit-base-patch16/aitk/openai_clip_qnn.json), [AMD NPU](../../../openai-clip-vit-base-patch16/aitk/openai_clip_qdq_amd.json), [NVIDIA TensorRT for RTX](../../../openai-clip-vit-base-patch16/aitk/openai_clip_trtrtx.json), [Intel CPU](../../../openai-clip-vit-base-patch16/aitk/openai_clip_ov.json), [Intel GPU](../../../openai-clip-vit-base-patch16/aitk/openai_clip_ov.json), [Intel NPU](../../../openai-clip-vit-base-patch16/aitk/openai_clip_ov.json), [DirectML](../../../openai-clip-vit-base-patch16/aitk/openai_clip_dml.json) | -| [Clip Vit Base Patch32](https://huggingface.co/openai/clip-vit-base-patch32) | [Qualcomm NPU](../../../openai-clip-vit-base-patch32/aitk/openai_clip_qnn.json), [AMD NPU](../../../openai-clip-vit-base-patch32/aitk/openai_clip_qdq_amd.json), [NVIDIA TensorRT for RTX](../../../openai-clip-vit-base-patch32/aitk/openai_clip_trtrtx.json), [Intel CPU](../../../openai-clip-vit-base-patch32/aitk/openai_clip_ov.json), [Intel GPU](../../../openai-clip-vit-base-patch32/aitk/openai_clip_ov.json), [Intel NPU](../../../openai-clip-vit-base-patch32/aitk/openai_clip_ov.json), [DirectML](../../../openai-clip-vit-base-patch32/aitk/openai_clip_dml.json) | -| [Resnet 50](https://huggingface.co/microsoft/resnet-50) | [Qualcomm NPU](../../../microsoft-resnet-50/aitk/resnet_qdq_qnn.json), [AMD NPU](../../../microsoft-resnet-50/aitk/resnet_qdq_amd.json), [NVIDIA TensorRT for RTX](../../../microsoft-resnet-50/aitk/resnet_trtrtx.json), [Intel CPU](../../../microsoft-resnet-50/aitk/resnet_context_ov_static.json), 
[Intel GPU](../../../microsoft-resnet-50/aitk/resnet_context_ov_static.json), [Intel NPU](../../../microsoft-resnet-50/aitk/resnet_context_ov_static.json), [DirectML](../../../microsoft-resnet-50/aitk/resnet_dml.json) | -| [Vit Base Patch16 224](https://huggingface.co/google/vit-base-patch16-224) | [Qualcomm NPU](../../../google-vit-base-patch16-224/aitk/vit-base-patch16-224_qdq_qnn.json), [AMD NPU](../../../google-vit-base-patch16-224/aitk/vit-base-patch16-224_qdq_amd.json), [NVIDIA TensorRT for RTX](../../../google-vit-base-patch16-224/aitk/vit-base-patch16-224_trtrtx.json), [Intel CPU](../../../google-vit-base-patch16-224/aitk/vit_base_patch16_224_context_ov_static.json), [Intel GPU](../../../google-vit-base-patch16-224/aitk/vit_base_patch16_224_context_ov_static.json), [Intel NPU](../../../google-vit-base-patch16-224/aitk/vit_base_patch16_224_context_ov_static.json), [DirectML](../../../google-vit-base-patch16-224/aitk/vit-base-patch16-224_dml.json) | +| [Bert Base Multilingual Cased](https://huggingface.co/google-bert/bert-base-multilingual-cased) | [Qualcomm NPU](../../../google-bert-bert-base-multilingual-cased/aitk/bert-base-multilingual-cased_qdq_qnn.json), [AMD NPU](../../../google-bert-bert-base-multilingual-cased/aitk/bert-base-multilingual-cased_qdq_amd.json), [NVIDIA TensorRT for RTX](../../../google-bert-bert-base-multilingual-cased/aitk/bert-base-multilingual-cased_trtrtx.json), [Intel CPU](../../../google-bert-bert-base-multilingual-cased/aitk/bert-base-multilingual-cased_context_ov_static.json), [Intel GPU](../../../google-bert-bert-base-multilingual-cased/aitk/bert-base-multilingual-cased_context_ov_static.json), [Intel NPU](../../../google-bert-bert-base-multilingual-cased/aitk/bert-base-multilingual-cased_context_ov_static.json), [DirectML](../../../google-bert-bert-base-multilingual-cased/aitk/bert-base-multilingual-cased_dml.json), [WebGPU](../../../google-bert-bert-base-multilingual-cased/aitk/bert-base-multilingual-cased_webgpu.json) | +| [Bert Base Uncased Mrpc](https://huggingface.co/Intel/bert-base-uncased-mrpc) | [Qualcomm NPU](../../../intel-bert-base-uncased-mrpc/aitk/bert_qdq_qnn.json), [AMD NPU](../../../intel-bert-base-uncased-mrpc/aitk/bert_qdq_amd.json), [NVIDIA TensorRT for RTX](../../../intel-bert-base-uncased-mrpc/aitk/bert_trtrtx.json), [Intel CPU](../../../intel-bert-base-uncased-mrpc/aitk/bert_ov.json), [Intel GPU](../../../intel-bert-base-uncased-mrpc/aitk/bert_ov.json), [Intel NPU](../../../intel-bert-base-uncased-mrpc/aitk/bert_ov.json), [DirectML](../../../intel-bert-base-uncased-mrpc/aitk/bert_dml.json), [WebGPU](../../../intel-bert-base-uncased-mrpc/aitk/bert_webgpu.json) | +| [Clip Vit B 32 Laion2B S34B B79K](https://huggingface.co/laion/CLIP-ViT-B-32-laion2B-s34B-b79K) | [Qualcomm NPU](../../../laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_qnn.json), [AMD NPU](../../../laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_qdq_amd.json), [NVIDIA TensorRT for RTX](../../../laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_trtrtx.json), [Intel CPU](../../../laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_ov.json), [Intel GPU](../../../laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_ov.json), [Intel NPU](../../../laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_ov.json), [DirectML](../../../laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_dml.json), [WebGPU](../../../laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/laion_clip_webgpu.json) | +| [Clip Vit Base 
Patch16](https://huggingface.co/openai/clip-vit-base-patch16) | [Qualcomm NPU](../../../openai-clip-vit-base-patch16/aitk/openai_clip_qnn.json), [AMD NPU](../../../openai-clip-vit-base-patch16/aitk/openai_clip_qdq_amd.json), [NVIDIA TensorRT for RTX](../../../openai-clip-vit-base-patch16/aitk/openai_clip_trtrtx.json), [Intel CPU](../../../openai-clip-vit-base-patch16/aitk/openai_clip_ov.json), [Intel GPU](../../../openai-clip-vit-base-patch16/aitk/openai_clip_ov.json), [Intel NPU](../../../openai-clip-vit-base-patch16/aitk/openai_clip_ov.json), [DirectML](../../../openai-clip-vit-base-patch16/aitk/openai_clip_dml.json), [WebGPU](../../../openai-clip-vit-base-patch16/aitk/openai_clip_webgpu.json) | +| [Clip Vit Base Patch32](https://huggingface.co/openai/clip-vit-base-patch32) | [Qualcomm NPU](../../../openai-clip-vit-base-patch32/aitk/openai_clip_qnn.json), [AMD NPU](../../../openai-clip-vit-base-patch32/aitk/openai_clip_qdq_amd.json), [NVIDIA TensorRT for RTX](../../../openai-clip-vit-base-patch32/aitk/openai_clip_trtrtx.json), [Intel CPU](../../../openai-clip-vit-base-patch32/aitk/openai_clip_ov.json), [Intel GPU](../../../openai-clip-vit-base-patch32/aitk/openai_clip_ov.json), [Intel NPU](../../../openai-clip-vit-base-patch32/aitk/openai_clip_ov.json), [DirectML](../../../openai-clip-vit-base-patch32/aitk/openai_clip_dml.json), [WebGPU](../../../openai-clip-vit-base-patch32/aitk/openai_clip_webgpu.json) | +| [Resnet 50](https://huggingface.co/microsoft/resnet-50) | [Qualcomm NPU](../../../microsoft-resnet-50/aitk/resnet_qdq_qnn.json), [AMD NPU](../../../microsoft-resnet-50/aitk/resnet_qdq_amd.json), [NVIDIA TensorRT for RTX](../../../microsoft-resnet-50/aitk/resnet_trtrtx.json), [Intel CPU](../../../microsoft-resnet-50/aitk/resnet_context_ov_static.json), [Intel GPU](../../../microsoft-resnet-50/aitk/resnet_context_ov_static.json), [Intel NPU](../../../microsoft-resnet-50/aitk/resnet_context_ov_static.json), [DirectML](../../../microsoft-resnet-50/aitk/resnet_dml.json), [WebGPU](../../../microsoft-resnet-50/aitk/resnet_webgpu.json) | +| [Vit Base Patch16 224](https://huggingface.co/google/vit-base-patch16-224) | [Qualcomm NPU](../../../google-vit-base-patch16-224/aitk/vit-base-patch16-224_qdq_qnn.json), [AMD NPU](../../../google-vit-base-patch16-224/aitk/vit-base-patch16-224_qdq_amd.json), [NVIDIA TensorRT for RTX](../../../google-vit-base-patch16-224/aitk/vit-base-patch16-224_trtrtx.json), [Intel CPU](../../../google-vit-base-patch16-224/aitk/vit_base_patch16_224_context_ov_static.json), [Intel GPU](../../../google-vit-base-patch16-224/aitk/vit_base_patch16_224_context_ov_static.json), [Intel NPU](../../../google-vit-base-patch16-224/aitk/vit_base_patch16_224_context_ov_static.json), [DirectML](../../../google-vit-base-patch16-224/aitk/vit-base-patch16-224_dml.json), [WebGPU](../../../google-vit-base-patch16-224/aitk/vit-base-patch16-224_webgpu.json) | diff --git a/Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_webgpu_config.json.config b/Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_webgpu_config.json.config index 60a17277..b3e6c90c 100644 --- a/Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_webgpu_config.json.config +++ b/Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_webgpu_config.json.config @@ -6,6 +6,7 @@ "autoGenerated": true, "useModelBuilder": "builder" }, + "needHFLogin": true, "addCpu": false, "runtime": { "autoGenerated": true, diff --git a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_webgpu_config.json.config 
b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_webgpu_config.json.config index 60a17277..b3e6c90c 100644 --- a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_webgpu_config.json.config +++ b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_webgpu_config.json.config @@ -6,6 +6,7 @@ "autoGenerated": true, "useModelBuilder": "builder" }, + "needHFLogin": true, "addCpu": false, "runtime": { "autoGenerated": true, diff --git a/meta-llama-Llama-3.1-8B-Instruct/aitk/README.md b/meta-llama-Llama-3.1-8B-Instruct/aitk/README.md index 612cdf2b..8efa8cc5 100644 --- a/meta-llama-Llama-3.1-8B-Instruct/aitk/README.md +++ b/meta-llama-Llama-3.1-8B-Instruct/aitk/README.md @@ -7,7 +7,7 @@ This repository demonstrates the optimization of the [Llama-3.1-8B-Instruct](htt + This process extends the QDQ flow and compiling specifically for **Qualcomm NPUs** - OpenVINO for Intel® CPU/GPU/NPU + This process uses OpenVINO specific passes like `OpenVINOOptimumConversion`, `OpenVINOIoUpdate` and `OpenVINOEncapsulation` -- Float downcasting for NVIDIA TRT for RTX GPU +- Float downcasting for NVIDIA TRT for RTX GPU / WebGPU for general GPU - DML for general GPU + This process uses AutoAWQ and ModelBuilder diff --git a/meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_webgpu_config.json.config b/meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_webgpu_config.json.config index 60a17277..b3e6c90c 100644 --- a/meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_webgpu_config.json.config +++ b/meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_webgpu_config.json.config @@ -6,6 +6,7 @@ "autoGenerated": true, "useModelBuilder": "builder" }, + "needHFLogin": true, "addCpu": false, "runtime": { "autoGenerated": true, diff --git a/microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_webgpu_config.json.config b/microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_webgpu_config.json.config index 60a17277..b3e6c90c 100644 --- a/microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_webgpu_config.json.config +++ b/microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_webgpu_config.json.config @@ -6,6 +6,7 @@ "autoGenerated": true, "useModelBuilder": "builder" }, + "needHFLogin": true, "addCpu": false, "runtime": { "autoGenerated": true, diff --git a/microsoft-Phi-4-mini-reasoning/aitk/inference_sample.ipynb b/microsoft-Phi-4-mini-reasoning/aitk/inference_sample.ipynb index 53bac5da..6b78ea4e 100644 --- a/microsoft-Phi-4-mini-reasoning/aitk/inference_sample.ipynb +++ b/microsoft-Phi-4-mini-reasoning/aitk/inference_sample.ipynb @@ -19,31 +19,6 @@ "source": [ "# reference: https://learn.microsoft.com/en-us/windows/ai/new-windows-ml/tutorial?tabs=python#acquiring-the-model-and-preprocessing\n", "import subprocess\n", -<<<<<<< HEAD - "import json\n", - "import sys\n", - "import os\n", - "import onnxruntime_genai as og\n", - "\n", - "def register_execution_providers():\n", - " worker_script = os.path.abspath('winml.py')\n", - " result = subprocess.check_output([sys.executable, worker_script], text=True)\n", - " paths = json.loads(result)\n", - " for item in paths.items():\n", - " og.register_execution_provider_library(item[0], item[1])\n", - "\n", - "register_execution_providers()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import onnxruntime_genai as og\n", -======= ->>>>>>> 014446d171668720d793754bd3073fbc38675bc7 "import json\n", "import sys\n", "import os\n", diff --git a/microsoft-Phi-4-reasoning-plus/aitk/inference_sample.ipynb b/microsoft-Phi-4-reasoning-plus/aitk/inference_sample.ipynb 
index 53bac5da..6b78ea4e 100644 --- a/microsoft-Phi-4-reasoning-plus/aitk/inference_sample.ipynb +++ b/microsoft-Phi-4-reasoning-plus/aitk/inference_sample.ipynb @@ -19,31 +19,6 @@ "source": [ "# reference: https://learn.microsoft.com/en-us/windows/ai/new-windows-ml/tutorial?tabs=python#acquiring-the-model-and-preprocessing\n", "import subprocess\n", -<<<<<<< HEAD - "import json\n", - "import sys\n", - "import os\n", - "import onnxruntime_genai as og\n", - "\n", - "def register_execution_providers():\n", - " worker_script = os.path.abspath('winml.py')\n", - " result = subprocess.check_output([sys.executable, worker_script], text=True)\n", - " paths = json.loads(result)\n", - " for item in paths.items():\n", - " og.register_execution_provider_library(item[0], item[1])\n", - "\n", - "register_execution_providers()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import onnxruntime_genai as og\n", -======= ->>>>>>> 014446d171668720d793754bd3073fbc38675bc7 "import json\n", "import sys\n", "import os\n", diff --git a/microsoft-Phi-4-reasoning/aitk/inference_sample.ipynb b/microsoft-Phi-4-reasoning/aitk/inference_sample.ipynb index 53bac5da..6b78ea4e 100644 --- a/microsoft-Phi-4-reasoning/aitk/inference_sample.ipynb +++ b/microsoft-Phi-4-reasoning/aitk/inference_sample.ipynb @@ -19,31 +19,6 @@ "source": [ "# reference: https://learn.microsoft.com/en-us/windows/ai/new-windows-ml/tutorial?tabs=python#acquiring-the-model-and-preprocessing\n", "import subprocess\n", -<<<<<<< HEAD - "import json\n", - "import sys\n", - "import os\n", - "import onnxruntime_genai as og\n", - "\n", - "def register_execution_providers():\n", - " worker_script = os.path.abspath('winml.py')\n", - " result = subprocess.check_output([sys.executable, worker_script], text=True)\n", - " paths = json.loads(result)\n", - " for item in paths.items():\n", - " og.register_execution_provider_library(item[0], item[1])\n", - "\n", - "register_execution_providers()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import onnxruntime_genai as og\n", -======= ->>>>>>> 014446d171668720d793754bd3073fbc38675bc7 "import json\n", "import sys\n", "import os\n", From 3c8dfcded8fc3729d27c69c4b50d6af2b84adefd Mon Sep 17 00:00:00 2001 From: Chao Zhang Date: Tue, 14 Oct 2025 17:56:11 +0800 Subject: [PATCH 12/14] fix conflict --- .aitk/requirements/requirements-WCR.txt | 15 --------------- .aitk/requirements/requirements-WCR_CUDA.txt | 12 ------------ 2 files changed, 27 deletions(-) diff --git a/.aitk/requirements/requirements-WCR.txt b/.aitk/requirements/requirements-WCR.txt index 2b7f0bed..d718352d 100644 --- a/.aitk/requirements/requirements-WCR.txt +++ b/.aitk/requirements/requirements-WCR.txt @@ -65,13 +65,6 @@ onnxruntime-winml==1.23.0.dev2025091008 onnxscript==0.3.2 # optimum==1.26.0 optimum==1.26.0 -<<<<<<< HEAD ---index-url https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/ORT-Nightly/pypi/simple ---extra-index-url https://pypi.org/simple -# winrt-runtime==3.2.1 -winrt-runtime==3.2.1 -typing-extensions==4.14.1 -======= optuna==4.2.1 packaging==24.2 pandas==2.2.3 @@ -126,17 +119,9 @@ wcwidth==0.2.13 widgetsnbextension==4.0.13 # winrt-runtime==3.2.1 winrt-runtime==3.2.1 ->>>>>>> 014446d171668720d793754bd3073fbc38675bc7 # winrt-Windows.Foundation==3.2.1 winrt-Windows.Foundation==3.2.1 # winrt-Windows.Foundation.Collections==3.2.1 winrt-Windows.Foundation.Collections==3.2.1 -<<<<<<< HEAD 
-./wasdk_microsoft_windows_ai_machinelearning-1.8.250812004.dev0-cp312-cp312-win_amd64.whl -# download:wasdk_microsoft_windows_ai_machinelearning-1.8.250812004.dev0-cp312-cp312-win_amd64.whl -./wasdk_microsoft_windows_applicationmodel_dynamicdependency_bootstrap-1.8.250812004.dev0-cp312-cp312-win_amd64.whl -# download:wasdk_microsoft_windows_applicationmodel_dynamicdependency_bootstrap-1.8.250812004.dev0-cp312-cp312-win_amd64.whl -======= xxhash==3.5.0 yarl==1.20.1 ->>>>>>> 014446d171668720d793754bd3073fbc38675bc7 diff --git a/.aitk/requirements/requirements-WCR_CUDA.txt b/.aitk/requirements/requirements-WCR_CUDA.txt index 2eb5cd27..cbc08baa 100644 --- a/.aitk/requirements/requirements-WCR_CUDA.txt +++ b/.aitk/requirements/requirements-WCR_CUDA.txt @@ -66,10 +66,6 @@ onnxruntime-winml==1.23.0.dev2025091008 onnxscript==0.3.2 # optimum==1.26.0 optimum==1.26.0 -<<<<<<< HEAD ---index-url https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/ORT-Nightly/pypi/simple ---extra-index-url https://pypi.org/simple -======= optuna==4.2.1 packaging==24.2 pandas==2.2.3 @@ -123,19 +119,11 @@ wasdk-Microsoft.Windows.AI.MachineLearning==1.8.250916003 wasdk-Microsoft.Windows.ApplicationModel.DynamicDependency.Bootstrap==1.8.250916003 wcwidth==0.2.13 widgetsnbextension==4.0.13 ->>>>>>> 014446d171668720d793754bd3073fbc38675bc7 # winrt-runtime==3.2.1 winrt-runtime==3.2.1 # winrt-Windows.Foundation==3.2.1 winrt-Windows.Foundation==3.2.1 # winrt-Windows.Foundation.Collections==3.2.1 winrt-Windows.Foundation.Collections==3.2.1 -<<<<<<< HEAD -./wasdk_microsoft_windows_ai_machinelearning-1.8.250812004.dev0-cp312-cp312-win_amd64.whl -# download:wasdk_microsoft_windows_ai_machinelearning-1.8.250812004.dev0-cp312-cp312-win_amd64.whl -./wasdk_microsoft_windows_applicationmodel_dynamicdependency_bootstrap-1.8.250812004.dev0-cp312-cp312-win_amd64.whl -# download:wasdk_microsoft_windows_applicationmodel_dynamicdependency_bootstrap-1.8.250812004.dev0-cp312-cp312-win_amd64.whl -======= xxhash==3.5.0 yarl==1.20.1 ->>>>>>> 014446d171668720d793754bd3073fbc38675bc7 From 7c361f84c705795c79441a5447b79eb2768c66d5 Mon Sep 17 00:00:00 2001 From: Chao Zhang Date: Thu, 16 Oct 2025 17:28:53 +0800 Subject: [PATCH 13/14] fix merge issue --- .aitk/scripts/sanitize/main.py | 1 + google-bert-bert-base-multilingual-cased/aitk/README.md | 2 +- google-vit-base-patch16-224/aitk/README.md | 2 +- intel-bert-base-uncased-mrpc/aitk/README.md | 2 +- laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/README.md | 2 +- microsoft-resnet-50/aitk/README.md | 2 +- openai-clip-vit-base-patch16/aitk/README.md | 2 +- openai-clip-vit-base-patch32/aitk/README.md | 2 +- 8 files changed, 8 insertions(+), 7 deletions(-) diff --git a/.aitk/scripts/sanitize/main.py b/.aitk/scripts/sanitize/main.py index 1f914430..74a44dee 100644 --- a/.aitk/scripts/sanitize/main.py +++ b/.aitk/scripts/sanitize/main.py @@ -53,6 +53,7 @@ def main(): parameterTemplate = readCheckParameterTemplate(os.path.join(configDir, "parameter_template.json")) modelList.Check() + # check each model for model in modelList.allModels(): modelDir = shouldCheckModel(str(rootDir), configDir, model) diff --git a/google-bert-bert-base-multilingual-cased/aitk/README.md b/google-bert-bert-base-multilingual-cased/aitk/README.md index e509abd9..867ec378 100644 --- a/google-bert-bert-base-multilingual-cased/aitk/README.md +++ b/google-bert-bert-base-multilingual-cased/aitk/README.md @@ -3,7 +3,7 @@ This folder contains examples of BERT optimization using different workflows. 
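The requirements hunks in the previous patch remove leftover `<<<<<<<`/`=======`/`>>>>>>>` markers that survived a merge. A small sketch of the kind of scan that catches them before they land; the glob and the marker heuristics are illustrative assumptions:

```python
# Sketch: flag files that still contain Git conflict markers, like the
# stray blocks PATCH 12 strips from the requirements files.
from pathlib import Path

MARKERS = ("<<<<<<< ", "=======", ">>>>>>> ")

def has_conflict_markers(path: Path) -> bool:
    # A line-anchored check; "=======" can false-positive on rst-style
    # rules, which is acceptable for a quick pre-commit scan.
    return any(
        line.startswith(MARKERS)
        for line in path.read_text(encoding="utf-8").splitlines()
    )

for txt in Path(".aitk/requirements").glob("*.txt"):
    if has_conflict_markers(txt):
        print(f"conflict markers left in {txt}")
```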
- QDQ for Qualcomm NPU / AMD NPU -- OpenVINO for Intel NPU +- OpenVINO for Intel® CPU/GPU/NPU - Float downcasting for NVIDIA TRT for RTX GPU / DML for general GPU / WebGPU for general GPU ## QDQ for Qualcomm NPU / AMD NPU diff --git a/google-vit-base-patch16-224/aitk/README.md b/google-vit-base-patch16-224/aitk/README.md index 65a99f04..f2a46bb2 100644 --- a/google-vit-base-patch16-224/aitk/README.md +++ b/google-vit-base-patch16-224/aitk/README.md @@ -3,7 +3,7 @@ This folder contains examples of VIT optimization using different workflows. - QDQ for Qualcomm NPU / AMD NPU -- OpenVINO for Intel NPU +- OpenVINO for Intel® CPU/GPU/NPU - Float downcasting for NVIDIA TRT for RTX GPU / DML for general GPU / WebGPU for general GPU ## Optimization Workflows diff --git a/intel-bert-base-uncased-mrpc/aitk/README.md b/intel-bert-base-uncased-mrpc/aitk/README.md index 0499bf19..0d59c1da 100644 --- a/intel-bert-base-uncased-mrpc/aitk/README.md +++ b/intel-bert-base-uncased-mrpc/aitk/README.md @@ -3,7 +3,7 @@ This folder contains examples of BERT optimization using different workflows. - QDQ for Qualcomm NPU / AMD NPU -- OpenVINO for Intel NPU +- OpenVINO for Intel® CPU/GPU/NPU - Float downcasting for NVIDIA TRT for RTX GPU / DML for general GPU / WebGPU for general GPU ## QDQ for Qualcomm NPU / AMD NPU diff --git a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/README.md b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/README.md index 07918c72..91fe950c 100644 --- a/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/README.md +++ b/laion-CLIP-ViT-B-32-laion2B-s34B-b79K/aitk/README.md @@ -4,7 +4,7 @@ This folder contains examples of Laion Clip optimization using different workflo - Text and vision model QDQ for Qualcomm NPU - QDQ for AMD NPU -- OpenVINO for Intel NPU +- OpenVINO for Intel® CPU/GPU/NPU - Float downcasting for NVIDIA TRT for RTX GPU / DML for general GPU / WebGPU for general GPU ## Laion Clip text optimization with QDQ for Qualcomm NPU diff --git a/microsoft-resnet-50/aitk/README.md b/microsoft-resnet-50/aitk/README.md index 390294cc..ffd2ebc1 100644 --- a/microsoft-resnet-50/aitk/README.md +++ b/microsoft-resnet-50/aitk/README.md @@ -3,7 +3,7 @@ This folder contains examples of ResNet optimization using different workflows. 
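For reference, the helper that the Phi-4 notebook hunks in the sanitize patch de-duplicate: the surviving cell registers WinML execution providers discovered by a sibling `winml.py`. This is reproduced from the removed duplicate block with comments added; it assumes `winml.py` prints a JSON object mapping provider names to library paths, which is the contract those notebooks rely on:

```python
# Register WinML execution providers before loading a model with
# onnxruntime-genai. winml.py (shipped alongside the notebooks) is
# expected to print {"ProviderName": "path/to/library", ...} as JSON.
import json
import os
import subprocess
import sys

import onnxruntime_genai as og

def register_execution_providers() -> None:
    worker_script = os.path.abspath("winml.py")
    result = subprocess.check_output([sys.executable, worker_script], text=True)
    paths = json.loads(result)
    for name, library_path in paths.items():
        og.register_execution_provider_library(name, library_path)

register_execution_providers()
```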
- QDQ for Qualcomm NPU / AMD NPU -- OpenVINO for Intel NPU +- OpenVINO for Intel® CPU/GPU/NPU - Float downcasting for NVIDIA TRT for RTX GPU / DML for general GPU / WebGPU for general GPU ## QDQ for Qualcomm NPU / AMD NPU diff --git a/openai-clip-vit-base-patch16/aitk/README.md b/openai-clip-vit-base-patch16/aitk/README.md index 25f4b93f..9e1c299d 100644 --- a/openai-clip-vit-base-patch16/aitk/README.md +++ b/openai-clip-vit-base-patch16/aitk/README.md @@ -4,7 +4,7 @@ This folder contains examples of Openai Clip optimization using different workfl - Text and vision model QDQ for Qualcomm NPU - QDQ for AMD NPU -- OpenVINO for Intel NPU +- OpenVINO for Intel® CPU/GPU/NPU - Float downcasting for NVIDIA TRT for RTX GPU / DML for general GPU / WebGPU for general GPU ## Openai Clip text optimization with QDQ for Qualcomm NPU diff --git a/openai-clip-vit-base-patch32/aitk/README.md b/openai-clip-vit-base-patch32/aitk/README.md index 25f4b93f..9e1c299d 100644 --- a/openai-clip-vit-base-patch32/aitk/README.md +++ b/openai-clip-vit-base-patch32/aitk/README.md @@ -4,7 +4,7 @@ This folder contains examples of Openai Clip optimization using different workfl - Text and vision model QDQ for Qualcomm NPU - QDQ for AMD NPU -- OpenVINO for Intel NPU +- OpenVINO for Intel® CPU/GPU/NPU - Float downcasting for NVIDIA TRT for RTX GPU / DML for general GPU / WebGPU for general GPU ## Openai Clip text optimization with QDQ for Qualcomm NPU From 47c0e63466eb58d74b8607bdd5f8719a06d4e119 Mon Sep 17 00:00:00 2001 From: Chao Zhang Date: Mon, 20 Oct 2025 08:39:27 +0800 Subject: [PATCH 14/14] fix lint --- intel-bert-base-uncased-mrpc/aitk/bert_webgpu.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/intel-bert-base-uncased-mrpc/aitk/bert_webgpu.json b/intel-bert-base-uncased-mrpc/aitk/bert_webgpu.json index ebbca861..26a24bb8 100644 --- a/intel-bert-base-uncased-mrpc/aitk/bert_webgpu.json +++ b/intel-bert-base-uncased-mrpc/aitk/bert_webgpu.json @@ -127,4 +127,4 @@ "target": "target_system", "cache_dir": "cache", "output_dir": "model/bert_webgpu" -} \ No newline at end of file +}
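The final patch, like the earlier `\ No newline at end of file` fixes, only normalizes a missing trailing newline. A tiny sketch of automating that lint fix; the target path mirrors the file touched above, and the helper name is an assumption:

```python
# Sketch: make sure a JSON config ends with exactly one newline,
# which is all the "fix lint" patch changes in bert_webgpu.json.
from pathlib import Path

def ensure_trailing_newline(path: Path) -> None:
    text = path.read_text(encoding="utf-8")
    fixed = text.rstrip("\n") + "\n"  # collapse to a single final newline
    if fixed != text:
        path.write_text(fixed, encoding="utf-8")
        print(f"fixed {path}")

ensure_trailing_newline(Path("intel-bert-base-uncased-mrpc/aitk/bert_webgpu.json"))
```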