From 7d92a4554e9d8bb81ebb4443177be85055d0016c Mon Sep 17 00:00:00 2001 From: hualxie Date: Thu, 12 Feb 2026 14:29:08 +0800 Subject: [PATCH 1/2] update readme and profiling req --- .aitk/configs/checks.json | 2 +- .aitk/requirements/requirements-Profiling.txt | 1 + Qwen-Qwen2.5-1.5B-Instruct/aitk/README.md | 7 +++++-- .../aitk/README.md | 7 +++++-- .../aitk/_copy.json.config | 18 ------------------ .../aitk/README.md | 7 +++++-- microsoft-Phi-3.5-mini-instruct/aitk/README.md | 7 +++++-- 7 files changed, 22 insertions(+), 27 deletions(-) diff --git a/.aitk/configs/checks.json b/.aitk/configs/checks.json index f7dec5a7..256887da 100644 --- a/.aitk/configs/checks.json +++ b/.aitk/configs/checks.json @@ -1,6 +1,6 @@ { "configCheck": 139, - "copyCheck": 179, + "copyCheck": 178, "extensionCheck": 1, "gitignoreCheck": 40, "inferenceModelCheck": 25, diff --git a/.aitk/requirements/requirements-Profiling.txt b/.aitk/requirements/requirements-Profiling.txt index 4ff32c6e..9091f54a 100644 --- a/.aitk/requirements/requirements-Profiling.txt +++ b/.aitk/requirements/requirements-Profiling.txt @@ -5,6 +5,7 @@ mpmath==1.3.0 numpy==2.2.4 # onnx==1.17.0 onnx==1.17.0 +onnxruntime-genai-winml==0.11.2 # uvpip:uninstall onnxruntime-winml;pre # We also need to uninstall in case user tries new version, uses previous version and then updates again # because uninstalling winml will remove onnxruntime folder but we will not install windowsml to add it back diff --git a/Qwen-Qwen2.5-1.5B-Instruct/aitk/README.md b/Qwen-Qwen2.5-1.5B-Instruct/aitk/README.md index 13a61646..1c3ef30a 100644 --- a/Qwen-Qwen2.5-1.5B-Instruct/aitk/README.md +++ b/Qwen-Qwen2.5-1.5B-Instruct/aitk/README.md @@ -2,14 +2,17 @@ This repository demonstrates the optimization of the [Qwen2.5-1.5B-Instruct](https://huggingface.co/Qwen/Qwen2.5-1.5B-Instruct) model using **post-training quantization (PTQ)** techniques. 
The optimization process is divided into these workflows: -- QDQ for AMD NPU +- Quark Quantization for AMD NPU - PTQ + AOT for QNN NPU + This process extends the QDQ flow and compiling specifically for **Qualcomm NPUs** +- int4 Quantization for QNN GPU - OpenVINO for Intel® CPU/GPU/NPU + This process uses OpenVINO specific passes like `OpenVINOOptimumConversion`, `OpenVINOIoUpdate` and `OpenVINOEncapsulation` - Float downcasting for NVIDIA TRT for RTX GPU - DML for general GPU - + This process uses AutoAWQ and ModelBuilder + + This process uses ModelBuilder + +**Some Python packages require Visual Studio 2022 or the Visual Studio 2022 Build Tools with the C++ development tools installed.** ## **QDQ Model with 4-bit Weights & 16-bit Activations** diff --git a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/README.md b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/README.md index 50d6122d..60502e1a 100644 --- a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/README.md +++ b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/README.md @@ -2,14 +2,17 @@ This repository demonstrates the optimization of the [DeepSeek-R1-Distill-Qwen-1.5B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B) model using **post-training quantization (PTQ)** techniques. 
The optimization process is divided into these workflows: -- QDQ for AMD NPU +- Quark Quantization for AMD NPU - PTQ + AOT for QNN NPU + This process extends the QDQ flow and compiling specifically for **Qualcomm NPUs** +- int4 Quantization for QNN GPU - OpenVINO for Intel® CPU/GPU/NPU + This process uses OpenVINO specific passes like `OpenVINOOptimumConversion`, `OpenVINOIoUpdate` and `OpenVINOEncapsulation` - Float downcasting for NVIDIA TRT for RTX GPU - DML for general GPU - + This process uses AutoAWQ and ModelBuilder + + This process uses ModelBuilder + +**Some Python packages require Visual Studio 2022 or the Visual Studio 2022 Build Tools with the C++ development tools installed.** ## **QDQ Model with 4-bit Weights & 16-bit Activations** diff --git a/meta-llama-Llama-3.1-8B-Instruct/aitk/_copy.json.config b/meta-llama-Llama-3.1-8B-Instruct/aitk/_copy.json.config index db7d8f65..a9fc1749 100644 --- a/meta-llama-Llama-3.1-8B-Instruct/aitk/_copy.json.config +++ b/meta-llama-Llama-3.1-8B-Instruct/aitk/_copy.json.config @@ -47,24 +47,6 @@ "dst": "llama3_1_dml_config.json.config", "replacements": [] }, - { - "src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/README.md", - "dst": "README.md", - "replacements": [ - { - "find": "# DeepSeek-R1-Distill-Qwen-1.5B Model Optimization", - "replace": "# Llama-3.1-8B-Instruct Model Optimization" - }, - { - "find": "[DeepSeek-R1-Distill-Qwen-1.5B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B)", - "replace": "[Llama-3.1-8B-Instruct](https://huggingface.co/meta-llama/Llama-3.1-8B-Instruct)" - }, - { - "find": "> ⚠️ If got 6033 error, replace `genai_config.json` in `./model` folder", - "replace": "" - } - ] - }, { "src": "../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/requirements.txt", "dst": "requirements.txt", diff --git a/meta-llama-Llama-3.2-1B-Instruct/aitk/README.md b/meta-llama-Llama-3.2-1B-Instruct/aitk/README.md index 573bf132..285c1ee4 100644 --- 
a/meta-llama-Llama-3.2-1B-Instruct/aitk/README.md +++ b/meta-llama-Llama-3.2-1B-Instruct/aitk/README.md @@ -2,14 +2,17 @@ This repository demonstrates the optimization of the [Llama-3.2-1B-Instruct](https://huggingface.co/meta-llama/Llama-3.2-1B-Instruct) model using **post-training quantization (PTQ)** techniques. The optimization process is divided into these workflows: -- QDQ for AMD NPU +- Quark Quantization for AMD NPU - PTQ + AOT for QNN NPU + This process extends the QDQ flow and compiling specifically for **Qualcomm NPUs** +- int4 Quantization for QNN GPU - OpenVINO for Intel® CPU/GPU/NPU + This process uses OpenVINO specific passes like `OpenVINOOptimumConversion`, `OpenVINOIoUpdate` and `OpenVINOEncapsulation` - Float downcasting for NVIDIA TRT for RTX GPU - DML for general GPU - + This process uses AutoAWQ and ModelBuilder + + This process uses ModelBuilder + +**Some Python packages require Visual Studio 2022 or the Visual Studio 2022 Build Tools with the C++ development tools installed.** ## **QDQ Model with 4-bit Weights & 16-bit Activations** diff --git a/microsoft-Phi-3.5-mini-instruct/aitk/README.md b/microsoft-Phi-3.5-mini-instruct/aitk/README.md index cd635e33..be4d058e 100644 --- a/microsoft-Phi-3.5-mini-instruct/aitk/README.md +++ b/microsoft-Phi-3.5-mini-instruct/aitk/README.md @@ -2,14 +2,17 @@ This repository demonstrates the optimization of the [Microsoft Phi-3.5 Mini Instruct](https://huggingface.co/microsoft/Phi-3.5-mini-instruct) model using **post-training quantization (PTQ)** techniques. 
The optimization process is divided into these workflows: -- QDQ for AMD NPU +- Quark Quantization for AMD NPU - PTQ + AOT for QNN NPU + This process extends the QDQ flow and compiling specifically for **Qualcomm NPUs** +- int4 Quantization for QNN GPU - OpenVINO for Intel® CPU/GPU/NPU + This process uses OpenVINO specific passes like `OpenVINOOptimumConversion`, `OpenVINOIoUpdate` and `OpenVINOEncapsulation` - Float downcasting for NVIDIA TRT for RTX GPU - DML for general GPU - + This process uses AutoAWQ and ModelBuilder + + This process uses ModelBuilder + +**Some Python packages require Visual Studio 2022 or the Visual Studio 2022 Build Tools with the C++ development tools installed.** ## **QDQ Model with 4-bit Weights & 16-bit Activations** From fbaa57e000e6ed1cbe66f48c54d0c10acd64f0dc Mon Sep 17 00:00:00 2001 From: hualxie Date: Thu, 12 Feb 2026 14:33:42 +0800 Subject: [PATCH 2/2] I --- Qwen-Qwen2.5-1.5B-Instruct/aitk/README.md | 2 +- deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/README.md | 2 +- meta-llama-Llama-3.2-1B-Instruct/aitk/README.md | 2 +- microsoft-Phi-3.5-mini-instruct/aitk/README.md | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Qwen-Qwen2.5-1.5B-Instruct/aitk/README.md b/Qwen-Qwen2.5-1.5B-Instruct/aitk/README.md index 1c3ef30a..c2b78901 100644 --- a/Qwen-Qwen2.5-1.5B-Instruct/aitk/README.md +++ b/Qwen-Qwen2.5-1.5B-Instruct/aitk/README.md @@ -5,7 +5,7 @@ This repository demonstrates the optimization of the [Qwen2.5-1.5B-Instruct](htt - Quark Quantization for AMD NPU - PTQ + AOT for QNN NPU + This process extends the QDQ flow and compiling specifically for **Qualcomm NPUs** -- int4 Quantization for QNN GPU +- Int4 Quantization for QNN GPU - OpenVINO for Intel® CPU/GPU/NPU + This process uses OpenVINO specific passes like `OpenVINOOptimumConversion`, `OpenVINOIoUpdate` and `OpenVINOEncapsulation` - Float downcasting for NVIDIA TRT for RTX GPU diff --git a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/README.md 
b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/README.md index 60502e1a..177708c0 100644 --- a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/README.md +++ b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/README.md @@ -5,7 +5,7 @@ This repository demonstrates the optimization of the [DeepSeek-R1-Distill-Qwen-1 - Quark Quantization for AMD NPU - PTQ + AOT for QNN NPU + This process extends the QDQ flow and compiling specifically for **Qualcomm NPUs** -- int4 Quantization for QNN GPU +- Int4 Quantization for QNN GPU - OpenVINO for Intel® CPU/GPU/NPU + This process uses OpenVINO specific passes like `OpenVINOOptimumConversion`, `OpenVINOIoUpdate` and `OpenVINOEncapsulation` - Float downcasting for NVIDIA TRT for RTX GPU diff --git a/meta-llama-Llama-3.2-1B-Instruct/aitk/README.md b/meta-llama-Llama-3.2-1B-Instruct/aitk/README.md index 285c1ee4..0dd6ffa4 100644 --- a/meta-llama-Llama-3.2-1B-Instruct/aitk/README.md +++ b/meta-llama-Llama-3.2-1B-Instruct/aitk/README.md @@ -5,7 +5,7 @@ This repository demonstrates the optimization of the [Llama-3.2-1B-Instruct](htt - Quark Quantization for AMD NPU - PTQ + AOT for QNN NPU + This process extends the QDQ flow and compiling specifically for **Qualcomm NPUs** -- int4 Quantization for QNN GPU +- Int4 Quantization for QNN GPU - OpenVINO for Intel® CPU/GPU/NPU + This process uses OpenVINO specific passes like `OpenVINOOptimumConversion`, `OpenVINOIoUpdate` and `OpenVINOEncapsulation` - Float downcasting for NVIDIA TRT for RTX GPU diff --git a/microsoft-Phi-3.5-mini-instruct/aitk/README.md b/microsoft-Phi-3.5-mini-instruct/aitk/README.md index be4d058e..ada2f2dc 100644 --- a/microsoft-Phi-3.5-mini-instruct/aitk/README.md +++ b/microsoft-Phi-3.5-mini-instruct/aitk/README.md @@ -5,7 +5,7 @@ This repository demonstrates the optimization of the [Microsoft Phi-3.5 Mini Ins - Quark Quantization for AMD NPU - PTQ + AOT for QNN NPU + This process extends the QDQ flow and compiling specifically for **Qualcomm NPUs** -- int4 Quantization 
for QNN GPU +- Int4 Quantization for QNN GPU - OpenVINO for Intel® CPU/GPU/NPU + This process uses OpenVINO specific passes like `OpenVINOOptimumConversion`, `OpenVINOIoUpdate` and `OpenVINOEncapsulation` - Float downcasting for NVIDIA TRT for RTX GPU