From a81f5ec2067142988f69e97f4febb0c12e126f8f Mon Sep 17 00:00:00 2001 From: unknown Date: Mon, 16 Feb 2026 11:27:14 +0530 Subject: [PATCH 1/2] [NvTensorRtRtx] Add mixed precision nvModelOpt recipes for Phi-4-mini-instruct Signed-off-by: unknown --- .../NvTensorRtRtx/README.md | 60 +++++++++++++++++++ .../NvTensorRtRtx/info.yml | 6 ++ ...uct_nvmo_ptq_mixed_precision_awq_lite.json | 30 ++++++++++ .../NvTensorRtRtx/requirements-nvmo.txt | 3 + 4 files changed, 99 insertions(+) create mode 100644 microsoft-Phi-4-mini-instruct/NvTensorRtRtx/README.md create mode 100644 microsoft-Phi-4-mini-instruct/NvTensorRtRtx/info.yml create mode 100644 microsoft-Phi-4-mini-instruct/NvTensorRtRtx/microsoft-Phi-4-mini-instruct_nvmo_ptq_mixed_precision_awq_lite.json create mode 100644 microsoft-Phi-4-mini-instruct/NvTensorRtRtx/requirements-nvmo.txt diff --git a/microsoft-Phi-4-mini-instruct/NvTensorRtRtx/README.md b/microsoft-Phi-4-mini-instruct/NvTensorRtRtx/README.md new file mode 100644 index 00000000..66557b68 --- /dev/null +++ b/microsoft-Phi-4-mini-instruct/NvTensorRtRtx/README.md @@ -0,0 +1,60 @@ +# Phi-4-mini-instruct optimization + +This folder contains examples of Olive recipes for `Phi-4-mini-instruct` optimization. + +## NVMO PTQ Mixed Precision Quantization + +The olive recipe `microsoft-Phi-4-mini-instruct_nvmo_ptq_mixed_precision_awq_lite.json` produces INT4 + INT8 mixed precision quantized model using NVIDIA's TensorRT Model Optimizer toolkit with AWQ algorithm. + +### Setup + +1. Install Olive with NVIDIA TensorRT Model Optimizer toolkit + + - Run the following command to install Olive with TensorRT Model Optimizer. + ```bash + pip install olive-ai[nvmo] + ``` + + - If TensorRT Model Optimizer needs to be installed from a local wheel, then follow the below steps. + + ```bash + pip install olive-ai + pip install <path-to-local-modelopt-wheel>[onnx] + ``` + + - Make sure that TensorRT Model Optimizer is installed correctly.
 ```bash + python -c "from modelopt.onnx.quantization.int4 import quantize as quantize_int4" + ``` + + - Refer to the TensorRT Model Optimizer [documentation](https://nvidia.github.io/TensorRT-Model-Optimizer/getting_started/windows/_installation_with_olive.html) for its detailed installation instructions and setup dependencies. + +2. Install suitable onnxruntime and onnxruntime-genai packages + + - Install the onnxruntime and onnxruntime-genai packages that have NvTensorRTRTXExecutionProvider support. Refer to the documentation for the [NvTensorRtRtx execution-provider](https://onnxruntime.ai/docs/execution-providers/TensorRTRTX-ExecutionProvider) to setup its dependencies/requirements. + - Note that by default, TensorRT Model Optimizer comes with onnxruntime-directml. And the onnxruntime-genai-cuda package comes with onnxruntime-gpu. So, in order to use the onnxruntime package with NvTensorRTRTXExecutionProvider support, one might need to uninstall existing other onnxruntime packages. + - Make sure that at the end, there is only one onnxruntime package installed. Use a command like the following for validating the onnxruntime package installation. + ```bash + python -c "import onnxruntime as ort; print(ort.get_available_providers())" + ``` + +3. Install additional requirements. + + - Install packages provided in the requirements text file. + ```bash + pip install -r requirements-nvmo.txt + ``` + +### Steps to run + +```bash +olive run --config microsoft-Phi-4-mini-instruct_nvmo_ptq_mixed_precision_awq_lite.json +``` + +### Recipe details + +The olive recipe `microsoft-Phi-4-mini-instruct_nvmo_ptq_mixed_precision_awq_lite.json` has 2 passes: (a) `ModelBuilder` and (b) `NVModelOptQuantization`. The `ModelBuilder` pass is used to generate the FP16 model for `NvTensorRTRTXExecutionProvider` (aka `NvTensorRtRtx` EP). Subsequently, the `NVModelOptQuantization` pass performs INT4 + INT8 mixed precision quantization using AWQ algorithm with AWQ Lite calibration method to produce the optimized model.
+ +### Troubleshoot + +In case of any issue related to quantization using the TensorRT Model Optimizer toolkit, refer to its [FAQs](https://nvidia.github.io/TensorRT-Model-Optimizer/support/2_faqs.html) for potential help or suggestions. diff --git a/microsoft-Phi-4-mini-instruct/NvTensorRtRtx/info.yml b/microsoft-Phi-4-mini-instruct/NvTensorRtRtx/info.yml new file mode 100644 index 00000000..8d77fbe5 --- /dev/null +++ b/microsoft-Phi-4-mini-instruct/NvTensorRtRtx/info.yml @@ -0,0 +1,6 @@ +arch: phi3 +recipes: + - name: microsoft-Phi-4-mini-instruct_nvmo_ptq_mixed_precision_awq_lite + file: microsoft-Phi-4-mini-instruct_nvmo_ptq_mixed_precision_awq_lite.json + devices: gpu + eps: NvTensorRTRTXExecutionProvider diff --git a/microsoft-Phi-4-mini-instruct/NvTensorRtRtx/microsoft-Phi-4-mini-instruct_nvmo_ptq_mixed_precision_awq_lite.json b/microsoft-Phi-4-mini-instruct/NvTensorRtRtx/microsoft-Phi-4-mini-instruct_nvmo_ptq_mixed_precision_awq_lite.json new file mode 100644 index 00000000..2be8846a --- /dev/null +++ b/microsoft-Phi-4-mini-instruct/NvTensorRtRtx/microsoft-Phi-4-mini-instruct_nvmo_ptq_mixed_precision_awq_lite.json @@ -0,0 +1,30 @@ +{ + "input_model": { + "type": "HfModel", + "model_path": "microsoft/Phi-4-mini-instruct", + "task": "text-generation" + }, + "systems": { + "local_system": { + "type": "LocalSystem", + "accelerators": [ { "device": "gpu", "execution_providers": [ "NvTensorRTRTXExecutionProvider" ] } ] + } + }, + "engine": { "target": "local_system" }, + "passes": { + "builder": { "type": "ModelBuilder", "precision": "fp16" }, + "quantization": { + "type": "NVModelOptQuantization", + "algorithm": "awq", + "int4_block_size": 32, + "tokenizer_dir": "microsoft/Phi-4-mini-instruct", + "calibration_method": "awq_lite", + "enable_mixed_quant": true, + "calibration_providers": ["NvTensorRtRtx"], + "calibration_params": { + "add_position_ids": false + } + } + }, + "log_severity_level": 0 } diff --git
a/microsoft-Phi-4-mini-instruct/NvTensorRtRtx/requirements-nvmo.txt b/microsoft-Phi-4-mini-instruct/NvTensorRtRtx/requirements-nvmo.txt new file mode 100644 index 00000000..ea1022cd --- /dev/null +++ b/microsoft-Phi-4-mini-instruct/NvTensorRtRtx/requirements-nvmo.txt @@ -0,0 +1,3 @@ +datasets>=2.14.4 +torch +transformers From a8b7219855d193823d183145b34270c9b417966b Mon Sep 17 00:00:00 2001 From: unknown Date: Mon, 16 Feb 2026 11:43:06 +0530 Subject: [PATCH 2/2] [NvTensorRtRtx] Add mixed precision nvModelOpt recipes for Phi-4-mini-instruct Signed-off-by: unknown --- microsoft-Phi-4-mini-instruct/NvTensorRtRtx/info.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/microsoft-Phi-4-mini-instruct/NvTensorRtRtx/info.yml b/microsoft-Phi-4-mini-instruct/NvTensorRtRtx/info.yml index 8d77fbe5..f54a742b 100644 --- a/microsoft-Phi-4-mini-instruct/NvTensorRtRtx/info.yml +++ b/microsoft-Phi-4-mini-instruct/NvTensorRtRtx/info.yml @@ -1,4 +1,4 @@ -arch: phi3 +arch: phi4 recipes: - name: microsoft-Phi-4-mini-instruct_nvmo_ptq_mixed_precision_awq_lite file: microsoft-Phi-4-mini-instruct_nvmo_ptq_mixed_precision_awq_lite.json