From e4dcfcd6701f43adb2a7b0317032fccbb9d4a7dd Mon Sep 17 00:00:00 2001 From: mesakhcienet Date: Wed, 24 Dec 2025 08:02:39 +0000 Subject: [PATCH] fix: add missing module init and update docs --- .../posttraining/knowledge_distillation.md | 20 ++++++++++++++++--- src/MaxText/utils/ckpt_scripts/__init__.py | 0 2 files changed, 17 insertions(+), 3 deletions(-) create mode 100644 src/MaxText/utils/ckpt_scripts/__init__.py diff --git a/docs/tutorials/posttraining/knowledge_distillation.md b/docs/tutorials/posttraining/knowledge_distillation.md index 7723e568be..6c165b427b 100644 --- a/docs/tutorials/posttraining/knowledge_distillation.md +++ b/docs/tutorials/posttraining/knowledge_distillation.md @@ -49,13 +49,26 @@ export RUN_NAME = ```sh git clone https://github.com/AI-Hypercomputer/maxtext.git -python3 -m venv ~/venv-maxtext -source ~/venv-maxtext/bin/activate +python3 -m venv ~/maxtext_venv +source ~/maxtext_venv/bin/activate python3 -m pip install uv cd maxtext uv pip install -r dependencies/requirements/requirements.txt ``` +--- + +**⚠️ Warning: PyTorch Installation Required for Checkpoint Scripts** + +The checkpoint conversion scripts located under `MaxText/utils/ckpt_scripts/` (e.g., `llama_or_mistral_ckpt.py`, `convert_deepseek_family_unscanned_ckpt.py`) have a dependency on PyTorch (`torch`). These scripts are used for converting model checkpoints from other formats to be compatible with MaxText. + +If you intend to use these conversion scripts, you must install PyTorch. We recommend using `uv` for a fast and efficient installation: + +```bash +# Example command to install torch +uv pip install torch +``` + ### 1. Obtain and prepare the teacher model #### a. Download model from Hugging Face @@ -94,7 +107,8 @@ JAX_PLATFORMS=cpu \ python3 -m MaxText.utils.ckpt_scripts.llama_or_mistral_ckpt \ --base-model-path ~/llama2-7b-chat \ --maxtext-model-path ${BASE_DIRECTORY}/llama2-7b-chat/scanned \ - --model-size llama2-7b + --model-size llama2-7b \ + --huggingface-checkpoint true ``` ### 3. Generate dataset using the teacher model diff --git a/src/MaxText/utils/ckpt_scripts/__init__.py b/src/MaxText/utils/ckpt_scripts/__init__.py new file mode 100644 index 0000000000..e69de29bb2