Fix force_save_fsdp_all_gather and split_fsdp_prefetch #792
name: Test TorchTitan Integration

on:
  pull_request:
  push:
    branches:
      - main
      - release/*

concurrency:
  group: test-torchtitan-${{ github.workflow }}-${{ github.ref == 'refs/heads/main' && github.run_number || github.ref }}
  cancel-in-progress: true

jobs:
  test-torchtitan:
    name: Test TorchTitan Integration (cuda12.6-py3.12)
    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
    strategy:
      fail-fast: true
      matrix:
        include:
          - name: 12xlargegpu
            runs-on: linux.g5.12xlarge.nvidia.gpu
            torch-spec: '--pre torch --index-url https://download.pytorch.org/whl/nightly/cu126'
            gpu-arch-type: "cuda"
            gpu-arch-version: "12.6"
    with:
      timeout: 60
      runner: ${{ matrix.runs-on }}
      gpu-arch-type: ${{ matrix.gpu-arch-type }}
      gpu-arch-version: ${{ matrix.gpu-arch-version }}
      submodules: recursive
      script: |
        conda create --yes --quiet --name py312 python=3.12
        source $(conda info --base)/etc/profile.d/conda.sh
        conda activate py312
        pip install --quiet -r requirements-test.txt
        # For some reason the spec above isn't working
        pip uninstall -y torch
        pip install --no-input --quiet --pre torch --index-url https://download.pytorch.org/whl/nightly/cu126
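        # Optional sanity check: print the imported torch version and CUDA build to
        # confirm the nightly cu126 wheel from the reinstall above is the one in use.
        python -c "import torch; print(torch.__version__, torch.version.cuda, torch.cuda.is_available())"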
        pip install --quiet .
        # Clone TorchTitan
        git clone https://github.com/pytorch/torchtitan.git
        cd torchtitan
        pip install --quiet -r requirements.txt
        # Run TorchTitan training with AutoParallel
        NGPU=4 CONFIG_FILE="./torchtitan/models/llama3/train_configs/debug_model.toml" ./run_train.sh \
          --model.name autoparallel.llama3 \
          --parallelism.tensor_parallel_degree 4 \
          --training.dataset c4 \
          --compile.enable \
          --job.custom_config_module=torchtitan.experiments.autoparallel.job_config
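
For reference, the same integration check can be reproduced outside CI. A minimal sketch, assuming a machine with at least 4 NVIDIA GPUs, a Python 3.12 environment, and this repository checked out as the working directory; the commands simply mirror the script in the workflow above:

    # Install the nightly cu126 torch build and this repository, as the CI script does.
    pip install --pre torch --index-url https://download.pytorch.org/whl/nightly/cu126
    pip install .

    # Clone TorchTitan and install its dependencies.
    git clone https://github.com/pytorch/torchtitan.git
    cd torchtitan
    pip install -r requirements.txt

    # Run the debug Llama3 config through AutoParallel on 4 GPUs.
    NGPU=4 CONFIG_FILE="./torchtitan/models/llama3/train_configs/debug_model.toml" ./run_train.sh \
      --model.name autoparallel.llama3 \
      --parallelism.tensor_parallel_degree 4 \
      --training.dataset c4 \
      --compile.enable \
      --job.custom_config_module=torchtitan.experiments.autoparallel.job_config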