Fix force_save_fsdp_all_gather and split_fsdp_prefetch #792
name: Test TorchTitan Integration

on:
  pull_request:
  push:
    branches:
      - main
      - release/*

concurrency:
  group: test-torchtitan-${{ github.workflow }}-${{ github.ref == 'refs/heads/main' && github.run_number || github.ref }}
  cancel-in-progress: true

jobs:
  test-torchtitan:
    name: Test TorchTitan Integration (cuda12.6-py3.12)
    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
    strategy:
      fail-fast: true
      matrix:
        include:
          - name: 12xlargegpu
            runs-on: linux.g5.12xlarge.nvidia.gpu
            torch-spec: '--pre torch --index-url https://download.pytorch.org/whl/nightly/cu126'
            gpu-arch-type: "cuda"
            gpu-arch-version: "12.6"
    with:
      timeout: 60
      runner: ${{ matrix.runs-on }}
      gpu-arch-type: ${{ matrix.gpu-arch-type }}
      gpu-arch-version: ${{ matrix.gpu-arch-version }}
      submodules: recursive
      script: |
        conda create --yes --quiet --name py312 python=3.12
        source $(conda info --base)/etc/profile.d/conda.sh
        conda activate py312
        pip install --quiet -r requirements-test.txt
        # For some reason the spec above isn't working
        pip uninstall -y torch
        pip install --no-input --quiet --pre torch --index-url https://download.pytorch.org/whl/nightly/cu126
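        # Optional sanity check: print the imported torch version and CUDA build to
        # confirm the nightly cu126 wheel from the reinstall above is the one in use.
        python -c "import torch; print(torch.__version__, torch.version.cuda, torch.cuda.is_available())"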
        pip install --quiet .
        # Clone TorchTitan
        git clone https://github.com/pytorch/torchtitan.git
        cd torchtitan
        pip install --quiet -r requirements.txt
        # Run TorchTitan training with AutoParallel
        NGPU=4 CONFIG_FILE="./torchtitan/models/llama3/train_configs/debug_model.toml" ./run_train.sh \
          --model.name autoparallel.llama3 \
          --parallelism.tensor_parallel_degree 4 \
          --training.dataset c4 \
          --compile.enable \
          --job.custom_config_module=torchtitan.experiments.autoparallel.job_config
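
For reference, the same integration check can be reproduced outside CI. A minimal sketch, assuming a machine with at least 4 NVIDIA GPUs, a Python 3.12 environment, and this repository checked out as the working directory; the commands simply mirror the script in the workflow above:

    # Install the nightly cu126 torch build and this repository, as the CI script does.
    pip install --pre torch --index-url https://download.pytorch.org/whl/nightly/cu126
    pip install .

    # Clone TorchTitan and install its dependencies.
    git clone https://github.com/pytorch/torchtitan.git
    cd torchtitan
    pip install -r requirements.txt

    # Run the debug Llama3 config through AutoParallel on 4 GPUs.
    NGPU=4 CONFIG_FILE="./torchtitan/models/llama3/train_configs/debug_model.toml" ./run_train.sh \
      --model.name autoparallel.llama3 \
      --parallelism.tensor_parallel_degree 4 \
      --training.dataset c4 \
      --compile.enable \
      --job.custom_config_module=torchtitan.experiments.autoparallel.job_config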