From 1fe49a4a2d5ebde832ce453ddc3457660b6dc27e Mon Sep 17 00:00:00 2001 From: linoytsaban Date: Mon, 24 Mar 2025 11:05:37 +0200 Subject: [PATCH 1/4] add push_to_hub support --- src/ltxv_trainer/config.py | 17 +++++++++++++++++ src/ltxv_trainer/trainer.py | 10 ++++++++++ 2 files changed, 27 insertions(+) diff --git a/src/ltxv_trainer/config.py b/src/ltxv_trainer/config.py index a99eaf5..ca501de 100644 --- a/src/ltxv_trainer/config.py +++ b/src/ltxv_trainer/config.py @@ -246,6 +246,22 @@ class CheckpointsConfig(ConfigBaseModel): ) +class HubConfig(ConfigBaseModel): + """Configuration for Hugging Face Hub integration""" + + push_to_hub: bool = Field( + default=False, + description="Whether to push the model weights to the Hugging Face Hub" + ) + hub_model_id: str = Field( + default=None, + description="Hugging Face Hub repository ID (e.g., 'username/repo-name')" + ) + hub_token: str = Field( + default=None, + description="Hugging Face token. If None, will use the token from the Hugging Face CLI" + ) + class FlowMatchingConfig(ConfigBaseModel): """Configuration for flow matching training""" @@ -271,6 +287,7 @@ class LtxvTrainerConfig(ConfigBaseModel): data: DataConfig = Field(default_factory=DataConfig) validation: ValidationConfig = Field(default_factory=ValidationConfig) checkpoints: CheckpointsConfig = Field(default_factory=CheckpointsConfig) + hub: HubConfig = Field(default_factory=HubConfig) flow_matching: FlowMatchingConfig = Field(default_factory=FlowMatchingConfig) # General configuration diff --git a/src/ltxv_trainer/trainer.py b/src/ltxv_trainer/trainer.py index d4f08f6..1126691 100644 --- a/src/ltxv_trainer/trainer.py +++ b/src/ltxv_trainer/trainer.py @@ -13,6 +13,7 @@ from accelerate.utils import set_seed from diffusers import LTXImageToVideoPipeline, LTXPipeline from diffusers.utils import export_to_video +from huggingface_hub import create_repo, upload_folder from loguru import logger from peft import LoraConfig, get_peft_model_state_dict from peft.tuners.tuners_utils import BaseTunerLayer @@ -291,6 +292,15 @@ def train( # noqa: PLR0912, PLR0915 if self._accelerator.is_main_process: saved_path = self._save_checkpoint() + # Upload artifacts to hub if enabled + if cfg.hub.push_to_hub: + repo_id = cfg.hub.hub_model_id or Path(cfg.output_dir).name + repo_id = create_repo(token=cfg.hub.hub_token, repo_id=repo_id, exist_ok=True) + upload_folder( + repo_id=repo_id, + folder_path=Path(self._config.output_dir), + ) + # Log the training statistics self._log_training_stats(stats) From e3f27f1608ee366822202ced0a450401e7579630 Mon Sep 17 00:00:00 2001 From: linoytsaban Date: Tue, 25 Mar 2025 12:25:45 +0200 Subject: [PATCH 2/4] add save_model_card to utils.py --- src/ltxv_trainer/utils.py | 95 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 95 insertions(+) diff --git a/src/ltxv_trainer/utils.py b/src/ltxv_trainer/utils.py index dbbaf71..e1dd000 100644 --- a/src/ltxv_trainer/utils.py +++ b/src/ltxv_trainer/utils.py @@ -1,13 +1,21 @@ import io import subprocess +import os +from typing import List, Union from pathlib import Path import torch from loguru import logger from PIL import ExifTags, Image, ImageCms, ImageOps from PIL.Image import Image as PilImage +import numpy as np +from diffusers.utils import export_to_video +from diffusers.utils.hub_utils import load_or_create_model_card, populate_model_card + +from PIL import Image + def get_gpu_memory_gb(device: torch.device) -> float: """Get current GPU memory usage in GB using nvidia-smi""" try: @@ -69,3 +77,90 @@ def 
open_image_as_srgb(image_path: str | Path | io.BytesIO) -> PilImage: srgb_img.info["icc_profile"] = srgb_profile_data return srgb_img + + +def save_model_card( + output_dir: str, + repo_id: str, + pretrained_model_name_or_path: str, + videos: Union[List[str], Union[List[PilImage.Image], List[np.ndarray]]], + validation_prompts: List[str], + fps: int = 30, +) -> None: + widget_dict = [] + if videos is not None and len(videos) > 0: + for i, (video, validation_prompt) in enumerate(zip(videos, validation_prompts)): + if not isinstance(video, str): + export_to_video(video, os.path.join(output_dir, f"final_video_{i}.mp4"), fps=fps) + widget_dict.append( + { + "text": validation_prompt if validation_prompt else " ", + "output": {"url": video if isinstance(video, str) else f"final_video_{i}.mp4"}, + } + ) + + model_description = f""" +# LoRA Finetune + + + +## Model description + +This is a lora finetune of model: `{pretrained_model_name_or_path}`. + +The model was trained using [`LTX-Video Community Trainer`](https://github.com/Lightricks/LTX-Video-Trainer). + +## Download model + +[Download LoRA]({repo_id}/tree/main) in the Files & Versions tab. + +## Usage + +### Using Trained LoRAs with `diffusers`: +Requires the [๐Ÿงจ Diffusers library](https://github.com/huggingface/diffusers) installed. + +### ๐Ÿ”Œ Using Trained LoRAs in ComfyUI + +After training your LoRA, you can use it in ComfyUI by following these steps: + +1. Copy your trained LoRA weights (`.safetensors` file) to the `models/loras` folder in your ComfyUI installation. + +2. Install the ComfyUI-LTXVideoLoRA custom node: + + ```bash + # In the root folder of your ComfyUI installation + cd custom_nodes + git clone https://github.com/dorpxam/ComfyUI-LTXVideoLoRA + pip install -r ComfyUI-LTXVideoLoRA/requirements.txt + ``` + +3. In your ComfyUI workflow: + - Add the "LTXV LoRA Selector" node to choose your LoRA file + - Connect it to the "LTXV LoRA Loader" node to apply the LoRA to your generation + +You can find reference Text-to-Video (T2V) and Image-to-Video (I2V) workflows in the [official LTXV ComfyUI repository](https://github.com/Lightricks/ComfyUI-LTXVideo). + +```py +TODO +``` + +For more details, including weighting, merging and fusing LoRAs, check the [documentation](https://huggingface.co/docs/diffusers/main/en/using-diffusers/loading_adapters) on loading LoRAs in diffusers. 
+""" + + model_card = load_or_create_model_card( + repo_id_or_path=repo_id, + from_training=True, + base_model=pretrained_model_name_or_path, + model_description=model_description, + widget=widget_dict, + ) + tags = [ + "text-to-video", + "ltx-video" + "diffusers", + "lora", + "template:sd-lora", + ] + + model_card = populate_model_card(model_card, tags=tags) + model_card.save(os.path.join(output_dir, "README.md")) \ No newline at end of file From f05e60aa875608f8a6691e983d42836162b1e4e0 Mon Sep 17 00:00:00 2001 From: linoytsaban Date: Tue, 25 Mar 2025 12:54:18 +0200 Subject: [PATCH 3/4] add `save_model_card` to trainer adjust `save_model_card` to include inference examples --- src/ltxv_trainer/trainer.py | 11 +++++++ src/ltxv_trainer/utils.py | 59 +++++++++++++++++++++++++++++++++++++ 2 files changed, 70 insertions(+) diff --git a/src/ltxv_trainer/trainer.py b/src/ltxv_trainer/trainer.py index 1126691..2001df9 100644 --- a/src/ltxv_trainer/trainer.py +++ b/src/ltxv_trainer/trainer.py @@ -52,6 +52,7 @@ from ltxv_trainer.timestep_samplers import SAMPLERS from ltxv_trainer.utils import get_gpu_memory_gb, open_image_as_srgb + # Disable irrelevant warnings from transformers os.environ["TOKENIZERS_PARALLELISM"] = "true" @@ -296,6 +297,16 @@ def train( # noqa: PLR0912, PLR0915 if cfg.hub.push_to_hub: repo_id = cfg.hub.hub_model_id or Path(cfg.output_dir).name repo_id = create_repo(token=cfg.hub.hub_token, repo_id=repo_id, exist_ok=True) + video_filenames = sampled_videos_paths if sampled_videos_paths else [] + + save_model_card( + output_dir=cfg.output_dir, + repo_id=repo_id, + pretrained_model_name_or_path=cfg.model.model_source, + videos=video_filenames, + validation_prompts=self._config.validation.prompts + ) + upload_folder( repo_id=repo_id, folder_path=Path(self._config.output_dir), diff --git a/src/ltxv_trainer/utils.py b/src/ltxv_trainer/utils.py index e1dd000..723ed13 100644 --- a/src/ltxv_trainer/utils.py +++ b/src/ltxv_trainer/utils.py @@ -98,6 +98,8 @@ def save_model_card( "output": {"url": video if isinstance(video, str) else f"final_video_{i}.mp4"}, } ) + if pretrained_model_name_or_path not in ["Lightricks/LTX-Video", "Lightricks/LTX-Video-0.9.5"]: + pretrained_model_name_or_path = "Lightricks/LTX-Video" model_description = f""" # LoRA Finetune @@ -119,6 +121,62 @@ def save_model_card( ### Using Trained LoRAs with `diffusers`: Requires the [๐Ÿงจ Diffusers library](https://github.com/huggingface/diffusers) installed. 
+Text-to-Video generation using the trained LoRA: +```python +import torch +from diffusers import LTXPipeline +from diffusers.utils import export_to_video +from huggingface_hub import hf_hub_download + +pipe = LTXPipeline.from_pretrained("Lightricks/LTX-Video", torch_dtype=torch.bfloat16) +lora_weights = hf_hub_download(repo_id={repo_id}, filename="path_to_your_saved_weights.safetensors") +state_dict = load_file(lora_weights) +pipe.load_lora_weights(state_dict) +pipe.to("cuda") + +prompt = "{validation_prompts[0]}" +negative_prompt = "worst quality, inconsistent motion, blurry, jittery, distorted" +video = pipe( + prompt=prompt, + negative_prompt=negative_prompt, + width=704, + height=480, + num_frames=161, + num_inference_steps=50, +).frames[0] +export_to_video(video, "output.mp4", fps=24) +``` + +For Image-to-Video: +```python +import torch +from diffusers import LTXImageToVideoPipeline +from diffusers.utils import export_to_video, load_image + +pipe = LTXImageToVideoPipeline.from_pretrained("Lightricks/LTX-Video", torch_dtype=torch.bfloat16) +lora_weights = hf_hub_download(repo_id={repo_id}, filename="saved_weights_path.safetensors") +state_dict = load_file(lora_weights) +pipe.load_lora_weights(state_dict) +pipe.to("cuda") + +image = load_image( + "url_to_your_image", +) +prompt = "{validation_prompts[0]}" +negative_prompt = "worst quality, inconsistent motion, blurry, jittery, distorted" + +video = pipe( + image=image, + prompt=prompt, + negative_prompt=negative_prompt, + width=704, + height=480, + num_frames=161, + num_inference_steps=50, +).frames[0] +export_to_video(video, "output.mp4", fps=24) +``` + ### ๐Ÿ”Œ Using Trained LoRAs in ComfyUI After training your LoRA, you can use it in ComfyUI by following these steps: @@ -156,6 +214,7 @@ def save_model_card( ) tags = [ "text-to-video", + "image-to-video", "ltx-video" "diffusers", "lora", From 9fa10b7af5bfb35139b1612bc2de172725ecc194 Mon Sep 17 00:00:00 2001 From: linoytsaban Date: Tue, 25 Mar 2025 15:09:26 +0200 Subject: [PATCH 4/4] push to hub only in the end of the training, push also comfyui compatible file --- README.md | 21 +++ src/ltxv_trainer/config.py | 24 ++-- src/ltxv_trainer/hub_utils.py | 234 +++++++++++++++++++++++++++++++ src/ltxv_trainer/model_loader.py | 6 +- src/ltxv_trainer/trainer.py | 25 +--- src/ltxv_trainer/utils.py | 154 -------------------- templates/model_card.md | 42 ++++++ 7 files changed, 317 insertions(+), 189 deletions(-) create mode 100644 src/ltxv_trainer/hub_utils.py create mode 100644 templates/model_card.md diff --git a/README.md b/README.md index dd61b50..c09fca0 100644 --- a/README.md +++ b/README.md @@ -302,6 +302,27 @@ The trainer loads your configuration, initializes models, applies optimizations, For LoRA training, the weights will be saved as `lora_weights.safetensors` in your output directory. For full model fine-tuning, the weights will be saved as `model_weights.safetensors`. +### ๐Ÿค— Pushing Models to Hugging Face Hub + +You can automatically push your trained models to the Hugging Face Hub by adding the following to your configuration YAML: + +```yaml +hub: + push_to_hub: true + hub_model_id: "your-username/your-model-name" # Your HF username and desired repo name +``` + +Before pushing, make sure you: +1. Have a Hugging Face account +2. Are logged in via `huggingface-cli login` or have set the `HUGGING_FACE_HUB_TOKEN` environment variable +3. 
Have write access to the specified repository (it will be created if it doesn't exist) + +The trainer will: +- Create a model card with training details and sample outputs +- Upload the model weights (both original and ComfyUI-compatible versions) +- Push sample videos as GIFs in the model card +- Include training configuration and prompts + --- ## Fast and simple: Running the Complete Pipeline as one command diff --git a/src/ltxv_trainer/config.py b/src/ltxv_trainer/config.py index ca501de..62b2701 100644 --- a/src/ltxv_trainer/config.py +++ b/src/ltxv_trainer/config.py @@ -1,7 +1,7 @@ from pathlib import Path from typing import Literal -from pydantic import BaseModel, ConfigDict, Field, field_validator +from pydantic import BaseModel, ConfigDict, Field, field_validator, model_validator from ltxv_trainer.model_loader import LtxvModelVersion from ltxv_trainer.quantization import QuantizationOptions @@ -249,19 +249,19 @@ class CheckpointsConfig(ConfigBaseModel): class HubConfig(ConfigBaseModel): """Configuration for Hugging Face Hub integration""" - push_to_hub: bool = Field( - default=False, - description="Whether to push the model weights to the Hugging Face Hub" - ) - hub_model_id: str = Field( - default=None, - description="Hugging Face Hub repository ID (e.g., 'username/repo-name')" - ) - hub_token: str = Field( - default=None, - description="Hugging Face token. If None, will use the token from the Hugging Face CLI" + push_to_hub: bool = Field(default=False, description="Whether to push the model weights to the Hugging Face Hub") + hub_model_id: str | None = Field( + default=None, description="Hugging Face Hub repository ID (e.g., 'username/repo-name')" ) + @model_validator(mode="after") + def validate_hub_config(self) -> "HubConfig": + """Validate that hub_model_id is not None when push_to_hub is True.""" + if self.push_to_hub and not self.hub_model_id: + raise ValueError("hub_model_id must be specified when push_to_hub is True") + return self + + class FlowMatchingConfig(ConfigBaseModel): """Configuration for flow matching training""" diff --git a/src/ltxv_trainer/hub_utils.py b/src/ltxv_trainer/hub_utils.py new file mode 100644 index 0000000..ee7412b --- /dev/null +++ b/src/ltxv_trainer/hub_utils.py @@ -0,0 +1,234 @@ +import tempfile +from pathlib import Path +from typing import List, Union + +import imageio +from huggingface_hub import HfApi, create_repo +from loguru import logger + +from ltxv_trainer.config import LtxvTrainerConfig +from ltxv_trainer.model_loader import try_parse_version +from scripts.convert_checkpoint import convert_checkpoint + + +def convert_video_to_gif(video_path: Path, output_path: Path) -> None: + """Convert a video file to GIF format.""" + try: + # Read the video file + reader = imageio.get_reader(str(video_path)) + fps = reader.get_meta_data()["fps"] + + # Write GIF file with infinite loop + writer = imageio.get_writer( + str(output_path), + fps=min(fps, 15), # Cap FPS at 15 for reasonable file size + loop=0, # 0 means infinite loop + ) + + for frame in reader: + writer.append_data(frame) + + writer.close() + reader.close() + except Exception as e: + logger.error(f"Failed to convert video to GIF: {e}") + return None + + +def create_model_card( + output_dir: Union[str, Path], + videos: List[Path], + config: LtxvTrainerConfig, +) -> Path: + """Generate and save a model card for the trained model.""" + + repo_id = config.hub.hub_model_id + pretrained_model_name_or_path = config.model.model_source + validation_prompts = config.validation.prompts + output_dir 
= Path(output_dir) + template_path = Path(__file__).parent.parent.parent / "templates" / "model_card.md" + + if not template_path.exists(): + logger.warning("โš ๏ธ Model card template not found, using default template") + return + + # Read the template + template = template_path.read_text() + + # Get model name from repo_id + model_name = repo_id.split("/")[-1] + + # Get base model information + version = try_parse_version(pretrained_model_name_or_path) + if version: + base_model_link = version.safetensors_url + base_model_name = str(version) + else: + base_model_link = f"https://huggingface.co/{pretrained_model_name_or_path}" + base_model_name = pretrained_model_name_or_path + + # Format validation prompts and create grid layout + prompts_text = "" + sample_grid = [] + + if validation_prompts and videos: + prompts_text = "Example prompts used during validation:\n\n" + + # Create samples directory + samples_dir = output_dir / "samples" + samples_dir.mkdir(exist_ok=True, parents=True) + + # Process videos and create cells + cells = [] + for i, (prompt, video) in enumerate(zip(validation_prompts, videos, strict=False)): + if video.exists(): + # Add prompt to text section + prompts_text += f"- `{prompt}`\n" + + # Convert video to GIF + gif_path = samples_dir / f"sample_{i}.gif" + try: + convert_video_to_gif(video, gif_path) + + # Create grid cell with collapsible description + cell = ( + f"![example{i + 1}](./samples/sample_{i}.gif)" + "
" + '
' + f"Prompt" + f"{prompt}" + "
" + ) + cells.append(cell) + except Exception as e: + logger.error(f"Failed to process video {video}: {e}") + + # Calculate optimal grid dimensions + num_cells = len(cells) + if num_cells > 0: + # Aim for a roughly square grid, with max 4 columns + num_cols = min(4, num_cells) + num_rows = (num_cells + num_cols - 1) // num_cols # Ceiling division + + # Create grid rows + for row in range(num_rows): + start_idx = row * num_cols + end_idx = min(start_idx + num_cols, num_cells) + row_cells = cells[start_idx:end_idx] + # Properly format the row with table markers and exact number of cells + formatted_row = "| " + " | ".join(row_cells) + " |" + sample_grid.append(formatted_row) + + # Join grid rows with just the content, no headers needed + grid_text = "\n".join(sample_grid) if sample_grid else "" + + # Fill in the template + model_card_content = template.format( + base_model=base_model_name, + base_model_link=base_model_link, + model_name=model_name, + training_type="LoRA fine-tuning" if config.model.training_mode == "lora" else "Full model fine-tuning", + training_steps=config.optimization.steps, + learning_rate=config.optimization.learning_rate, + batch_size=config.optimization.batch_size, + validation_prompts=prompts_text, + sample_grid=grid_text, + ) + + # Save the model card directly + model_card_path = output_dir / "README.md" + model_card_path.write_text(model_card_content) + + return model_card_path + + +def push_to_hub(weights_path: Path, sampled_videos_paths: List[Path], config: LtxvTrainerConfig) -> None: + """Push the trained LoRA weights to HuggingFace Hub.""" + if not config.hub.push_to_hub: + return + + if not config.hub.hub_model_id: + logger.warning("โš ๏ธ HuggingFace hub_model_id not specified, skipping push to hub") + return + + api = HfApi() + + # Try to create repo if it doesn't exist + try: + create_repo( + repo_id=config.hub.hub_model_id, + repo_type="model", + exist_ok=True, # Don't raise error if repo exists + ) + except Exception as e: + logger.error(f"โŒ Failed to create repository: {e}") + return + + # Upload the original weights file + try: + api.upload_file( + path_or_fileobj=str(weights_path), + path_in_repo=weights_path.name, + repo_id=config.hub.hub_model_id, + repo_type="model", + ) + except Exception as e: + logger.error(f"โŒ Failed to push {weights_path.name} to HuggingFace Hub: {e}") + # Create a temporary directory for the files we want to upload + with tempfile.TemporaryDirectory() as temp_dir: + temp_path = Path(temp_dir) + + try: + # Save model card and copy videos to temp directory + create_model_card( + output_dir=temp_path, + videos=sampled_videos_paths, + config=config, + ) + + # Upload the model card and samples directory + api.upload_folder( + folder_path=str(temp_path), # Convert to string for compatibility + repo_id=config.hub.hub_model_id, + repo_type="model", + ) + + logger.info(f"โœ… Successfully uploaded model card and sample videos to {config.hub.hub_model_id}") + except Exception as e: + logger.error(f"โŒ Failed to save/upload model card and videos: {e}") + + logger.info(f"โœ… Successfully pushed original LoRA weights to {config.hub.hub_model_id}") + + # Convert and upload ComfyUI version + try: + # Create a temporary directory for the converted file + with tempfile.TemporaryDirectory() as temp_dir: + # Convert the weights to ComfyUI format + comfy_path = Path(temp_dir) / f"{weights_path.stem}_comfy{weights_path.suffix}" + + convert_checkpoint( + input_path=str(weights_path), + to_comfy=True, + output_path=str(comfy_path), + ) + + # 
Find the converted file + converted_files = list(Path(temp_dir).glob("*.safetensors")) + if not converted_files: + logger.warning("โš ๏ธ No converted ComfyUI weights found") + return + + converted_file = converted_files[0] + comfy_filename = f"comfyui_{weights_path.name}" + + # Upload the converted file + api.upload_file( + path_or_fileobj=str(converted_file), + path_in_repo=comfy_filename, + repo_id=config.hub.hub_model_id, + repo_type="model", + ) + logger.info(f"โœ… Successfully pushed ComfyUI LoRA weights to {config.hub.hub_model_id}") + + except Exception as e: + logger.error(f"โŒ Failed to convert and push ComfyUI version: {e}") diff --git a/src/ltxv_trainer/model_loader.py b/src/ltxv_trainer/model_loader.py index 15aacb0..6c49e9d 100644 --- a/src/ltxv_trainer/model_loader.py +++ b/src/ltxv_trainer/model_loader.py @@ -160,7 +160,7 @@ def load_vae( """ if isinstance(source, str): # noqa: SIM102 # Try to parse as version first - if version := _try_parse_version(source): + if version := try_parse_version(source): source = version if isinstance(source, LtxvModelVersion): @@ -217,7 +217,7 @@ def load_transformer( """ if isinstance(source, str): # noqa: SIM102 # Try to parse as version first - if version := _try_parse_version(source): + if version := try_parse_version(source): source = version if isinstance(source, LtxvModelVersion): @@ -285,7 +285,7 @@ def load_ltxv_components( ) -def _try_parse_version(source: str | Path) -> LtxvModelVersion | None: +def try_parse_version(source: str | Path) -> LtxvModelVersion | None: """ Try to parse a string as an LtxvModelVersion. diff --git a/src/ltxv_trainer/trainer.py b/src/ltxv_trainer/trainer.py index 2001df9..bb10a16 100644 --- a/src/ltxv_trainer/trainer.py +++ b/src/ltxv_trainer/trainer.py @@ -13,7 +13,6 @@ from accelerate.utils import set_seed from diffusers import LTXImageToVideoPipeline, LTXPipeline from diffusers.utils import export_to_video -from huggingface_hub import create_repo, upload_folder from loguru import logger from peft import LoraConfig, get_peft_model_state_dict from peft.tuners.tuners_utils import BaseTunerLayer @@ -47,12 +46,12 @@ from ltxv_trainer.config import LtxvTrainerConfig from ltxv_trainer.datasets import PrecomputedDataset +from ltxv_trainer.hub_utils import push_to_hub from ltxv_trainer.model_loader import load_ltxv_components from ltxv_trainer.quantization import quantize_model from ltxv_trainer.timestep_samplers import SAMPLERS from ltxv_trainer.utils import get_gpu_memory_gb, open_image_as_srgb - # Disable irrelevant warnings from transformers os.environ["TOKENIZERS_PARALLELISM"] = "true" @@ -157,6 +156,8 @@ def train( # noqa: PLR0912, PLR0915 # Track when actual training starts (after compilation) actual_training_start = None + sampled_videos_paths = None + with Live(Panel(Group(train_progress, sample_progress)), refresh_per_second=2): task = train_progress.add_task( "Training", @@ -167,7 +168,7 @@ def train( # noqa: PLR0912, PLR0915 ) if cfg.validation.interval: - self._sample_videos(sample_progress) + sampled_videos_paths = self._sample_videos(sample_progress) for step in range(cfg.optimization.steps): # Get next batch, reset the dataloader if needed @@ -204,7 +205,6 @@ def train( # noqa: PLR0912, PLR0915 if self._lr_scheduler is not None: self._lr_scheduler.step() - # Run validation if needed if ( cfg.validation.interval @@ -295,22 +295,7 @@ def train( # noqa: PLR0912, PLR0915 # Upload artifacts to hub if enabled if cfg.hub.push_to_hub: - repo_id = cfg.hub.hub_model_id or Path(cfg.output_dir).name - 
repo_id = create_repo(token=cfg.hub.hub_token, repo_id=repo_id, exist_ok=True) - video_filenames = sampled_videos_paths if sampled_videos_paths else [] - - save_model_card( - output_dir=cfg.output_dir, - repo_id=repo_id, - pretrained_model_name_or_path=cfg.model.model_source, - videos=video_filenames, - validation_prompts=self._config.validation.prompts - ) - - upload_folder( - repo_id=repo_id, - folder_path=Path(self._config.output_dir), - ) + push_to_hub(saved_path, sampled_videos_paths, self._config) # Log the training statistics self._log_training_stats(stats) diff --git a/src/ltxv_trainer/utils.py b/src/ltxv_trainer/utils.py index 723ed13..dbbaf71 100644 --- a/src/ltxv_trainer/utils.py +++ b/src/ltxv_trainer/utils.py @@ -1,21 +1,13 @@ import io import subprocess -import os -from typing import List, Union from pathlib import Path import torch from loguru import logger from PIL import ExifTags, Image, ImageCms, ImageOps from PIL.Image import Image as PilImage -import numpy as np -from diffusers.utils import export_to_video -from diffusers.utils.hub_utils import load_or_create_model_card, populate_model_card - -from PIL import Image - def get_gpu_memory_gb(device: torch.device) -> float: """Get current GPU memory usage in GB using nvidia-smi""" try: @@ -77,149 +69,3 @@ def open_image_as_srgb(image_path: str | Path | io.BytesIO) -> PilImage: srgb_img.info["icc_profile"] = srgb_profile_data return srgb_img - - -def save_model_card( - output_dir: str, - repo_id: str, - pretrained_model_name_or_path: str, - videos: Union[List[str], Union[List[PilImage.Image], List[np.ndarray]]], - validation_prompts: List[str], - fps: int = 30, -) -> None: - widget_dict = [] - if videos is not None and len(videos) > 0: - for i, (video, validation_prompt) in enumerate(zip(videos, validation_prompts)): - if not isinstance(video, str): - export_to_video(video, os.path.join(output_dir, f"final_video_{i}.mp4"), fps=fps) - widget_dict.append( - { - "text": validation_prompt if validation_prompt else " ", - "output": {"url": video if isinstance(video, str) else f"final_video_{i}.mp4"}, - } - ) - if pretrained_model_name_or_path not in ["Lightricks/LTX-Video", "Lightricks/LTX-Video-0.9.5"]: - pretrained_model_name_or_path = "Lightricks/LTX-Video" - - model_description = f""" -# LoRA Finetune - - - -## Model description - -This is a lora finetune of model: `{pretrained_model_name_or_path}`. - -The model was trained using [`LTX-Video Community Trainer`](https://github.com/Lightricks/LTX-Video-Trainer). - -## Download model - -[Download LoRA]({repo_id}/tree/main) in the Files & Versions tab. - -## Usage - -### Using Trained LoRAs with `diffusers`: -Requires the [๐Ÿงจ Diffusers library](https://github.com/huggingface/diffusers) installed. 
- -Text-to-Video generation using the trained LoRA: -```python -import torch -from diffusers import LTXPipeline -from diffusers.utils import export_to_video -from huggingface_hub import hf_hub_download - -pipe = LTXPipeline.from_pretrained("Lightricks/LTX-Video", torch_dtype=torch.bfloat16) -lora_weights = hf_hub_download(repo_id={repo_id}, filename="path_to_your_saved_weights.safetensors") -state_dict = load_file(lora_weights) -pipe.load_lora_weights(state_dict) -pipe.to("cuda") - -prompt = "{validation_prompts[0]}" -negative_prompt = "worst quality, inconsistent motion, blurry, jittery, distorted" -video = pipe( - prompt=prompt, - negative_prompt=negative_prompt, - width=704, - height=480, - num_frames=161, - num_inference_steps=50, -).frames[0] -export_to_video(video, "output.mp4", fps=24) -``` - -For Image-to-Video: -```python -import torch -from diffusers import LTXImageToVideoPipeline -from diffusers.utils import export_to_video, load_image - -pipe = LTXImageToVideoPipeline.from_pretrained("Lightricks/LTX-Video", torch_dtype=torch.bfloat16) -lora_weights = hf_hub_download(repo_id={repo_id}, filename="saved_weights_path.safetensors") -state_dict = load_file(lora_weights) -pipe.load_lora_weights(state_dict) -pipe.to("cuda") - -image = load_image( - "url_to_your_image", -) -prompt = "{validation_prompts[0]}" -negative_prompt = "worst quality, inconsistent motion, blurry, jittery, distorted" - -video = pipe( - image=image, - prompt=prompt, - negative_prompt=negative_prompt, - width=704, - height=480, - num_frames=161, - num_inference_steps=50, -).frames[0] -export_to_video(video, "output.mp4", fps=24) -``` - -### ๐Ÿ”Œ Using Trained LoRAs in ComfyUI - -After training your LoRA, you can use it in ComfyUI by following these steps: - -1. Copy your trained LoRA weights (`.safetensors` file) to the `models/loras` folder in your ComfyUI installation. - -2. Install the ComfyUI-LTXVideoLoRA custom node: - - ```bash - # In the root folder of your ComfyUI installation - cd custom_nodes - git clone https://github.com/dorpxam/ComfyUI-LTXVideoLoRA - pip install -r ComfyUI-LTXVideoLoRA/requirements.txt - ``` - -3. In your ComfyUI workflow: - - Add the "LTXV LoRA Selector" node to choose your LoRA file - - Connect it to the "LTXV LoRA Loader" node to apply the LoRA to your generation - -You can find reference Text-to-Video (T2V) and Image-to-Video (I2V) workflows in the [official LTXV ComfyUI repository](https://github.com/Lightricks/ComfyUI-LTXVideo). - -```py -TODO -``` - -For more details, including weighting, merging and fusing LoRAs, check the [documentation](https://huggingface.co/docs/diffusers/main/en/using-diffusers/loading_adapters) on loading LoRAs in diffusers. -""" - - model_card = load_or_create_model_card( - repo_id_or_path=repo_id, - from_training=True, - base_model=pretrained_model_name_or_path, - model_description=model_description, - widget=widget_dict, - ) - tags = [ - "text-to-video", - "image-to-video", - "ltx-video" - "diffusers", - "lora", - "template:sd-lora", - ] - - model_card = populate_model_card(model_card, tags=tags) - model_card.save(os.path.join(output_dir, "README.md")) \ No newline at end of file diff --git a/templates/model_card.md b/templates/model_card.md new file mode 100644 index 0000000..b70682e --- /dev/null +++ b/templates/model_card.md @@ -0,0 +1,42 @@ +# {model_name} + +This is a fine-tuned version of [`{base_model}`]({base_model_link}) trained on custom data. 
+ +## Model Details + +- **Base Model:** [`{base_model}`]({base_model_link}) +- **Training Type:** {training_type} +- **Training Steps:** {training_steps} +- **Learning Rate:** {learning_rate} +- **Batch Size:** {batch_size} + +## Sample Outputs + +| | | | | +|:---:|:---:|:---:|:---:| +{sample_grid} + +## Usage + +This model is designed to be used with the LTXV (Lightricks Text-to-Video) pipeline. + +### ๐Ÿ”Œ Using Trained LoRAs in ComfyUI +In order to use the trained lora in comfy: +1. Copy your comfyui trained LoRA weights (`comfyui..safetensors` file) to the `models/loras` folder in your ComfyUI installation. +2. In your ComfyUI workflow: + - Add the "LTXV LoRA Selector" node to choose your LoRA file + - Connect it to the "LTXV LoRA Loader" node to apply the LoRA to your generation + +You can find reference Text-to-Video (T2V) and Image-to-Video (I2V) workflows in the [official LTXV ComfyUI repository](https://github.com/Lightricks/ComfyUI-LTXVideo). + +### Example Prompts + +{validation_prompts} + + +This model inherits the license of the base model ([`{base_model}`]({base_model_link})). + +## Acknowledgments + +- Base model by [Lightricks](https://huggingface.co/Lightricks) +- Training infrastructure: [LTX-Video-Trainer](https://github.com/Lightricks/ltx-video-trainer)
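
A quick illustration of the behaviour introduced by the final patch: the `hub` section is validated up front, so a misconfigured push fails at config-load time rather than after training finishes. The sketch below is not part of the patch series; it assumes the package import path `ltxv_trainer.config` shown in the diffs and only exercises `HubConfig` as defined in `src/ltxv_trainer/config.py`.

```python
# Minimal sketch of the HubConfig validation added in PATCH 4 (assumed import
# path: ltxv_trainer.config, matching the diffs above).
from pydantic import ValidationError

from ltxv_trainer.config import HubConfig

# A well-formed hub section: pushing enabled and a target repository given.
ok = HubConfig(push_to_hub=True, hub_model_id="your-username/your-model-name")
print(ok.hub_model_id)

# push_to_hub=True without hub_model_id trips the model validator and is
# rejected with a pydantic ValidationError.
try:
    HubConfig(push_to_hub=True)
except ValidationError as err:
    print(f"Rejected as expected: {err}")
```

With a valid config, the trainer calls `push_to_hub(saved_path, sampled_videos_paths, self._config)` once at the end of training, which creates the repository if needed, uploads the saved weights and a generated model card with sample GIFs, and pushes a ComfyUI-compatible copy of the LoRA produced by `convert_checkpoint`.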