From e07e447354928a6ece1b6e8562dccc988611735f Mon Sep 17 00:00:00 2001
From: linoytsaban
Date: Mon, 24 Mar 2025 11:05:37 +0200
Subject: [PATCH 1/4] add push_to_hub support

---
 src/ltxv_trainer/config.py  | 17 +++++++++++++++++
 src/ltxv_trainer/trainer.py | 10 ++++++++++
 2 files changed, 27 insertions(+)

diff --git a/src/ltxv_trainer/config.py b/src/ltxv_trainer/config.py
index c1606a6..27096c2 100644
--- a/src/ltxv_trainer/config.py
+++ b/src/ltxv_trainer/config.py
@@ -234,6 +234,22 @@ class CheckpointsConfig(ConfigBaseModel):
     )
 
 
+class HubConfig(ConfigBaseModel):
+    """Configuration for Hugging Face Hub integration"""
+
+    push_to_hub: bool = Field(
+        default=False,
+        description="Whether to push the model weights to the Hugging Face Hub"
+    )
+    hub_model_id: str | None = Field(
+        default=None,
+        description="Hugging Face Hub repository ID (e.g., 'username/repo-name')"
+    )
+    hub_token: str | None = Field(
+        default=None,
+        description="Hugging Face token. If None, will use the token from the Hugging Face CLI"
+    )
+
 class FlowMatchingConfig(ConfigBaseModel):
     """Configuration for flow matching training"""
 
diff --git a/src/ltxv_trainer/trainer.py b/src/ltxv_trainer/trainer.py
index 9128f32..19065e4 100644
--- a/src/ltxv_trainer/trainer.py
+++ b/src/ltxv_trainer/trainer.py
@@ -14,6 +14,7 @@
 from accelerate.utils import set_seed
 from diffusers import LTXPipeline
 from diffusers.utils import export_to_video
+from huggingface_hub import create_repo, upload_folder
 from loguru import logger
 from peft import LoraConfig, get_peft_model_state_dict
 from peft.tuners.tuners_utils import BaseTunerLayer
@@ -286,6 +287,15 @@ def train( # noqa: PLR0912, PLR0915
         if self._accelerator.is_main_process:
             saved_path = self._save_checkpoint()
 
+            # Upload artifacts to hub if enabled
+            if cfg.hub.push_to_hub:
+                repo_id = cfg.hub.hub_model_id or Path(cfg.output_dir).name
+                repo_id = create_repo(token=cfg.hub.hub_token, repo_id=repo_id, exist_ok=True).repo_id
+                upload_folder(
+                    repo_id=repo_id,
+                    folder_path=Path(self._config.output_dir),
+                )
+
         # Log the training statistics
         self._log_training_stats(stats)

From 4889d2df15a8cad84e2e19875ba266b75d051676 Mon Sep 17 00:00:00 2001
From: linoytsaban
Date: Tue, 25 Mar 2025 12:25:45 +0200
Subject: [PATCH 2/4] add save_model_card to utils.py

---
 src/ltxv_trainer/utils.py | 95 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 95 insertions(+)

diff --git a/src/ltxv_trainer/utils.py b/src/ltxv_trainer/utils.py
index dbbaf71..e1dd000 100644
--- a/src/ltxv_trainer/utils.py
+++ b/src/ltxv_trainer/utils.py
@@ -1,13 +1,21 @@
 import io
 import subprocess
+import os
+from typing import List, Union
 from pathlib import Path
 
 import torch
 from loguru import logger
 from PIL import ExifTags, Image, ImageCms, ImageOps
 from PIL.Image import Image as PilImage
+import numpy as np
+from diffusers.utils import export_to_video
+from diffusers.utils.hub_utils import load_or_create_model_card, populate_model_card
+
+from PIL import Image
+
 
 def get_gpu_memory_gb(device: torch.device) -> float:
     """Get current GPU memory usage in GB using nvidia-smi"""
     try:
@@ -69,3 +77,90 @@ def open_image_as_srgb(image_path: str | Path | io.BytesIO) -> PilImage:
 
     srgb_img.info["icc_profile"] = srgb_profile_data
     return srgb_img
+
+
+def save_model_card(
+    output_dir: str,
+    repo_id: str,
+    pretrained_model_name_or_path: str,
+    videos: Union[List[str], List[PilImage], List[np.ndarray]],
+    validation_prompts: List[str],
+    fps: int = 30,
+) -> None:
+    widget_dict = []
+    if videos is not None and len(videos) > 0:
+        for i, (video, validation_prompt) in enumerate(zip(videos, validation_prompts)):
+            if not isinstance(video, str):
+                export_to_video(video, os.path.join(output_dir, f"final_video_{i}.mp4"), fps=fps)
+            widget_dict.append(
+                {
+                    "text": validation_prompt if validation_prompt else " ",
+                    "output": {"url": video if isinstance(video, str) else f"final_video_{i}.mp4"},
+                }
+            )
+
+    model_description = f"""
+# LoRA Finetune
+
+
+## Model description
+
+This is a LoRA finetune of `{pretrained_model_name_or_path}`.
+
+The model was trained using [`LTX-Video Community Trainer`](https://github.com/Lightricks/LTX-Video-Trainer).
+
+## Download model
+
+[Download LoRA]({repo_id}/tree/main) in the Files & Versions tab.
+
+## Usage
+
+### Using Trained LoRAs with `diffusers`:
+Requires the [🧨 Diffusers library](https://github.com/huggingface/diffusers) installed.
+
+### 🔌 Using Trained LoRAs in ComfyUI
+
+After training your LoRA, you can use it in ComfyUI by following these steps:
+
+1. Copy your trained LoRA weights (`.safetensors` file) to the `models/loras` folder in your ComfyUI installation.
+
+2. Install the ComfyUI-LTXVideoLoRA custom node:
+
+   ```bash
+   # In the root folder of your ComfyUI installation
+   cd custom_nodes
+   git clone https://github.com/dorpxam/ComfyUI-LTXVideoLoRA
+   pip install -r ComfyUI-LTXVideoLoRA/requirements.txt
+   ```
+
+3. In your ComfyUI workflow:
+   - Add the "LTXV LoRA Selector" node to choose your LoRA file
+   - Connect it to the "LTXV LoRA Loader" node to apply the LoRA to your generation
+
+You can find reference Text-to-Video (T2V) and Image-to-Video (I2V) workflows in the [official LTXV ComfyUI repository](https://github.com/Lightricks/ComfyUI-LTXVideo).
+
+```py
+TODO
+```
+
+For more details, including weighting, merging and fusing LoRAs, check the [documentation](https://huggingface.co/docs/diffusers/main/en/using-diffusers/loading_adapters) on loading LoRAs in diffusers.
+""" + + model_card = load_or_create_model_card( + repo_id_or_path=repo_id, + from_training=True, + base_model=pretrained_model_name_or_path, + model_description=model_description, + widget=widget_dict, + ) + tags = [ + "text-to-video", + "ltx-video" + "diffusers", + "lora", + "template:sd-lora", + ] + + model_card = populate_model_card(model_card, tags=tags) + model_card.save(os.path.join(output_dir, "README.md")) \ No newline at end of file From 762b8b6a26526fa1c249a3f963fb19d7097b36e0 Mon Sep 17 00:00:00 2001 From: linoytsaban Date: Tue, 25 Mar 2025 12:54:18 +0200 Subject: [PATCH 3/4] add `save_model_card` to trainer adjust `save_model_card` to include inference examples --- src/ltxv_trainer/trainer.py | 12 +++++++- src/ltxv_trainer/utils.py | 59 +++++++++++++++++++++++++++++++++++++ 2 files changed, 70 insertions(+), 1 deletion(-) diff --git a/src/ltxv_trainer/trainer.py b/src/ltxv_trainer/trainer.py index 19065e4..e618762 100644 --- a/src/ltxv_trainer/trainer.py +++ b/src/ltxv_trainer/trainer.py @@ -51,7 +51,7 @@ from ltxv_trainer.model_loader import load_ltxv_components from ltxv_trainer.quantization import quantize_model from ltxv_trainer.timestep_samplers import SAMPLERS -from ltxv_trainer.utils import get_gpu_memory_gb +from ltxv_trainer.utils import get_gpu_memory_gb, save_model_card # Disable irrelevant warnings from transformers os.environ["TOKENIZERS_PARALLELISM"] = "true" @@ -291,6 +291,16 @@ def train( # noqa: PLR0912, PLR0915 if cfg.hub.push_to_hub: repo_id = cfg.hub.hub_model_id or Path(cfg.output_dir).name repo_id = create_repo(token=cfg.hub.hub_token, repo_id=repo_id, exist_ok=True) + video_filenames = sampled_videos_paths if sampled_videos_paths else [] + + save_model_card( + output_dir=cfg.output_dir, + repo_id=repo_id, + pretrained_model_name_or_path=cfg.model.model_source, + videos=video_filenames, + validation_prompts=self._config.validation.prompts + ) + upload_folder( repo_id=repo_id, folder_path=Path(self._config.output_dir), diff --git a/src/ltxv_trainer/utils.py b/src/ltxv_trainer/utils.py index e1dd000..723ed13 100644 --- a/src/ltxv_trainer/utils.py +++ b/src/ltxv_trainer/utils.py @@ -98,6 +98,8 @@ def save_model_card( "output": {"url": video if isinstance(video, str) else f"final_video_{i}.mp4"}, } ) + if pretrained_model_name_or_path not in ["Lightricks/LTX-Video", "Lightricks/LTX-Video-0.9.5"]: + pretrained_model_name_or_path = "Lightricks/LTX-Video" model_description = f""" # LoRA Finetune @@ -119,6 +121,62 @@ def save_model_card( ### Using Trained LoRAs with `diffusers`: Requires the [๐Ÿงจ Diffusers library](https://github.com/huggingface/diffusers) installed. 
+Text-to-Video generation using the trained LoRA:
+```python
+import torch
+from diffusers import LTXPipeline
+from diffusers.utils import export_to_video
+from huggingface_hub import hf_hub_download
+
+pipe = LTXPipeline.from_pretrained("Lightricks/LTX-Video", torch_dtype=torch.bfloat16)
+lora_weights = hf_hub_download(repo_id={repo_id}, filename="path_to_your_saved_weights.safetensors")
+state_dict = load_file(lora_weights)
+pipe.load_lora_weights(state_dict)
+pipe.to("cuda")
+
+prompt = "{validation_prompts[0]}"
+negative_prompt = "worst quality, inconsistent motion, blurry, jittery, distorted"
+video = pipe(
+    prompt=prompt,
+    negative_prompt=negative_prompt,
+    width=704,
+    height=480,
+    num_frames=161,
+    num_inference_steps=50,
+).frames[0]
+export_to_video(video, "output.mp4", fps=24)
+```
+
+For Image-to-Video:
+```python
+import torch
+from diffusers import LTXImageToVideoPipeline
+from diffusers.utils import export_to_video, load_image
+
+pipe = LTXImageToVideoPipeline.from_pretrained("Lightricks/LTX-Video", torch_dtype=torch.bfloat16)
+lora_weights = hf_hub_download(repo_id={repo_id}, filename="saved_weights_path.safetensors")
+state_dict = load_file(lora_weights)
+pipe.load_lora_weights(state_dict)
+pipe.to("cuda")
+
+image = load_image(
+    "url_to_your_image",
+)
+prompt = "{validation_prompts[0]}"
+negative_prompt = "worst quality, inconsistent motion, blurry, jittery, distorted"
+
+video = pipe(
+    image=image,
+    prompt=prompt,
+    negative_prompt=negative_prompt,
+    width=704,
+    height=480,
+    num_frames=161,
+    num_inference_steps=50,
+).frames[0]
+export_to_video(video, "output.mp4", fps=24)
+```
+
 ### 🔌 Using Trained LoRAs in ComfyUI
 
 After training your LoRA, you can use it in ComfyUI by following these steps:
@@ -156,6 +214,7 @@ def save_model_card(
     )
     tags = [
         "text-to-video",
+        "image-to-video",
         "ltx-video",
         "diffusers",
         "lora",

From 844c3aef5f1af3f864b4c6347da269b14f14b4a7 Mon Sep 17 00:00:00 2001
From: linoytsaban
Date: Tue, 25 Mar 2025 15:09:26 +0200
Subject: [PATCH 4/4] add `save_model_card` to trainer adjust `save_model_card`
 to include inference examples

---
 src/ltxv_trainer/utils.py | 13 ++++---------
 1 file changed, 4 insertions(+), 9 deletions(-)

diff --git a/src/ltxv_trainer/utils.py b/src/ltxv_trainer/utils.py
index 723ed13..b69d89a 100644
--- a/src/ltxv_trainer/utils.py
+++ b/src/ltxv_trainer/utils.py
@@ -126,12 +126,10 @@ def save_model_card(
 import torch
 from diffusers import LTXPipeline
 from diffusers.utils import export_to_video
-from huggingface_hub import hf_hub_download
 
 pipe = LTXPipeline.from_pretrained("Lightricks/LTX-Video", torch_dtype=torch.bfloat16)
-lora_weights = hf_hub_download(repo_id={repo_id}, filename="path_to_your_saved_weights.safetensors")
-state_dict = load_file(lora_weights)
-pipe.load_lora_weights(state_dict)
+pipe.load_lora_weights("{repo_id}", adapter_name="ltxv-lora")
+pipe.set_adapters(["ltxv-lora"], [0.75])
 pipe.to("cuda")
 
 prompt = "{validation_prompts[0]}"
 negative_prompt = "worst quality, inconsistent motion, blurry, jittery, distorted"
 video = pipe(
     prompt=prompt,
     negative_prompt=negative_prompt,
     width=704,
     height=480,
-    num_frames=161,
     num_inference_steps=50,
 ).frames[0]
 export_to_video(video, "output.mp4", fps=24)
 ```
 
 For Image-to-Video:
 ```python
 import torch
 from diffusers import LTXImageToVideoPipeline
 from diffusers.utils import export_to_video, load_image
 
 pipe = LTXImageToVideoPipeline.from_pretrained("Lightricks/LTX-Video", torch_dtype=torch.bfloat16)
-lora_weights = hf_hub_download(repo_id={repo_id}, filename="saved_weights_path.safetensors")
-state_dict = load_file(lora_weights)
-pipe.load_lora_weights(state_dict)
+pipe.load_lora_weights("{repo_id}", adapter_name="ltxv-lora") +pipe.set_adapters(["ltxv-lora"], [0.75]) pipe.to("cuda") image = load_image( @@ -171,7 +167,6 @@ def save_model_card( negative_prompt=negative_prompt, width=704, height=480, - num_frames=161, num_inference_steps=50, ).frames[0] export_to_video(video, "output.mp4", fps=24)
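
Usage note: a minimal sketch (not part of the patches above) of how the new `HubConfig` options from PATCH 1 can be set, and how the repository ID falls back to the output directory name when `hub_model_id` is left unset. Only `HubConfig` and the fallback expression come from the diffs; the output directory value is hypothetical.

```python
from pathlib import Path

from ltxv_trainer.config import HubConfig

# Enable pushing to the Hub. Leaving hub_token=None falls back to the token
# stored by `huggingface-cli login`, as described in the field definition.
hub = HubConfig(push_to_hub=True, hub_model_id=None, hub_token=None)

# The trainer derives the repository ID the same way when hub_model_id is unset:
output_dir = "outputs/my-ltxv-lora"  # hypothetical output directory
repo_id = hub.hub_model_id or Path(output_dir).name
print(repo_id)  # -> "my-ltxv-lora"
```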