def apply_file_caption_to_clip(clip_data: Any, file_obj: Any, *, dedupe: bool = True) -> bool:
    """Attach a Caption effect to ``clip_data`` when the file's metadata carries caption text.

    Args:
        clip_data: Mapping-style clip dictionary; its "effects" list is mutated in place.
        file_obj: Object with a ``data`` mapping (project File); ``data["caption"]`` is read.
        dedupe: When True, skip attaching if an identical Caption effect already exists.

    Returns:
        True when a new Caption effect was appended; False when the inputs are
        unusable, the caption text is empty, or a duplicate was found.
    """
    if not isinstance(clip_data, Mapping):
        return False
    meta = getattr(file_obj, "data", None)
    if not isinstance(meta, Mapping):
        return False
    text = str(meta.get("caption", "") or "").strip()
    if not text:
        return False

    # Normalize the effects container to a mutable list stored on the clip.
    effect_list = clip_data.get("effects")
    if not isinstance(effect_list, list):
        effect_list = list(effect_list) if effect_list else []
    clip_data["effects"] = effect_list

    if dedupe:
        # An identical Caption (same class, same stripped text) means no-op.
        for existing in effect_list:
            if not isinstance(existing, Mapping):
                continue
            is_caption = str(existing.get("class_name", "")).lower() == "caption"
            if is_caption and str(existing.get("caption_text", "") or "").strip() == text:
                return False

    effect_obj = openshot.EffectInfo().CreateEffect("Caption")
    effect_obj.Id(get_app().project.generate_id())
    effect_json = json.loads(effect_obj.Json())
    effect_json["caption_text"] = text
    effect_list.append(effect_json)
    return True
+""" + +import json +import os +import ssl +import base64 +import uuid +from datetime import datetime +import re +import socket +import struct +from urllib.error import HTTPError +from urllib.request import Request, urlopen +from urllib.parse import quote, urlencode +from urllib.parse import urlparse + +from classes import info +from classes.logger import log + + +class ComfyProgressSocket: + """Minimal WebSocket client for ComfyUI /ws progress events.""" + + def __init__(self, base_url, client_id): + self.base_url = str(base_url or "").rstrip("/") + self.client_id = str(client_id or "") + self.sock = None + self._connect() + + def _connect(self): + parsed = urlparse(self.base_url) + scheme = parsed.scheme.lower() + host = parsed.hostname + if not host: + raise RuntimeError("Invalid ComfyUI URL for websocket") + port = parsed.port or (443 if scheme == "https" else 80) + base_path = (parsed.path or "").rstrip("/") + ws_path = "{}/ws".format(base_path) if base_path else "/ws" + path = "{}?clientId={}".format(ws_path, quote(self.client_id)) + + raw = socket.create_connection((host, port), timeout=6.0) + if scheme == "https": + ctx = ssl.create_default_context() + raw = ctx.wrap_socket(raw, server_hostname=host) + # Allow slower remote/proxied websocket handshakes. + raw.settimeout(6.0) + self.sock = raw + + key = base64.b64encode(os.urandom(16)).decode("ascii") + req = ( + "GET {} HTTP/1.1\r\n" + "Host: {}:{}\r\n" + "Upgrade: websocket\r\n" + "Connection: Upgrade\r\n" + "Origin: {}://{}:{}\r\n" + "Pragma: no-cache\r\n" + "Cache-Control: no-cache\r\n" + "Sec-WebSocket-Key: {}\r\n" + "Sec-WebSocket-Version: 13\r\n" + "\r\n" + ).format(path, host, port, scheme, host, port, key) + self.sock.sendall(req.encode("utf-8")) + + response = self._recv_http_headers() + if " 101 " not in response.split("\r\n", 1)[0]: + raise RuntimeError("WebSocket upgrade failed: {}".format(response.split("\r\n", 1)[0])) + # Use short timeout for regular frame polling after successful handshake. 
+ self.sock.settimeout(0.25) + + def close(self): + if self.sock is not None: + try: + self.sock.close() + except OSError: + pass + self.sock = None + + def poll_progress(self, prompt_id=None, max_messages=8): + """Read available frames and return latest progress payload. + + If prompt_id is provided, events are filtered to that prompt id. + If prompt_id is None/empty, events from any prompt on this websocket + are accepted (useful for meta-batch follow-up prompts). + """ + if not self.sock: + return None + latest = None + latest_rank = None + prompt_key = str(prompt_id or "").strip() + for _ in range(max_messages): + frame = self._recv_frame_nonblocking() + if frame is None: + break + opcode, payload = frame + + # Ping -> pong + if opcode == 0x9: + self._send_control_frame(0xA, payload) + continue + if opcode == 0x8: + self.close() + break + if opcode != 0x1: + continue + try: + msg = json.loads(payload.decode("utf-8")) + except Exception: + continue + if not isinstance(msg, dict): + continue + + event_type = msg.get("type") + event_data = msg.get("data", {}) + if event_type == "progress": + if not isinstance(event_data, dict): + continue + event_prompt = str(event_data.get("prompt_id", "")) + if prompt_key and (not event_prompt or event_prompt != prompt_key): + continue + value = float(event_data.get("value", 0.0)) + maximum = float(event_data.get("max", 0.0)) + if maximum > 0: + candidate = { + "percent": int(max(0, min(99, round((value / maximum) * 100.0)))), + "value": value, + "max": maximum, + "node": str(event_data.get("node", "")), + "type": "progress", + "prompt_id": event_prompt, + } + # Prefer unfinished progress, and prefer explicit "progress" events. 
+ unfinished = (value + 1e-6) < maximum + rank = (1 if unfinished else 0, 2, maximum) + if latest is None or rank > latest_rank: + latest = candidate + latest_rank = rank + elif event_type == "progress_state": + # Newer Comfy events: data={prompt_id, nodes={node_id:{value,max}}} + if not isinstance(event_data, dict): + continue + event_prompt = str(event_data.get("prompt_id", "")) + if prompt_key and (not event_prompt or event_prompt != prompt_key): + continue + nodes = event_data.get("nodes", {}) + if not isinstance(nodes, dict): + continue + # Prefer unfinished node progress; only fall back to completed states. + best = None + best_rank = None + for node_id, node_state in nodes.items(): + if not isinstance(node_state, dict): + continue + value = float(node_state.get("value", 0.0)) + maximum = float(node_state.get("max", 0.0)) + if maximum > 0: + candidate = { + "percent": int(max(0, min(99, round((value / maximum) * 100.0)))), + "value": value, + "max": maximum, + "node": str(node_id), + "type": "progress_state", + "prompt_id": event_prompt, + } + unfinished = (value + 1e-6) < maximum + rank = (1 if unfinished else 0, maximum) + if best is None or rank > best_rank: + best = candidate + best_rank = rank + if best is not None: + rank = (best_rank[0], 1, float(best.get("max", 0.0))) + if latest is None or rank > latest_rank: + latest = best + latest_rank = rank + return latest + + def _recv_http_headers(self): + data = b"" + while b"\r\n\r\n" not in data: + chunk = self.sock.recv(4096) + if not chunk: + break + data += chunk + if len(data) > 65536: + break + return data.decode("utf-8", errors="replace") + + def _recv_exact(self, size): + chunks = [] + remaining = size + while remaining > 0: + chunk = self.sock.recv(remaining) + if not chunk: + raise RuntimeError("WebSocket connection closed") + chunks.append(chunk) + remaining -= len(chunk) + return b"".join(chunks) + + def _recv_frame_nonblocking(self): + try: + header = self.sock.recv(2) + if not header: + return 
None + except socket.timeout: + return None + except OSError: + return None + + if len(header) < 2: + return None + b1, b2 = header[0], header[1] + opcode = b1 & 0x0F + masked = (b2 & 0x80) != 0 + length = b2 & 0x7F + + if length == 126: + length = struct.unpack("!H", self._recv_exact(2))[0] + elif length == 127: + length = struct.unpack("!Q", self._recv_exact(8))[0] + + mask_key = b"" + if masked: + mask_key = self._recv_exact(4) + + payload = self._recv_exact(length) if length > 0 else b"" + if masked and payload: + payload = bytes(payload[i] ^ mask_key[i % 4] for i in range(len(payload))) + + return opcode, payload + + def _send_control_frame(self, opcode, payload=b""): + if self.sock is None: + return + payload = payload or b"" + first = 0x80 | (opcode & 0x0F) + # Client frames must be masked. + mask = os.urandom(4) + length = len(payload) + if length < 126: + header = bytes([first, 0x80 | length]) + elif length < (1 << 16): + header = bytes([first, 0x80 | 126]) + struct.pack("!H", length) + else: + header = bytes([first, 0x80 | 127]) + struct.pack("!Q", length) + masked_payload = bytes(payload[i] ^ mask[i % 4] for i in range(length)) + self.sock.sendall(header + mask + masked_payload) + + +class ComfyClient: + """Minimal ComfyUI client using stdlib HTTP.""" + ERROR_MAX_CHARS = 1800 + + def __init__(self, base_url): + self.base_url = str(base_url or "").rstrip("/") + + @staticmethod + def _write_debug_error(payload): + debug_dir = info.COMFYUI_PATH + try: + os.makedirs(debug_dir, exist_ok=True) + debug_path = os.path.join(debug_dir, "debug_error.json") + with open(debug_path, "w", encoding="utf-8") as handle: + json.dump(payload, handle, indent=2, sort_keys=True) + handle.write("\n") + except Exception: + log.warning("Failed writing Comfy debug error payload", exc_info=True) + + def _write_debug_prompt_payload(self, prompt_graph, client_id): + debug_dir = info.COMFYUI_PATH + try: + os.makedirs(debug_dir, exist_ok=True) + debug_path = os.path.join(debug_dir, 
"debug.json") + payload = { + "generated_at_utc": datetime.utcnow().isoformat() + "Z", + "comfy_url": self.base_url, + "client_id": str(client_id or ""), + "prompt": prompt_graph, + } + with open(debug_path, "w", encoding="utf-8") as handle: + json.dump(payload, handle, indent=2, sort_keys=True) + handle.write("\n") + except Exception: + log.warning("Failed writing Comfy sent prompt payload", exc_info=True) + + @staticmethod + def open_progress_socket(base_url, client_id): + return ComfyProgressSocket(base_url, client_id) + + def ping(self, timeout=0.5): + with urlopen("{}/system_stats".format(self.base_url), timeout=timeout) as response: + return int(response.status) >= 200 and int(response.status) < 300 + + def queue_prompt(self, prompt_graph, client_id): + prompt_graph = self._rewrite_prompt_local_file_inputs(prompt_graph) + self._write_debug_prompt_payload(prompt_graph, client_id) + payload = json.dumps({"prompt": prompt_graph, "client_id": client_id}).encode("utf-8") + req = Request( + "{}/prompt".format(self.base_url), + data=payload, + method="POST", + headers={"Content-Type": "application/json"}, + ) + try: + with urlopen(req, timeout=10.0) as response: + data = json.loads(response.read().decode("utf-8")) + except HTTPError as ex: + details = "" + try: + error_data = json.loads(ex.read().decode("utf-8")) + ComfyClient._write_debug_error(error_data) + error_obj = error_data.get("error", {}) + if isinstance(error_obj, dict): + details = error_obj.get("type") or error_obj.get("message") or "" + else: + details = str(error_obj or "") + + node_errors = error_data.get("node_errors", {}) + node_error_text = self._format_node_errors(node_errors) + if node_error_text: + details = "{}\n{}".format(details or "prompt validation failed", node_error_text) + elif not details: + details = ComfyClient.summarize_error_text(error_data) + else: + details = "{}\n{}".format(details, ComfyClient.summarize_error_text(error_data)) + except Exception: + details = str(ex) + raise 
RuntimeError("ComfyUI prompt rejected: {}".format(ComfyClient.summarize_error_text(details))) + return data.get("prompt_id") + + def _rewrite_prompt_local_file_inputs(self, prompt_graph): + """Rewrite local absolute paths for image/video loader nodes to uploaded Comfy input refs.""" + if not isinstance(prompt_graph, dict): + return prompt_graph + rewritten = dict(prompt_graph) + + def _annotated(path_text): + path_text = str(path_text or "").strip() + return path_text.endswith("[input]") or path_text.endswith("[output]") or path_text.endswith("[temp]") + + for node_id, node in rewritten.items(): + if not isinstance(node, dict): + continue + class_type = str(node.get("class_type", "")) + inputs = node.get("inputs", {}) + if not isinstance(inputs, dict): + continue + + if class_type == "LoadImage": + image_path = str(inputs.get("image", "")).strip() + if image_path and os.path.isabs(image_path) and os.path.exists(image_path) and not _annotated(image_path): + uploaded = self.upload_input_file(image_path) + inputs["image"] = uploaded + node["inputs"] = inputs + rewritten[node_id] = node + log.debug("ComfyClient rewrote LoadImage input node=%s path=%s -> %s", str(node_id), image_path, uploaded) + elif class_type == "LoadVideo": + video_path = str(inputs.get("file", "")).strip() + if video_path and os.path.isabs(video_path) and os.path.exists(video_path) and not _annotated(video_path): + uploaded = self.upload_input_file(video_path) + inputs["file"] = uploaded + node["inputs"] = inputs + rewritten[node_id] = node + log.debug("ComfyClient rewrote LoadVideo input node=%s path=%s -> %s", str(node_id), video_path, uploaded) + elif class_type in ("VHS_LoadVideo", "VHS_LoadVideoPath", "VHS_LoadVideoFFmpegPath"): + video_path = str(inputs.get("video", "")).strip() + if video_path and os.path.isabs(video_path) and os.path.exists(video_path) and not _annotated(video_path): + uploaded = self.upload_input_file(video_path) + # VHS_LoadVideo expects a plain filename from Comfy input 
options. + # Path-based VHS loaders accept a plain relative path as well. + if uploaded.endswith(" [input]"): + uploaded = uploaded[:-8].strip() + inputs["video"] = uploaded + node["inputs"] = inputs + rewritten[node_id] = node + log.debug( + "ComfyClient rewrote %s input node=%s path=%s -> %s", + class_type, + str(node_id), + video_path, + uploaded, + ) + + return rewritten + + @staticmethod + def _format_node_errors(node_errors): + if not isinstance(node_errors, dict) or not node_errors: + return "" + lines = [] + max_lines = 8 + for node_id, err in node_errors.items(): + if len(lines) >= max_lines: + break + if not isinstance(err, dict): + lines.append("node {}: {}".format(node_id, str(err))) + continue + err_type = str(err.get("type", "")).strip() + message = str(err.get("message", "")).strip() + if not message: + details = err.get("details") + if details: + message = str(details) + if err_type and message: + lines.append("node {} [{}]: {}".format(node_id, err_type, message)) + elif message: + lines.append("node {}: {}".format(node_id, message)) + elif err_type: + lines.append("node {} [{}]".format(node_id, err_type)) + if not lines: + return "" + return "Node validation errors: {}".format(" | ".join(lines)) + + @staticmethod + def summarize_error_text(value, max_chars=None): + """Return a compact Comfy error text safe for UI display.""" + if max_chars is None: + max_chars = ComfyClient.ERROR_MAX_CHARS + + if isinstance(value, (dict, list, tuple)): + value = ComfyClient._limit_error_structure(value) + try: + text = json.dumps(value, ensure_ascii=True) + except Exception: + text = str(value) + else: + text = str(value or "") + + # Remove huge numeric/tensor dumps that make dialogs unreadable. 
+ text = re.sub(r"tensor\(\[[\s\S]{250,}?\]\)", "tensor([])", text) + text = re.sub(r"array\(\[[\s\S]{250,}?\]\)", "array([])", text) + text = re.sub(r"\[[\d\.\-eE,\s]{350,}\]", "[]", text) + text = re.sub(r"\s+", " ", text).strip() + + max_chars = max(300, int(max_chars)) + if len(text) > max_chars: + truncated = len(text) - max_chars + text = "{} ... [truncated {} chars]".format(text[:max_chars], truncated) + return text + + @staticmethod + def _limit_error_structure(value, depth=0, max_depth=4, max_items=10, max_str=260): + if depth >= max_depth: + return "<...>" + if isinstance(value, dict): + out = {} + for index, key in enumerate(value.keys()): + if index >= max_items: + out[""] = len(value) - max_items + break + out[str(key)] = ComfyClient._limit_error_structure( + value.get(key), + depth=depth + 1, + max_depth=max_depth, + max_items=max_items, + max_str=max_str, + ) + return out + if isinstance(value, (list, tuple)): + out = [] + for index, item in enumerate(value): + if index >= max_items: + out.append("".format(len(value) - max_items)) + break + out.append( + ComfyClient._limit_error_structure( + item, + depth=depth + 1, + max_depth=max_depth, + max_items=max_items, + max_str=max_str, + ) + ) + return out + text = str(value) + if len(text) > max_str: + return text[:max_str] + "..." 
+ return text + + def list_checkpoints(self): + """Return available checkpoint names from ComfyUI object info.""" + with urlopen("{}/object_info/CheckpointLoaderSimple".format(self.base_url), timeout=8.0) as response: + data = json.loads(response.read().decode("utf-8")) + + # Expected path: + # CheckpointLoaderSimple -> input -> required -> ckpt_name + # Supports multiple schema variants: + # 1) [ [..names..], {...meta...} ] + # 2) ["COMBO", {"options":[..names..], ...}] + node_info = data.get("CheckpointLoaderSimple", {}) + required = node_info.get("input", {}).get("required", {}) + ckpt_input = required.get("ckpt_name", None) + values = self._extract_combo_options(ckpt_input) + return [str(v) for v in values if str(v).strip()] + + def list_upscale_models(self): + """Return available upscaler model names from ComfyUI object info.""" + models = [] + # Primary source: object_info schema for UpscaleModelLoader. + try: + with urlopen("{}/object_info/UpscaleModelLoader".format(self.base_url), timeout=8.0) as response: + data = json.loads(response.read().decode("utf-8")) + + node_info = data.get("UpscaleModelLoader", {}) + required = node_info.get("input", {}).get("required", {}) + model_input = required.get("model_name", None) + values = self._extract_combo_options(model_input) + if values: + models = [str(v) for v in values if str(v).strip()] + except Exception as ex: + log.debug("ComfyClient list_upscale_models object_info parse failed: %s", ex) + + # Fallback: direct model listing endpoint. + if not models: + try: + with urlopen("{}/models/upscale_models".format(self.base_url), timeout=8.0) as response: + data = json.loads(response.read().decode("utf-8")) + if isinstance(data, list): + models = [str(v) for v in data if str(v).strip()] + except Exception as ex: + log.debug("ComfyClient list_upscale_models /models fallback failed: %s", ex) + + # Dedupe while preserving order. 
+ seen = set() + ordered = [] + for name in models: + if name not in seen: + seen.add(name) + ordered.append(name) + return ordered + + def list_clip_models(self): + """Return available CLIP/text-encoder model names from ComfyUI object info.""" + with urlopen("{}/object_info/CLIPLoader".format(self.base_url), timeout=8.0) as response: + data = json.loads(response.read().decode("utf-8")) + + node_info = data.get("CLIPLoader", {}) + required = node_info.get("input", {}).get("required", {}) + clip_input = required.get("clip_name", None) + values = self._extract_combo_options(clip_input) + return [str(v) for v in values if str(v).strip()] + + def list_clip_vision_models(self): + """Return available CLIP vision model names from ComfyUI object info.""" + with urlopen("{}/object_info/CLIPVisionLoader".format(self.base_url), timeout=8.0) as response: + data = json.loads(response.read().decode("utf-8")) + + node_info = data.get("CLIPVisionLoader", {}) + required = node_info.get("input", {}).get("required", {}) + clip_input = required.get("clip_name", None) + values = self._extract_combo_options(clip_input) + return [str(v) for v in values if str(v).strip()] + + def list_rife_vfi_models(self): + """Return available RIFE checkpoint names from ComfyUI object info.""" + node_type = "RIFE VFI" + with urlopen( + "{}/object_info/{}".format(self.base_url, quote(node_type, safe="")), + timeout=8.0, + ) as response: + data = json.loads(response.read().decode("utf-8")) + + node_info = data.get(node_type, {}) + required = node_info.get("input", {}).get("required", {}) + ckpt_input = required.get("ckpt_name", None) + values = self._extract_combo_options(ckpt_input) + return [str(v) for v in values if str(v).strip()] + + @staticmethod + def _extract_combo_options(input_config): + """Extract valid options from Comfy object_info input config variants.""" + if input_config is None: + return [] + + # Variant: [ [options...], {meta...} ] + if isinstance(input_config, list) and input_config 
and isinstance(input_config[0], list): + return [str(v) for v in input_config[0]] + + # Variant: ["COMBO", {"options":[...], ...}] + if ( + isinstance(input_config, list) + and len(input_config) >= 2 + and str(input_config[0]).upper() == "COMBO" + and isinstance(input_config[1], dict) + ): + options = input_config[1].get("options", []) + if isinstance(options, list): + return [str(v) for v in options] + + # Variant: direct list of values + if isinstance(input_config, list): + scalar_values = [] + for item in input_config: + if isinstance(item, (str, int, float)): + scalar_values.append(str(item)) + return scalar_values + + return [] + + def history(self, prompt_id): + with urlopen("{}/history/{}".format(self.base_url, quote(str(prompt_id))), timeout=10.0) as response: + return json.loads(response.read().decode("utf-8")) + + def history_all(self): + with urlopen("{}/history".format(self.base_url), timeout=10.0) as response: + return json.loads(response.read().decode("utf-8")) + + def progress(self): + """Return ComfyUI /progress payload.""" + try: + with urlopen("{}/progress".format(self.base_url), timeout=8.0) as response: + return json.loads(response.read().decode("utf-8")) + except HTTPError as ex: + if int(getattr(ex, "code", 0)) == 404: + # Some ComfyUI versions don't expose /progress. 
+ return None + raise + + def interrupt(self, prompt_id=None): + payload = {} + if prompt_id: + payload["prompt_id"] = str(prompt_id) + log.debug("ComfyClient interrupt request base_url=%s prompt_id=%s", self.base_url, payload.get("prompt_id", "")) + req = Request( + "{}/interrupt".format(self.base_url), + data=json.dumps(payload).encode("utf-8"), + method="POST", + headers={"Content-Type": "application/json"}, + ) + with urlopen(req, timeout=8.0) as response: + log.debug("ComfyClient interrupt response status=%s", int(response.status)) + return int(response.status) >= 200 and int(response.status) < 300 + + def cancel_prompt(self, prompt_id): + """Request ComfyUI to delete/cancel a prompt from the queue.""" + log.debug("ComfyClient cancel_prompt request base_url=%s prompt_id=%s", self.base_url, str(prompt_id)) + payload = json.dumps({"delete": [str(prompt_id)]}).encode("utf-8") + req = Request( + "{}/queue".format(self.base_url), + data=payload, + method="POST", + headers={"Content-Type": "application/json"}, + ) + with urlopen(req, timeout=8.0) as response: + log.debug("ComfyClient cancel_prompt response status=%s", int(response.status)) + return int(response.status) >= 200 and int(response.status) < 300 + + def queue(self): + """Return ComfyUI queue state.""" + with urlopen("{}/queue".format(self.base_url), timeout=10.0) as response: + return json.loads(response.read().decode("utf-8")) + + def upload_input_file(self, local_path): + """Upload a local file into ComfyUI input dir via /upload/image.""" + local_path = str(local_path or "").strip() + if not local_path or not os.path.exists(local_path): + raise RuntimeError("Local file does not exist: {}".format(local_path)) + + boundary = "----OpenShotComfy{}".format(uuid.uuid4().hex) + filename = os.path.basename(local_path) + parts = [] + + def _add_field(name, value): + parts.append("--{}\r\n".format(boundary).encode("utf-8")) + parts.append('Content-Disposition: form-data; 
name="{}"\r\n\r\n'.format(name).encode("utf-8")) + parts.append(str(value).encode("utf-8")) + parts.append(b"\r\n") + + _add_field("type", "input") + parts.append("--{}\r\n".format(boundary).encode("utf-8")) + parts.append( + ( + 'Content-Disposition: form-data; name="image"; filename="{}"\r\n' + "Content-Type: application/octet-stream\r\n\r\n" + ).format(filename).encode("utf-8") + ) + with open(local_path, "rb") as handle: + parts.append(handle.read()) + parts.append(b"\r\n") + parts.append("--{}--\r\n".format(boundary).encode("utf-8")) + body = b"".join(parts) + + req = Request( + "{}/upload/image".format(self.base_url), + data=body, + method="POST", + headers={"Content-Type": "multipart/form-data; boundary={}".format(boundary)}, + ) + with urlopen(req, timeout=30.0) as response: + data = json.loads(response.read().decode("utf-8")) + + name = str(data.get("name", "")).strip() + subfolder = str(data.get("subfolder", "")).strip() + if not name: + raise RuntimeError("ComfyUI upload failed: invalid response") + rel = "{}/{}".format(subfolder, name) if subfolder else name + return "{} [input]".format(rel) + + @staticmethod + def prompt_in_queue(prompt_id, queue_data): + """Check if prompt_id appears in queue_running/queue_pending payload.""" + pid = str(prompt_id) + if not isinstance(queue_data, dict): + return False + + for key in ("queue_running", "queue_pending"): + entries = queue_data.get(key, []) + if not isinstance(entries, list): + continue + for entry in entries: + # Common format: [number, prompt_id, ...] 
+ if isinstance(entry, list) and len(entry) >= 2 and str(entry[1]) == pid: + return True + # Defensive fallback for dict-like entries + if isinstance(entry, dict): + if str(entry.get("prompt_id", "")) == pid: + return True + return False + + @staticmethod + def extract_file_outputs(history_entry, save_node_ids=None): + """Return a flat list of file refs from image/video/audio history outputs.""" + outputs = [] + if not isinstance(history_entry, dict): + return outputs + node_outputs = history_entry.get("outputs", {}) + if not isinstance(node_outputs, dict): + return outputs + save_node_ids = set(str(node_id) for node_id in (save_node_ids or [])) + + for node_id, node_out in node_outputs.items(): + if save_node_ids and str(node_id) not in save_node_ids: + continue + if isinstance(node_out, dict): + for key in ("images", "videos", "video", "gifs", "audios", "audio", "files", "filenames"): + refs = node_out.get(key, []) + if not isinstance(refs, list): + continue + for ref in refs: + if not isinstance(ref, dict): + continue + if ref.get("filename"): + outputs.append({ + "filename": str(ref.get("filename")), + "subfolder": str(ref.get("subfolder", "")), + "type": str(ref.get("type", "output")), + }) + # Also extract text-like outputs (for custom nodes such as Whisper/SRT pipelines). + for value in node_out.values(): + text_values = ComfyClient._extract_text_outputs(value) + for text_value in text_values: + output_format = "srt" if ComfyClient._looks_like_srt(text_value) else "txt" + outputs.append({ + "text": text_value, + "format": output_format, + "type": "text", + }) + else: + # Some custom nodes emit list/string outputs directly instead of dicts. 
+ text_values = ComfyClient._extract_text_outputs(node_out) + for text_value in text_values: + output_format = "srt" if ComfyClient._looks_like_srt(text_value) else "txt" + outputs.append({ + "text": text_value, + "format": output_format, + "type": "text", + }) + return outputs + + @staticmethod + def extract_image_outputs(history_entry, save_node_ids=None): + return ComfyClient.extract_file_outputs(history_entry, save_node_ids=save_node_ids) + + @staticmethod + def _extract_text_output(value): + """Extract text payloads from common Comfy output structures.""" + values = ComfyClient._extract_text_outputs(value) + return values[0] if values else "" + + @staticmethod + def _extract_text_outputs(value): + """Extract one or more text payloads from common Comfy output structures.""" + if isinstance(value, str): + text = value.strip() + return [text] if text else [] + if isinstance(value, list): + out = [] + for item in value: + if isinstance(item, str): + text = item.strip() + if text: + out.append(text) + return out + if isinstance(value, dict): + out = [] + for key in ("srt", "text", "value"): + text = value.get(key) + if isinstance(text, str) and text.strip(): + out.append(text.strip()) + return out + return [] + + @staticmethod + def _looks_like_srt(text): + text = str(text or "") + if "-->" not in text: + return False + return bool(re.search(r"\d{2}:\d{2}:\d{2}[,.:]\d{3}\s+-->\s+\d{2}:\d{2}:\d{2}[,.:]\d{3}", text)) + + def download_output_file(self, file_ref, destination_path): + """Download a Comfy output reference to a local file path.""" + params = { + "filename": file_ref.get("filename", ""), + "subfolder": file_ref.get("subfolder", ""), + "type": file_ref.get("type", "output"), + } + url = "{}/view?{}".format(self.base_url, urlencode(params)) + with urlopen(url, timeout=10.0) as response: + data = response.read() + + os.makedirs(os.path.dirname(destination_path), exist_ok=True) + with open(destination_path, "wb") as handle: + handle.write(data) + + def 
download_image(self, image_ref, destination_path): + self.download_output_file(image_ref, destination_path) diff --git a/src/classes/comfy_pipelines.py b/src/classes/comfy_pipelines.py new file mode 100644 index 000000000..9c2fd816d --- /dev/null +++ b/src/classes/comfy_pipelines.py @@ -0,0 +1,468 @@ +""" + @file + @brief This file contains built-in ComfyUI pipeline definitions. + @author Jonathan Thomas + + @section LICENSE + + Copyright (c) 2008-2026 OpenShot Studios, LLC + (http://www.openshotstudios.com). This file is part of + OpenShot Video Editor (http://www.openshot.org), an open-source project + dedicated to delivering high quality video editing and animation solutions + to the world. + + OpenShot Video Editor is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + OpenShot Video Editor is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with OpenShot Library. If not, see . 
+""" + +import random +import os + + +RASTER_IMAGE_EXTENSIONS = { + ".png", ".jpg", ".jpeg", ".webp", ".bmp", ".tif", ".tiff", ".gif", +} + +DEFAULT_SD_CHECKPOINT = "sd_xl_turbo_1.0_fp16.safetensors" +DEFAULT_SD_BASE_CHECKPOINT = "sd_xl_base_1.0.safetensors" +DEFAULT_UPSCALE_MODEL = "RealESRGAN_x4plus.safetensors" +DEFAULT_STABLE_AUDIO_CHECKPOINT = "stable-audio-open-1.0.safetensors" +DEFAULT_STABLE_AUDIO_CLIP = "t5-base.safetensors" +DEFAULT_SVD_CHECKPOINT = "svd_xt.safetensors" +DEFAULT_RIFE_VFI_MODEL = "rife47.pth" + + +def is_supported_img2img_path(path): + path_text = str(path or "").strip() + # Comfy annotated paths can look like: "image.jpg [input]" + if path_text.endswith("]") and " [" in path_text: + path_text = path_text.rsplit(" [", 1)[0].strip() + ext = os.path.splitext(path_text)[1].lower() + return ext in RASTER_IMAGE_EXTENSIONS + + +def _supports_img2img(source_file=None): + if not source_file: + return False + if source_file.data.get("media_type") != "image": + return False + path = source_file.data.get("path", "") + return is_supported_img2img_path(path) + + +def _supports_video_upscale(source_file=None): + if not source_file: + return False + return source_file.data.get("media_type") == "video" + + +def available_pipelines(source_file=None): + pipelines = [ + {"id": "txt2img-basic", "name": "Basic Text to Image"}, + {"id": "txt2video-svd", "name": "Text to Video (txt_to_image_to_video)"}, + {"id": "txt2audio-stable-open", "name": "Text to Audio (Stable Audio Open)"}, + ] + if _supports_img2img(source_file): + pipelines.insert(0, {"id": "img2img-basic", "name": "Basic Image Variation"}) + pipelines.insert(1, {"id": "upscale-realesrgan-x4", "name": "Upscale Image (RealESRGAN x4)"}) + pipelines.insert(2, {"id": "img2video-svd", "name": "Image to Video (WAN 2.2 TI2V)"}) + if _supports_video_upscale(source_file): + pipelines.append({"id": "video-segment-scenes-transnet", "name": "Segment Scenes (TransNetV2)"}) + pipelines.append({"id": 
"video-frame-interpolation-rife2x", "name": "Frame Interpolation (RIFE 2x FPS)"}) + pipelines.append({"id": "video-upscale-gan", "name": "Upscale Video (GAN x4, first 10s)"}) + pipelines.append({"id": "video2video-basic", "name": "Video + Text to Video (Style Transfer)"}) + pipelines.append({"id": "video-whisper-srt", "name": "Whisper Transcribe to SRT (Caption Effect)"}) + return pipelines + + +def pipeline_requires_checkpoint(pipeline_id): + return str(pipeline_id or "") in ( + "txt2img-basic", + "img2img-basic", + "txt2audio-stable-open", + "txt2video-svd", + "video2video-basic", + ) + + +def pipeline_requires_upscale_model(pipeline_id): + return str(pipeline_id or "") in ("upscale-realesrgan-x4", "video-upscale-gan") + + +def pipeline_requires_stable_audio_clip(pipeline_id): + return str(pipeline_id or "") in ("txt2audio-stable-open",) + + +def pipeline_requires_svd_checkpoint(pipeline_id): + return str(pipeline_id or "") in ("txt2video-svd", "img2video-svd") + + +def pipeline_requires_rife_model(pipeline_id): + return str(pipeline_id or "") in ("video-frame-interpolation-rife2x",) + + +def build_workflow( + pipeline_id, + prompt_text, + source_path, + output_prefix, + checkpoint_name=None, + upscale_model_name=None, + stable_audio_clip_name=None, + svd_checkpoint_name=None, + source_fps=None, + rife_model_name=None, +): + prompt_text = str(prompt_text or "cinematic shot, highly detailed").strip() + if not prompt_text: + prompt_text = "cinematic shot, highly detailed" + output_prefix = str(output_prefix or "openshot_gen").strip() or "openshot_gen" + checkpoint_name = str(checkpoint_name or "").strip() or DEFAULT_SD_CHECKPOINT + upscale_model_name = str(upscale_model_name or "").strip() or DEFAULT_UPSCALE_MODEL + stable_audio_clip_name = str(stable_audio_clip_name or "").strip() or DEFAULT_STABLE_AUDIO_CLIP + svd_checkpoint_name = str(svd_checkpoint_name or "").strip() or DEFAULT_SVD_CHECKPOINT + rife_model_name = str(rife_model_name or "").strip() or 
DEFAULT_RIFE_VFI_MODEL + try: + source_fps_value = float(source_fps) + except (TypeError, ValueError): + source_fps_value = 30.0 + if source_fps_value <= 0: + source_fps_value = 30.0 + target_fps = round(source_fps_value * 2.0, 6) + seed = random.randint(1, 2**31 - 1) + + if pipeline_id == "img2img-basic": + if not is_supported_img2img_path(source_path): + raise ValueError( + "The selected file is not a supported raster image for this pipeline. " + "Use PNG/JPG/WebP/BMP/TIFF or switch to Text to Image." + ) + return { + "1": {"inputs": {"ckpt_name": checkpoint_name}, "class_type": "CheckpointLoaderSimple"}, + "2": {"inputs": {"text": prompt_text, "clip": ["1", 1]}, "class_type": "CLIPTextEncode"}, + "3": {"inputs": {"text": "low quality, blurry", "clip": ["1", 1]}, "class_type": "CLIPTextEncode"}, + "4": {"inputs": {"image": str(source_path or ""), "upload": "image"}, "class_type": "LoadImage"}, + "5": {"inputs": {"pixels": ["4", 0], "vae": ["1", 2]}, "class_type": "VAEEncode"}, + "6": { + "inputs": { + "seed": seed, "steps": 20, "cfg": 7.0, "sampler_name": "euler", "scheduler": "normal", + "denoise": 0.65, "model": ["1", 0], "positive": ["2", 0], "negative": ["3", 0], "latent_image": ["5", 0], + }, + "class_type": "KSampler", + }, + "7": {"inputs": {"samples": ["6", 0], "vae": ["1", 2]}, "class_type": "VAEDecode"}, + "8": {"inputs": {"filename_prefix": output_prefix, "images": ["7", 0]}, "class_type": "SaveImage"}, + } + + if pipeline_id == "upscale-realesrgan-x4": + if not is_supported_img2img_path(source_path): + raise ValueError( + "The selected file is not a supported raster image for this pipeline. " + "Use PNG/JPG/WebP/BMP/TIFF or switch to Text to Image." 
+ ) + return { + "1": {"inputs": {"image": str(source_path or ""), "upload": "image"}, "class_type": "LoadImage"}, + "2": {"inputs": {"model_name": upscale_model_name}, "class_type": "UpscaleModelLoader"}, + "3": {"inputs": {"upscale_model": ["2", 0], "image": ["1", 0]}, "class_type": "ImageUpscaleWithModel"}, + "4": {"inputs": {"filename_prefix": output_prefix, "images": ["3", 0]}, "class_type": "SaveImage"}, + } + + if pipeline_id == "video-upscale-gan": + source_path = str(source_path or "").strip() + if not source_path: + raise ValueError("A source video is required for this pipeline.") + return { + "1": {"inputs": {"file": source_path}, "class_type": "LoadVideo"}, + "2": { + "inputs": {"video": ["1", 0], "start_time": 0.0, "duration": 10.0, "strict_duration": False}, + "class_type": "Video Slice", + }, + "3": {"inputs": {"video": ["2", 0]}, "class_type": "GetVideoComponents"}, + "4": {"inputs": {"model_name": upscale_model_name}, "class_type": "UpscaleModelLoader"}, + "5": {"inputs": {"upscale_model": ["4", 0], "image": ["3", 0]}, "class_type": "ImageUpscaleWithModel"}, + "6": {"inputs": {"images": ["5", 0], "audio": ["3", 1], "fps": ["3", 2]}, "class_type": "CreateVideo"}, + "7": {"inputs": {"video": ["6", 0], "filename_prefix": "video/{}".format(output_prefix), "format": "auto", "codec": "auto"}, "class_type": "SaveVideo"}, + } + + if pipeline_id == "video-whisper-srt": + source_path = str(source_path or "").strip() + if not source_path: + raise ValueError("A source video is required for this pipeline.") + return { + "1": { + "inputs": { + "video": source_path, + "force_rate": 0, + "custom_width": 0, + "custom_height": 0, + "frame_load_cap": 0, + "skip_first_frames": 0, + "select_every_nth": 1, + "format": "AnimateDiff", + }, + "class_type": "VHS_LoadVideo", + }, + "2": { + "inputs": { + "model": "medium", + "language": "auto", + "prompt": "", + "audio": ["1", 2], + }, + "class_type": "Apply Whisper", + }, + "3": { + "inputs": { + "name": 
"{}_segments".format(output_prefix), + "alignment": ["2", 1], + }, + "class_type": "Save SRT", + }, + "4": { + "inputs": { + "preview": "", + "previewMode": None, + "source": ["3", 0], + }, + "class_type": "PreviewAny", + }, + } + + if pipeline_id == "video-frame-interpolation-rife2x": + source_path = str(source_path or "").strip() + if not source_path: + raise ValueError("A source video is required for this pipeline.") + return { + "1": {"inputs": {"file": source_path}, "class_type": "LoadVideo"}, + "2": {"inputs": {"video": ["1", 0]}, "class_type": "GetVideoComponents"}, + "3": { + "inputs": { + "frames": ["2", 0], + "ckpt_name": rife_model_name, + "clear_cache_after_n_frames": 10, + "multiplier": 2, + "fast_mode": True, + "ensemble": True, + "scale_factor": 1, + }, + "class_type": "RIFE VFI", + "_meta": {"title": "RIFE VFI (recommend rife47 and rife49)"}, + }, + "4": {"inputs": {"images": ["3", 0], "audio": ["2", 1], "fps": target_fps}, "class_type": "CreateVideo"}, + "5": { + "inputs": { + "video": ["4", 0], + "filename_prefix": "video/{}".format(output_prefix), + "format": "auto", + "codec": "auto", + }, + "class_type": "SaveVideo", + }, + } + + if pipeline_id == "video-segment-scenes-transnet": + source_path = str(source_path or "").strip() + if not source_path: + raise ValueError("A source video is required for this pipeline.") + return { + "1": { + "inputs": { + "source_video_path": source_path, + "threshold": 0.5, + "min_scene_length_frames": 30, + "device": "auto", + }, + "class_type": "OpenShotTransNetSceneDetect", + "_meta": {"title": "OpenShot TransNet Scene Detect"}, + }, + "9": { + "inputs": { + "preview": "", + "previewMode": None, + "source": ["1", 0], + }, + "class_type": "PreviewAny", + "_meta": {"title": "Preview Any"}, + }, + } + + if pipeline_id == "txt2audio-stable-open": + return { + "3": { + "inputs": { + "seed": seed, + "steps": 50, + "cfg": 5.0, + "sampler_name": "dpmpp_3m_sde_gpu", + "scheduler": "exponential", + "denoise": 1.0, + 
"model": ["4", 0], + "positive": ["6", 0], + "negative": ["7", 0], + "latent_image": ["11", 0], + }, + "class_type": "KSampler", + }, + "4": {"inputs": {"ckpt_name": checkpoint_name}, "class_type": "CheckpointLoaderSimple"}, + "6": {"inputs": {"text": prompt_text, "clip": ["10", 0]}, "class_type": "CLIPTextEncode"}, + "7": {"inputs": {"text": "", "clip": ["10", 0]}, "class_type": "CLIPTextEncode"}, + "10": {"inputs": {"clip_name": stable_audio_clip_name, "type": "stable_audio"}, "class_type": "CLIPLoader"}, + "11": {"inputs": {"seconds": 30.0, "batch_size": 1}, "class_type": "EmptyLatentAudio"}, + "12": {"inputs": {"samples": ["3", 0], "vae": ["4", 2]}, "class_type": "VAEDecodeAudio"}, + "13": {"inputs": {"filename_prefix": "audio/{}".format(output_prefix), "audio": ["12", 0]}, "class_type": "SaveAudio"}, + } + + if pipeline_id == "txt2video-svd": + return { + "1": {"inputs": {"ckpt_name": svd_checkpoint_name}, "class_type": "ImageOnlyCheckpointLoader"}, + "2": {"inputs": {"ckpt_name": checkpoint_name}, "class_type": "CheckpointLoaderSimple"}, + "3": {"inputs": {"text": prompt_text, "clip": ["2", 1]}, "class_type": "CLIPTextEncode"}, + "4": {"inputs": {"text": "low quality, blurry", "clip": ["2", 1]}, "class_type": "CLIPTextEncode"}, + "5": {"inputs": {"width": 512, "height": 288, "batch_size": 1}, "class_type": "EmptyLatentImage"}, + "6": { + "inputs": { + "seed": seed, + "steps": 8, + "cfg": 6.0, + "sampler_name": "euler", + "scheduler": "normal", + "denoise": 1.0, + "model": ["2", 0], + "positive": ["3", 0], + "negative": ["4", 0], + "latent_image": ["5", 0], + }, + "class_type": "KSampler", + }, + "7": {"inputs": {"samples": ["6", 0], "vae": ["2", 2]}, "class_type": "VAEDecode"}, + "8": { + "inputs": { + "clip_vision": ["1", 1], + "init_image": ["7", 0], + "vae": ["1", 2], + "width": 512, + "height": 288, + "video_frames": 24, + "motion_bucket_id": 127, + "fps": 12, + "augmentation_level": 0.0, + }, + "class_type": "SVD_img2vid_Conditioning", + }, + "9": 
{"inputs": {"model": ["1", 0], "min_cfg": 1.0}, "class_type": "VideoLinearCFGGuidance"}, + "10": { + "inputs": { + "seed": seed + 1, + "steps": 10, + "cfg": 2.5, + "sampler_name": "euler", + "scheduler": "karras", + "denoise": 1.0, + "model": ["9", 0], + "positive": ["8", 0], + "negative": ["8", 1], + "latent_image": ["8", 2], + }, + "class_type": "KSampler", + }, + "11": {"inputs": {"samples": ["10", 0], "vae": ["1", 2]}, "class_type": "VAEDecode"}, + "12": {"inputs": {"images": ["11", 0], "fps": 12}, "class_type": "CreateVideo"}, + "13": {"inputs": {"video": ["12", 0], "filename_prefix": "video/{}".format(output_prefix), "format": "auto", "codec": "auto"}, "class_type": "SaveVideo"}, + } + + if pipeline_id == "img2video-svd": + if not is_supported_img2img_path(source_path): + raise ValueError( + "The selected file is not a supported raster image for this pipeline. " + "Use PNG/JPG/WebP/BMP/TIFF or switch to Text to Video." + ) + return { + "1": {"inputs": {"ckpt_name": svd_checkpoint_name}, "class_type": "ImageOnlyCheckpointLoader"}, + "2": {"inputs": {"image": str(source_path or ""), "upload": "image"}, "class_type": "LoadImage"}, + "3": { + "inputs": { + "clip_vision": ["1", 1], + "init_image": ["2", 0], + "vae": ["1", 2], + "width": 1024, + "height": 576, + "video_frames": 25, + "motion_bucket_id": 127, + "fps": 6, + "augmentation_level": 0.0, + }, + "class_type": "SVD_img2vid_Conditioning", + }, + "4": {"inputs": {"model": ["1", 0], "min_cfg": 1.0}, "class_type": "VideoLinearCFGGuidance"}, + "5": { + "inputs": { + "seed": seed + 1, + "steps": 20, + "cfg": 2.5, + "sampler_name": "euler", + "scheduler": "karras", + "denoise": 1.0, + "model": ["4", 0], + "positive": ["3", 0], + "negative": ["3", 1], + "latent_image": ["3", 2], + }, + "class_type": "KSampler", + }, + "6": {"inputs": {"samples": ["5", 0], "vae": ["1", 2]}, "class_type": "VAEDecode"}, + "7": {"inputs": {"images": ["6", 0], "fps": 6}, "class_type": "CreateVideo"}, + "8": {"inputs": {"video": ["7", 
0], "filename_prefix": "video/{}".format(output_prefix), "format": "auto", "codec": "auto"}, "class_type": "SaveVideo"}, + } + + if pipeline_id == "video2video-basic": + source_path = str(source_path or "").strip() + if not source_path: + raise ValueError("A source video is required for this pipeline.") + return { + "1": {"inputs": {"file": source_path}, "class_type": "LoadVideo"}, + "2": { + "inputs": {"video": ["1", 0], "start_time": 0.0, "duration": 10.0, "strict_duration": False}, + "class_type": "Video Slice", + }, + "3": {"inputs": {"video": ["2", 0]}, "class_type": "GetVideoComponents"}, + "4": {"inputs": {"ckpt_name": checkpoint_name}, "class_type": "CheckpointLoaderSimple"}, + "5": {"inputs": {"text": prompt_text, "clip": ["4", 1]}, "class_type": "CLIPTextEncode"}, + "6": {"inputs": {"text": "low quality, blurry", "clip": ["4", 1]}, "class_type": "CLIPTextEncode"}, + "7": {"inputs": {"pixels": ["3", 0], "vae": ["4", 2]}, "class_type": "VAEEncode"}, + "8": { + "inputs": { + "seed": seed, "steps": 16, "cfg": 6.0, "sampler_name": "euler", "scheduler": "normal", + "denoise": 0.55, "model": ["4", 0], "positive": ["5", 0], "negative": ["6", 0], "latent_image": ["7", 0], + }, + "class_type": "KSampler", + }, + "9": {"inputs": {"samples": ["8", 0], "vae": ["4", 2]}, "class_type": "VAEDecode"}, + "10": {"inputs": {"images": ["9", 0], "audio": ["3", 1], "fps": ["3", 2]}, "class_type": "CreateVideo"}, + "11": {"inputs": {"video": ["10", 0], "filename_prefix": "video/{}".format(output_prefix), "format": "auto", "codec": "auto"}, "class_type": "SaveVideo"}, + } + + return { + "1": {"inputs": {"ckpt_name": checkpoint_name}, "class_type": "CheckpointLoaderSimple"}, + "2": {"inputs": {"text": prompt_text, "clip": ["1", 1]}, "class_type": "CLIPTextEncode"}, + "3": {"inputs": {"text": "low quality, blurry", "clip": ["1", 1]}, "class_type": "CLIPTextEncode"}, + "4": {"inputs": {"width": 1024, "height": 576, "batch_size": 1}, "class_type": "EmptyLatentImage"}, + "5": { + 
"inputs": { + "seed": seed, "steps": 20, "cfg": 7.0, "sampler_name": "euler", "scheduler": "normal", + "denoise": 1.0, "model": ["1", 0], "positive": ["2", 0], "negative": ["3", 0], "latent_image": ["4", 0], + }, + "class_type": "KSampler", + }, + "6": {"inputs": {"samples": ["5", 0], "vae": ["1", 2]}, "class_type": "VAEDecode"}, + "7": {"inputs": {"filename_prefix": output_prefix, "images": ["6", 0]}, "class_type": "SaveImage"}, + } diff --git a/src/classes/comfy_templates.py b/src/classes/comfy_templates.py new file mode 100644 index 000000000..adcc0d345 --- /dev/null +++ b/src/classes/comfy_templates.py @@ -0,0 +1,405 @@ +""" + @file + @brief ComfyUI workflow template discovery and classification helpers. +""" + +import copy +import json +import os + +from classes import info +from classes.logger import log + + +IMAGE_INPUT_TYPES = { + "loadimage", + "load image", +} +VIDEO_INPUT_TYPES = { + "loadvideo", + "load video", + "vhs_loadvideo", +} +AUDIO_INPUT_TYPES = { + "loadaudio", + "load audio", +} + +IMAGE_OUTPUT_TYPES = { + "saveimage", + "save image", +} +VIDEO_OUTPUT_TYPES = { + "savevideo", + "save video", +} +AUDIO_OUTPUT_TYPES = { + "saveaudio", + "save audio", +} + +KNOWN_NODE_TYPES = { + # Input + "checkpointloadersimple", + "unetloader", + "cliptextencode", + "cliploader", + "vaeloader", + "loadimage", + "loadvideo", + "vhs_loadvideo", + "loadaudio", + # Core built-in/OpenShot workflows + "vaeencode", + "vaedecode", + "ksampler", + "upscalemodelloader", + "imageupscalewithmodel", + "videoslice", + "video slice", + "getvideocomponents", + "createvideo", + "saveimage", + "savevideo", + "saveaudio", + "save srt", + "emptylatentimage", + "emptyhunyuanlatentvideo", + "wan22imagetovideolatent", + "imageonlycheckpointloader", + "modelsamplingsd3", + "svd_img2vid_conditioning", + "videolinearcfgguidance", + "emptylatentaudio", + "vaedecodeaudio", + "previewany", + "apply whisper", + "riff vfi", + "rife vfi", + "downloadandloadtransnetmodel", + "transnetv2_run", 
+ "selectvideo", + "stableaudioprojectionmodel", + "stableaudiomodelloader", + "stableaudioemptylatentaudio", + "stableaudioembedding", + "kdiffusionsampler", + "stableaudiovaedecode", + "videocombine", + "imagescaleby", + "imagetosimage", + "imagetoimage", + "imagescaleto", + "imageblur", + "imagecompositemasked", + # Video Helper Suite + "vhs_batchmanager", + "vhs_loadvideo", + "vhs_loadvideopath", + "vhs_loadvideoffmpegpath", + "vhs_videocombine", + "vhs_videoinfo", + "vhs_videoinfoloaded", + "vhs_videoinfosource", + # ComfyUI-segment-anything-2 + "downloadandloadsam2model", + "sam2segmentation", + "sam2autosegmentation", + "sam2videosegmentationaddpoints", + "sam2videosegmentation", + # OpenShot-ComfyUI (custom SAM2) + "openshotdownloadandloadsam2model", + "openshotsam2segmentation", + "openshotsam2videosegmentationaddpoints", + "openshotsam2videosegmentationchunked", + "openshotimageblurmasked", + "openshotimagehighlightmasked", +} + + +class ComfyTemplateRegistry: + """Discovers ComfyUI templates from built-in + user folders.""" + + def __init__(self): + self._cache = None + self._cache_signature = None + + @staticmethod + def _is_ignored_filename(name): + return str(name or "").strip().lower() in ("debug.json", "debug_error.json", "debug_sent.json") + + def _template_roots(self): + return [ + (os.path.join(info.PATH, "comfyui"), False), + (info.COMFYUI_PATH, True), + ] + + def _current_signature(self): + signature = [] + for folder, _is_user in self._template_roots(): + if not os.path.isdir(folder): + continue + for name in sorted(os.listdir(folder)): + if not name.lower().endswith(".json"): + continue + if self._is_ignored_filename(name): + continue + path = os.path.join(folder, name) + try: + stat = os.stat(path) + signature.append((path, stat.st_mtime_ns, stat.st_size)) + except OSError: + continue + return tuple(signature) + + def discover(self, force=False): + signature = self._current_signature() + if not force and self._cache is not None and signature 
== self._cache_signature: + return self._cache + + templates = [] + existing_ids = set() + for folder, is_user in self._template_roots(): + if not os.path.isdir(folder): + continue + for name in sorted(os.listdir(folder)): + if not name.lower().endswith(".json"): + continue + if self._is_ignored_filename(name): + continue + path = os.path.join(folder, name) + template = self._load_template(path, is_user=is_user, existing_ids=existing_ids) + if template is None: + continue + templates.append(template) + + templates.sort(key=lambda t: (int(t.get("sort_order", 99999)), str(t.get("display_name", "")).lower())) + self._cache = templates + self._cache_signature = signature + return templates + + def _load_template(self, path, is_user, existing_ids): + try: + with open(path, "r", encoding="utf-8") as handle: + payload = json.load(handle) + except Exception as ex: + log.warning("Skipping invalid ComfyUI template JSON %s: %s", path, ex) + return None + if not isinstance(payload, dict): + log.warning("Skipping invalid ComfyUI template JSON %s: root must be an object", path) + return None + + workflow = self._extract_workflow(payload) + if workflow is None: + log.warning("Skipping invalid ComfyUI template JSON %s: no valid workflow graph found", path) + return None + + node_types = [] + input_types = set() + output_types = set() + unknown_node_types = set() + needs_prompt = False + for node in workflow.values(): + if not isinstance(node, dict): + continue + class_type = str(node.get("class_type", "")).strip() + if not class_type: + continue + inputs = node.get("inputs", {}) + if not isinstance(inputs, dict): + inputs = {} + class_key = class_type.lower().replace("_", "") + class_flat = class_type.lower().strip() + node_types.append(class_type) + + text_value = inputs.get("text", None) + if isinstance(text_value, str): + meta = node.get("_meta", {}) + meta_title = "" + if isinstance(meta, dict): + meta_title = str(meta.get("title", "")).strip().lower() + if "textencode" in 
class_key or "prompt" in meta_title: + needs_prompt = True + + if class_flat in IMAGE_INPUT_TYPES or class_key in IMAGE_INPUT_TYPES: + input_types.add("image") + if class_flat in VIDEO_INPUT_TYPES or class_key in VIDEO_INPUT_TYPES: + input_types.add("video") + if class_flat in AUDIO_INPUT_TYPES or class_key in AUDIO_INPUT_TYPES: + input_types.add("audio") + + if class_flat in IMAGE_OUTPUT_TYPES or class_key in IMAGE_OUTPUT_TYPES: + output_types.add("image") + if class_flat in VIDEO_OUTPUT_TYPES or class_key in VIDEO_OUTPUT_TYPES: + output_types.add("video") + if class_flat in AUDIO_OUTPUT_TYPES or class_key in AUDIO_OUTPUT_TYPES: + output_types.add("audio") + + if class_flat not in KNOWN_NODE_TYPES and class_key not in KNOWN_NODE_TYPES: + unknown_node_types.add(class_type) + + if unknown_node_types: + log.warning( + "ComfyUI template has unknown node types (%s): %s", + os.path.basename(path), + ", ".join(sorted(unknown_node_types)), + ) + + override_category = str(payload.get("menu_category") or payload.get("category") or "").strip().lower() + override_menu_parent = str(payload.get("menu_parent") or "").strip() + override_output_type = str(payload.get("output_type") or payload.get("media_output") or "").strip().lower() + override_icon = str(payload.get("action_icon") or payload.get("icon") or "").strip() + override_open_dialog = payload.get("open_dialog", None) + + inferred_category = "unknown" + requires_source = bool(input_types) + if output_types: + inferred_category = "enhance" if requires_source else "create" + else: + if override_category not in ("create", "enhance", "unknown"): + log.warning( + "ComfyUI template category unknown (%s): no output nodes detected", + os.path.basename(path), + ) + if override_category in ("create", "enhance", "unknown"): + inferred_category = override_category + + template_id = str(payload.get("template_id") or payload.get("id") or "").strip() + if not template_id: + template_id = os.path.splitext(os.path.basename(path))[0] + + 
unique_id = template_id + suffix = 2 + while unique_id in existing_ids: + unique_id = "{}__{}".format(template_id, suffix) + suffix += 1 + existing_ids.add(unique_id) + + display_name = self._extract_name(payload, path) + if is_user: + display_name = "(User) {}".format(display_name) + + try: + sort_order = int(payload.get("menu_order", 99999)) + except (TypeError, ValueError): + sort_order = 99999 + primary_output = self._primary_output_type(output_types) + if override_output_type in ("image", "video", "audio", "unknown"): + primary_output = override_output_type + + open_dialog = None + if isinstance(override_open_dialog, bool): + open_dialog = override_open_dialog + + return { + "id": unique_id, + "template_id": template_id, + "display_name": display_name, + "path": path, + "is_user": is_user, + "category": inferred_category, + "input_types": sorted(input_types), + "output_types": sorted(output_types), + "primary_output": primary_output, + "sort_order": sort_order, + "workflow": workflow, + "node_types": node_types, + "needs_prompt": needs_prompt, + "action_icon": override_icon, + "open_dialog": open_dialog, + "menu_parent": override_menu_parent, + } + + def _primary_output_type(self, output_types): + if "video" in output_types: + return "video" + if "image" in output_types: + return "image" + if "audio" in output_types: + return "audio" + return "unknown" + + def _extract_name(self, payload, path): + fields = [ + payload.get("name"), + payload.get("title"), + payload.get("workflow_name"), + ] + metadata = payload.get("metadata") + if isinstance(metadata, dict): + fields.extend([metadata.get("name"), metadata.get("title")]) + + for value in fields: + text = str(value or "").strip() + if text: + return text + + return os.path.splitext(os.path.basename(path))[0] + + def _extract_workflow(self, payload): + if self._looks_like_workflow(payload): + return payload + if isinstance(payload, dict): + workflow = payload.get("workflow") + if 
self._looks_like_workflow(workflow): + return workflow + return None + + def _looks_like_workflow(self, value): + if not isinstance(value, dict) or not value: + return False + for node in value.values(): + if isinstance(node, dict) and str(node.get("class_type", "")).strip(): + return True + return False + + def templates_for_context(self, source_file=None): + templates = self.discover() + media_type = "" + if source_file: + media_type = str(source_file.data.get("media_type", "")).strip().lower() + + filtered = [] + for template in templates: + category = str(template.get("category", "unknown")) + input_types = set(template.get("input_types", [])) + if source_file: + if category not in ("enhance", "unknown"): + continue + if input_types and media_type not in input_types: + continue + else: + if category not in ("create", "unknown"): + continue + if category == "unknown" and input_types: + continue + filtered.append(template) + return filtered + + def get_template(self, template_id): + template_id = str(template_id or "").strip() + if not template_id: + return None + for template in self.discover(): + if str(template.get("id")) == template_id: + return template + return None + + def get_workflow_copy(self, template_id): + template = self.get_template(template_id) + if not template: + return None + return copy.deepcopy(template.get("workflow") or {}) + + def output_icon_name(self, template): + explicit_icon = str((template or {}).get("action_icon") or "").strip() + if explicit_icon: + return explicit_icon + kind = str((template or {}).get("primary_output") or "unknown") + if kind == "video": + return "ai-action-create-video.svg" + if kind == "audio": + return "ai-action-create-audio.svg" + if kind == "image": + return "ai-action-create-image.svg" + return "tool-generate-sparkle.svg" diff --git a/src/classes/generation_queue.py b/src/classes/generation_queue.py new file mode 100644 index 000000000..04a985988 --- /dev/null +++ b/src/classes/generation_queue.py @@ -0,0 
+1,879 @@
"""
 @file
 @brief This file contains a lightweight in-memory generation queue for ComfyUI jobs.
 @author Jonathan Thomas

 @section LICENSE

 Copyright (c) 2008-2026 OpenShot Studios, LLC
 (http://www.openshotstudios.com). This file is part of
 OpenShot Video Editor (http://www.openshot.org), an open-source project
 dedicated to delivering high quality video editing and animation solutions
 to the world.

 OpenShot Video Editor is free software: you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
 the Free Software Foundation, either version 3 of the License, or
 (at your option) any later version.

 OpenShot Video Editor is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 GNU General Public License for more details.

 You should have received a copy of the GNU General Public License
 along with OpenShot Library. If not, see <http://www.gnu.org/licenses/>.
 """

import uuid
from collections import deque
from threading import Event
from time import monotonic

from PyQt5.QtCore import QObject, QThread, pyqtSignal, pyqtSlot

from classes.comfy_client import ComfyClient
from classes.logger import log


class _GenerationWorker(QObject):
    """Background worker that simulates generation progress for queued jobs."""

    # Signals: (job_id, percent), (job_id, detail text), (job_id, sub-percent),
    # and (job_id, success, canceled, error_text, outputs).
    progress_changed = pyqtSignal(str, int)
    progress_detail_changed = pyqtSignal(str, str)
    progress_sub_changed = pyqtSignal(str, int)
    job_finished = pyqtSignal(str, bool, bool, str, object)

    def __init__(self):
        super().__init__()
        # job ids whose cancellation was requested via cancel_job().
        self._cancel_requested = set()
        # job_id -> Comfy prompt_id for in-flight jobs.
        self._job_prompts = {}

    def _is_cancel_requested(self, job_id, cancel_event):
        """Return True when cancel was requested via slot or via the job's Event."""
        return (job_id in self._cancel_requested) or (cancel_event is not None and cancel_event.is_set())

    @staticmethod
    def _is_unfinished_meta_batch(history_entry):
        """Return True when any output node reports a truthy "unfinished_batch" flag."""
        outputs = history_entry.get("outputs", {}) if isinstance(history_entry, dict) else {}
        if not isinstance(outputs, dict):
            return False
        for node_out in outputs.values():
            if not isinstance(node_out, dict):
                continue
            unfinished = node_out.get("unfinished_batch", None)
            if isinstance(unfinished, list):
                if any(bool(v) for v in unfinished):
                    return True
            elif unfinished:
                return True
        return False

    @staticmethod
    def _history_prompt_meta(history_entry):
        """Return (client_id, create_time) from a history entry's prompt payload.

        The Comfy history "prompt" field is a list whose 4th element carries the
        submitting client's metadata; returns ("", 0) when absent/malformed.
        """
        prompt_payload = history_entry.get("prompt", []) if isinstance(history_entry, dict) else []
        if not isinstance(prompt_payload, list):
            return "", 0
        client_payload = prompt_payload[3] if len(prompt_payload) >= 4 else {}
        if not isinstance(client_payload, dict):
            return "", 0
        client_id = str(client_payload.get("client_id", "")).strip()
        create_time = int(client_payload.get("create_time", 0) or 0)
        return client_id, create_time

    @staticmethod
    def _allow_unfiltered_output_fallback(template_id):
        """Return True when it is safe to collect outputs from ALL save nodes."""
        template_id = str(template_id or "").strip().lower()
        # Track-object templates intentionally have multiple save nodes
        # (mask/debug + final), so we must not relax save-node filtering.
        if template_id in (
            "video-blur-anything-sam2",
            "video-mask-anything-sam2",
            "video-highlight-anything-sam2",
            "txt2music-ace-step",
        ):
            return False
        return True

    def _find_related_meta_batch_outputs(self, client, history_entry, save_node_ids, template_id=""):
        """Search all history for completed follow-up prompts of the same client_id.

        Meta-batch workflows finish in a later prompt; returns the outputs of
        the newest finished related entry, or [] when none qualify.
        """
        base_client_id, base_create_time = self._history_prompt_meta(history_entry)
        if not base_client_id:
            return []
        try:
            history_all = client.history_all() or {}
        except Exception:
            # Best effort: a failed history fetch simply yields no outputs.
            return []
        if not isinstance(history_all, dict):
            return []

        best_create_time = 0
        best_outputs = []
        for entry in history_all.values():
            if not isinstance(entry, dict):
                continue
            status_obj = entry.get("status", {}) if isinstance(entry, dict) else {}
            status_str = str(status_obj.get("status_str", "")).lower()
            if status_str not in ("success", "completed", ""):
                continue
            entry_client_id, entry_create_time = self._history_prompt_meta(entry)
            if entry_client_id != base_client_id:
                continue
            # Only consider entries created at-or-after the base prompt.
            if entry_create_time and base_create_time and entry_create_time < base_create_time:
                continue
            if self._is_unfinished_meta_batch(entry):
                continue

            outputs = ComfyClient.extract_file_outputs(entry, save_node_ids=save_node_ids)
            if (not outputs) and save_node_ids and self._allow_unfiltered_output_fallback(template_id):
                outputs = ComfyClient.extract_file_outputs(entry, save_node_ids=None)
            if not outputs:
                continue

            # Keep the newest qualifying entry's outputs.
            if entry_create_time >= best_create_time:
                best_create_time = entry_create_time
                best_outputs = outputs

        return best_outputs

    @pyqtSlot(str, object)
    def run_job(self, job_id, request):
        """Entry slot: run a real Comfy job when a workflow+URL are supplied,
        otherwise simulate ~5s of progress (20 x 250ms steps)."""
        request = request or {}
        cancel_event = request.get("cancel_event")
        if request.get("workflow") and request.get("comfy_url"):
            self._run_comfy_job(job_id, request)
            return

        canceled = False
        for step in range(1, 21):
            QThread.msleep(250)
            if self._is_cancel_requested(job_id, cancel_event):
                canceled = True
                break
            self.progress_changed.emit(job_id, min(step * 5, 99))

        self._cancel_requested.discard(job_id)

        if canceled:
            self.job_finished.emit(job_id, False, True, "", [])
        else:
            self.progress_changed.emit(job_id, 100)
            self.job_finished.emit(job_id, True, False, "", [])

    @pyqtSlot(str)
    def cancel_job(self, job_id):
        """Queued-connection slot: mark *job_id* for cancellation (picked up by run loop)."""
        log.debug("GenerationWorker cancel_job received job=%s", str(job_id))
        self._cancel_requested.add(job_id)

    def _run_comfy_job(self, job_id, request):
        """Submit *request*'s workflow to ComfyUI and poll until done/failed/canceled."""
        comfy_url = request.get("comfy_url")
        workflow = request.get("workflow")
        client_id = request.get("client_id") or "openshot-qt"
        timeout_s = int(request.get("timeout_s") or 86400)  # default 24 hours safety cap
        save_node_ids = list(request.get("save_node_ids") or [])
        template_id = str(request.get("template_id") or "")
        cancel_event = request.get("cancel_event")
        client = ComfyClient(comfy_url)
        ws_client = None

        try:
            prompt_id = client.queue_prompt(workflow, client_id)
            if not prompt_id:
                self.job_finished.emit(job_id, False, False, "ComfyUI returned an invalid prompt_id", [])
                return
            self._job_prompts[job_id] = prompt_id
            # Live progress via websocket is best-effort; polling still works without it.
            try:
                ws_client = ComfyClient.open_progress_socket(comfy_url, client_id)
                log.debug("Comfy progress websocket connected for prompt=%s", str(prompt_id))
            except Exception:
                log.debug("Comfy progress websocket unavailable; continuing without live progress", exc_info=True)

            # Bookkeeping for the poll loop: timeouts, throttled logging, and
            # websocket reconnect/backoff state.
            start_time = monotonic()
            last_in_queue_time = start_time
            last_contact_time = start_time
            last_progress_log_time = 0.0
            last_network_error_log_time = 0.0
            progress_endpoint_unavailable = False
            accepted_progress_started = False
            ws_retry_delay_s = 2.0
            ws_next_retry_at = start_time
            ws_last_progress_time = start_time
            ws_stale_reconnect_s = 60.0
            ws_stale_reconnect_max_s = 300.0
            prompt_key = str(prompt_id)
            last_progress_signature = None
            last_progress_detail = ""

            while True:
                if self._is_cancel_requested(job_id, cancel_event):
                    log.debug("Comfy cancel requested for job=%s prompt=%s", job_id, str(prompt_id))
                    cancel_ok = False
                    cancel_errors = []

                    # Retry cancellation a few times and verify prompt no longer appears in Comfy queue.
                    for attempt in range(1, 181):
                        try:
                            cancel_ok = client.cancel_prompt(prompt_id) or cancel_ok
                        except Exception as ex:
                            cancel_errors.append("queue: {}".format(ex))

                        try:
                            cancel_ok = client.interrupt(prompt_id=prompt_id) or cancel_ok
                        except Exception as ex:
                            cancel_errors.append("interrupt: {}".format(ex))

                        try:
                            history = client.history(prompt_id) or {}
                            prompt_key = str(prompt_id)
                            history_entry = history.get(prompt_key) or history.get(prompt_id) or None
                            if isinstance(history_entry, dict):
                                status_obj = history_entry.get("status", {}) if isinstance(history_entry, dict) else {}
                                status_str = str(status_obj.get("status_str", "")).lower()
                                # Comfy commonly marks interrupted runs as failed/error in history.
                                if status_str in ("error", "failed"):
                                    cancel_ok = True
                                    log.debug(
                                        "Comfy cancel confirmed by history status for job=%s prompt=%s status=%s",
                                        job_id,
                                        prompt_key,
                                        status_str,
                                    )
                                    break
                        except Exception as ex:
                            cancel_errors.append("history-check: {}".format(ex))

                        try:
                            queue_data = client.queue() or {}
                            if not ComfyClient.prompt_in_queue(prompt_id, queue_data):
                                cancel_ok = True
                                log.debug(
                                    "Comfy cancel confirmed by queue absence for job=%s prompt=%s on attempt=%s",
                                    job_id,
                                    str(prompt_id),
                                    attempt,
                                )
                                break
                        except Exception as ex:
                            cancel_errors.append("queue-check: {}".format(ex))

                        if attempt % 10 == 0:
                            log.debug(
                                "Comfy cancel still pending job=%s prompt=%s attempt=%s",
                                job_id,
                                str(prompt_id),
                                attempt,
                            )
                        QThread.msleep(500)

                    self._cancel_requested.discard(job_id)
                    self._job_prompts.pop(job_id, None)
                    if cancel_ok:
                        self.job_finished.emit(job_id, False, True, "", [])
                    else:
                        self.job_finished.emit(
                            job_id,
                            False,
                            False,
                            "ComfyUI did not accept cancel request ({})".format("; ".join(cancel_errors) or "unknown"),
                            [],
                        )
                    return

                # Poll history; transient network failures are logged (throttled) and retried.
                history_entry = None
                try:
                    history = client.history(prompt_id) or {}
                    history_entry = history.get(prompt_key) or history.get(prompt_id) or None
                    last_contact_time = monotonic()
                except Exception:
                    now_log = monotonic()
                    if (now_log - last_network_error_log_time) > 8.0:
                        log.debug(
                            "Comfy history poll temporarily unavailable for job=%s prompt=%s; retrying",
                            job_id,
                            prompt_key,
                            exc_info=True,
                        )
                        last_network_error_log_time = now_log
                if history_entry is not None:
                    status_obj = history_entry.get("status", {}) if isinstance(history_entry, dict) else {}
                    status_str = str(status_obj.get("status_str", "")).lower()
                    if status_str in ("error", "failed"):
                        error_text = "ComfyUI job failed."
                        messages = status_obj.get("messages", [])
                        if isinstance(messages, list) and messages:
                            error_text = ComfyClient.summarize_error_text(messages[-1])
                        self._job_prompts.pop(job_id, None)
                        self.job_finished.emit(job_id, False, False, error_text, [])
                        return
                    if self._is_unfinished_meta_batch(history_entry):
                        image_outputs = self._find_related_meta_batch_outputs(
                            client,
                            history_entry,
                            save_node_ids,
                            template_id=template_id,
                        )
                        if image_outputs:
                            self.progress_changed.emit(job_id, 100)
                            self._job_prompts.pop(job_id, None)
                            self.job_finished.emit(job_id, True, False, "", image_outputs)
                            return
                        # Meta batch uses follow-up prompts under the same client_id.
                        # Keep polling progress/queue while waiting for follow-up prompt outputs.
                    else:
                        image_outputs = ComfyClient.extract_file_outputs(history_entry, save_node_ids=save_node_ids)
                        if (not image_outputs) and save_node_ids and self._allow_unfiltered_output_fallback(template_id):
                            # Fallback for workflows whose output node ids shift or emit non-standard keys.
+ image_outputs = ComfyClient.extract_file_outputs(history_entry, save_node_ids=None) + self.progress_changed.emit(job_id, 100) + self._job_prompts.pop(job_id, None) + self.job_finished.emit(job_id, True, False, "", image_outputs) + return + + # Query ComfyUI's live progress values when available. + try: + ws_progress_emitted = False + now = monotonic() + if ws_client is None and now >= ws_next_retry_at: + try: + ws_client = ComfyClient.open_progress_socket(comfy_url, client_id) + ws_retry_delay_s = 2.0 + log.debug("Comfy progress websocket reconnected for prompt=%s", prompt_key) + except Exception: + ws_next_retry_at = now + ws_retry_delay_s + ws_retry_delay_s = min(60.0, ws_retry_delay_s * 1.5) + now_log = monotonic() + if (now_log - last_network_error_log_time) > 8.0: + log.debug( + "Comfy websocket reconnect failed for job=%s prompt=%s; retrying in %.1fs", + job_id, + prompt_key, + ws_retry_delay_s, + exc_info=True, + ) + last_network_error_log_time = now_log + + if ws_client is not None: + try: + # Accept progress from follow-up prompts as well (meta-batch). 
+ progress_event = ws_client.poll_progress(prompt_id=None) + except Exception: + progress_event = None + try: + ws_client.close() + except Exception: + pass + ws_client = None + ws_next_retry_at = monotonic() + ws_retry_delay_s + ws_retry_delay_s = min(60.0, ws_retry_delay_s * 1.5) + now_log = monotonic() + if (now_log - last_network_error_log_time) > 8.0: + log.debug( + "Comfy websocket progress read failed for job=%s prompt=%s; switching to retry mode", + job_id, + prompt_key, + exc_info=True, + ) + last_network_error_log_time = now_log + + if progress_event is not None: + elapsed = monotonic() - start_time + progress = int(progress_event.get("percent", 0)) + raw_value = float(progress_event.get("value", 0.0)) + raw_max = float(progress_event.get("max", 0.0)) + progress_type = str(progress_event.get("type", "")) + progress_node = str(progress_event.get("node", "")) + # Some workflows emit near-complete progress bursts at startup + # (e.g. tiny setup nodes), then reset to sampler progress. + # Ignore those bootstrap spikes for a short window. 
+ if ( + (not accepted_progress_started) + and progress >= 95 + and elapsed < 20.0 + and raw_max <= 1.0 + ): + log.debug( + "Comfy WS progress setup-node spike ignored job=%s prompt=%s node=%s type=%s value=%s max=%s percent=%s elapsed=%.2fs", + job_id, + prompt_key, + progress_node, + progress_type, + raw_value, + raw_max, + progress, + elapsed, + ) + elif (not accepted_progress_started) and progress >= 95 and elapsed < 20.0: + log.debug( + "Comfy WS progress bootstrap spike ignored job=%s prompt=%s percent=%s elapsed=%.2fs", + job_id, + prompt_key, + progress, + elapsed, + ) + else: + accepted_progress_started = True + progress_signature = ( + progress_type, + progress_node, + int(progress), + round(raw_value, 3), + round(raw_max, 3), + ) + if progress_signature != last_progress_signature: + inferred_progress = int(max(0, min(99, progress))) + detail_text = "" + if progress_node: + detail_text = "node {} {}%".format(progress_node, int(progress)) + + log.debug( + "Comfy WS progress emit job=%s prompt=%s node=%s type=%s value=%s max=%s percent=%s", + job_id, + prompt_key, + progress_node, + progress_type, + raw_value, + raw_max, + inferred_progress, + ) + self.progress_changed.emit(job_id, inferred_progress) + self.progress_sub_changed.emit(job_id, int(max(0, min(99, progress)))) + if detail_text != last_progress_detail: + self.progress_detail_changed.emit(job_id, detail_text) + last_progress_detail = detail_text + last_progress_signature = progress_signature + ws_progress_emitted = True + ws_last_progress_time = monotonic() + ws_stale_reconnect_s = 60.0 + last_contact_time = monotonic() + if ws_client is not None and not ws_progress_emitted: + stale_for = now - ws_last_progress_time + if stale_for >= ws_stale_reconnect_s: + try: + ws_client.close() + except Exception: + pass + ws_client = None + ws_next_retry_at = now + ws_retry_delay_s + ws_retry_delay_s = min(60.0, ws_retry_delay_s * 1.5) + next_stale_reconnect_s = min( + ws_stale_reconnect_max_s, + max(60.0, 
ws_stale_reconnect_s * 1.5), + ) + log.debug( + "Comfy websocket stalled for job=%s prompt=%s (%.1fs >= %.1fs); forcing reconnect, next stall timeout %.1fs", + job_id, + prompt_key, + stale_for, + ws_stale_reconnect_s, + next_stale_reconnect_s, + ) + ws_stale_reconnect_s = next_stale_reconnect_s + # Use HTTP /progress only when websocket progress is unavailable. + # If websocket is connected but temporarily quiet, keep waiting for WS + # instead of spamming a misleading 404 fallback warning. + if ws_client is None: + progress_data = client.progress() + if progress_data is None: + if not progress_endpoint_unavailable: + log.debug( + "Comfy progress endpoint unavailable (404); waiting for websocket progress for job=%s", + job_id, + ) + progress_endpoint_unavailable = True + progress_data = {} + + progress_block = progress_data.get("progress", progress_data) + if not isinstance(progress_block, dict): + progress_block = {} + + value = float(progress_block.get("value", progress_block.get("current", 0.0))) + maximum = float(progress_block.get("max", progress_block.get("total", 0.0))) + progress_prompt = str( + progress_data.get("prompt_id", progress_block.get("prompt_id", "")) + ) + prompt_matches = (not progress_prompt) or (progress_prompt == prompt_key) + + now_log = monotonic() + if (now_log - last_progress_log_time) > 8.0: + log.debug( + "Comfy progress poll job=%s prompt=%s payload_keys=%s value=%s max=%s progress_prompt=%s prompt_match=%s", + job_id, + prompt_key, + list(progress_data.keys()) if isinstance(progress_data, dict) else type(progress_data), + value, + maximum, + progress_prompt, + prompt_matches, + ) + last_progress_log_time = now_log + + if maximum > 0 and prompt_matches: + progress = int(max(0, min(99, round((value / maximum) * 100.0)))) + progress_signature = ("poll", "", int(progress), round(value, 3), round(maximum, 3)) + if progress_signature != last_progress_signature: + log.debug( + "Comfy progress emit job=%s prompt=%s value=%s max=%s 
percent=%s", + job_id, + prompt_key, + value, + maximum, + progress, + ) + self.progress_changed.emit(job_id, progress) + self.progress_sub_changed.emit(job_id, int(max(0, min(99, progress)))) + if last_progress_detail: + self.progress_detail_changed.emit(job_id, "") + last_progress_detail = "" + last_progress_signature = progress_signature + last_contact_time = monotonic() + except Exception: + # Keep polling history and queue even if /progress is unavailable. + now_log = monotonic() + if (now_log - last_network_error_log_time) > 8.0: + log.debug("Comfy progress poll failed for job=%s", job_id, exc_info=True) + last_network_error_log_time = now_log + + # Check queue to avoid timing out long-running but active jobs. + in_queue = False + try: + queue_data = client.queue() or {} + in_queue = ComfyClient.prompt_in_queue(prompt_id, queue_data) + last_contact_time = monotonic() + except Exception: + # If queue check fails, do not penalize the job immediately. + in_queue = True + now_log = monotonic() + if (now_log - last_network_error_log_time) > 8.0: + log.debug("Comfy queue check temporarily unavailable for job=%s", job_id, exc_info=True) + last_network_error_log_time = now_log + if in_queue: + last_in_queue_time = monotonic() + else: + now_log = monotonic() + if (now_log - last_progress_log_time) > 8.0: + log.debug( + "Comfy queue check: prompt=%s not found in queue_running/queue_pending yet", + prompt_key, + ) + last_progress_log_time = now_log + + now = monotonic() + if (now - start_time) > timeout_s: + self._job_prompts.pop(job_id, None) + self.job_finished.emit(job_id, False, False, "Timed out waiting for ComfyUI history result", []) + return + + if (now - last_contact_time) > 60.0: + now_log = monotonic() + if (now_log - last_network_error_log_time) > 8.0: + log.debug( + "Comfy connection degraded for job=%s prompt=%s (no successful API contact for %.1fs); continuing retries", + job_id, + prompt_key, + now - last_contact_time, + ) + last_network_error_log_time = 
now_log + + # If prompt vanished from queue for an extended period and still no history, treat as failure. + if (now - last_in_queue_time) > 600: + self._job_prompts.pop(job_id, None) + self.job_finished.emit( + job_id, + False, + False, + "ComfyUI prompt is no longer in queue and has no history result.", + [], + ) + return + QThread.msleep(500) + except Exception as ex: + self._job_prompts.pop(job_id, None) + self.job_finished.emit(job_id, False, False, ComfyClient.summarize_error_text(ex), []) + finally: + if ws_client is not None: + ws_client.close() + + +class GenerationQueueManager(QObject): + """Single-worker, in-memory generation queue with per-file active-job limits.""" + + ACTIVE_STATES = {"queued", "running", "canceling"} + + job_added = pyqtSignal(str, object) + job_updated = pyqtSignal(str, str, int) + job_finished = pyqtSignal(str, str) + job_removed = pyqtSignal(str) + file_job_changed = pyqtSignal(str) + queue_changed = pyqtSignal() + + _run_job = pyqtSignal(str, object) + _cancel_job = pyqtSignal(str) + + def __init__(self, parent=None): + super().__init__(parent) + self.jobs = {} + self._queued = deque() + self._running_job_id = None + self._active_file_jobs = {} + + self._thread = QThread(self) + self._thread.setObjectName("generation_queue_worker") + self._worker = _GenerationWorker() + self._worker.moveToThread(self._thread) + self._run_job.connect(self._worker.run_job) + self._cancel_job.connect(self._worker.cancel_job) + self._worker.progress_changed.connect(self._on_progress_changed) + self._worker.progress_detail_changed.connect(self._on_progress_detail_changed) + self._worker.progress_sub_changed.connect(self._on_progress_sub_changed) + self._worker.job_finished.connect(self._on_job_finished) + self._thread.start() + + def enqueue(self, name, template_id, prompt, source_file_id=None, request=None): + source_file_id = str(source_file_id or "") + if source_file_id and self.get_active_job_for_file(source_file_id): + return None + + job_id = 
str(uuid.uuid4()) + cancel_event = Event() + job_request = dict(request or {}) + job_request["cancel_event"] = cancel_event + job = { + "id": job_id, + "name": str(name or "").strip(), + "template_id": str(template_id or "").strip(), + "prompt": str(prompt or "").strip(), + "source_file_id": source_file_id, + "status": "queued", + "progress": 0, + "sub_progress": 0, + "progress_detail": "", + "error": "", + "request": job_request, + "cancel_event": cancel_event, + } + self.jobs[job_id] = job + self._queued.append(job_id) + if source_file_id: + self._active_file_jobs[source_file_id] = job_id + + self.job_added.emit(job_id, source_file_id) + self.job_updated.emit(job_id, "queued", 0) + self._emit_file_changed(source_file_id) + self.queue_changed.emit() + self._start_next_if_idle() + return job_id + + def cancel_job(self, job_id): + job = self.jobs.get(job_id) + if not job: + log.debug("GenerationQueue cancel_job ignored; unknown job=%s", str(job_id)) + return False + + log.debug( + "GenerationQueue cancel_job request job=%s status=%s source_file_id=%s", + str(job_id), + str(job.get("status", "")), + str(job.get("source_file_id", "")), + ) + if job["status"] == "queued": + cancel_event = job.get("cancel_event") + if cancel_event is not None: + cancel_event.set() + log.debug("GenerationQueue cancel_event set for queued job=%s", str(job_id)) + job["status"] = "canceled" + self._queued = deque([queued_id for queued_id in self._queued if queued_id != job_id]) + self._release_file_slot(job.get("source_file_id", "")) + self.job_updated.emit(job_id, "canceled", int(job.get("progress", 0))) + self.job_finished.emit(job_id, "canceled") + self._emit_file_changed(job.get("source_file_id", "")) + self.queue_changed.emit() + log.debug("GenerationQueue cancel_job completed for queued job=%s", str(job_id)) + return True + + if job["status"] == "running": + cancel_event = job.get("cancel_event") + if cancel_event is not None: + cancel_event.set() + log.debug("GenerationQueue 
cancel_event set for running job=%s", str(job_id)) + job["status"] = "canceling" + self.job_updated.emit(job_id, "canceling", int(job.get("progress", 0))) + self._cancel_job.emit(job_id) + self._emit_file_changed(job.get("source_file_id", "")) + self.queue_changed.emit() + log.debug("GenerationQueue cancel_job emitted worker cancel for running job=%s", str(job_id)) + return True + + log.debug("GenerationQueue cancel_job ignored for job=%s with status=%s", str(job_id), str(job.get("status", ""))) + return False + + def cancel_jobs_for_file(self, source_file_id): + source_file_id = str(source_file_id or "") + if not source_file_id: + return + for job in list(self.jobs.values()): + if job.get("source_file_id") == source_file_id and job.get("status") in self.ACTIVE_STATES: + self.cancel_job(job["id"]) + + def remove_job(self, job_id): + job = self.jobs.get(job_id) + if not job: + return False + if job.get("status") in self.ACTIVE_STATES: + return False + + source_file_id = job.get("source_file_id", "") + self.jobs.pop(job_id, None) + self.job_removed.emit(job_id) + self._emit_file_changed(source_file_id) + self.queue_changed.emit() + return True + + def get_job(self, job_id): + return self.jobs.get(job_id) + + def get_active_job_for_file(self, source_file_id): + source_file_id = str(source_file_id or "") + if not source_file_id: + return None + + job_id = self._active_file_jobs.get(source_file_id) + if not job_id: + return None + + job = self.jobs.get(job_id) + if not job or job.get("status") not in self.ACTIVE_STATES: + self._active_file_jobs.pop(source_file_id, None) + return None + return job + + def get_file_badge(self, source_file_id): + job = self.get_active_job_for_file(source_file_id) + if not job: + return None + + status = job.get("status") + progress = int(job.get("progress", 0)) + sub_progress = int(job.get("sub_progress", 0)) + detail = str(job.get("progress_detail", "") or "").strip() + if status == "queued": + label = "Queued" + elif status == "running": 
+ label = "Generating {}%".format(progress) + if detail: + label = "{} ({})".format(label, detail) + elif status == "canceling": + label = "Canceling..." + else: + label = status.capitalize() + + return { + "status": status, + "progress": progress, + "sub_progress": sub_progress, + "label": label, + "job_id": job.get("id"), + } + + def shutdown(self): + if self._thread.isRunning(): + self._thread.quit() + self._thread.wait(2000) + + def _start_next_if_idle(self): + if self._running_job_id is not None: + return + if not self._queued: + return + + next_job_id = self._queued.popleft() + job = self.jobs.get(next_job_id) + if not job: + self._start_next_if_idle() + return + + self._running_job_id = next_job_id + job["status"] = "running" + job["progress"] = int(job.get("progress", 0)) + job["sub_progress"] = int(job.get("sub_progress", 0)) + self.job_updated.emit(next_job_id, "running", int(job["progress"])) + self._emit_file_changed(job.get("source_file_id", "")) + self.queue_changed.emit() + self._run_job.emit(next_job_id, job.get("request", {})) + + def _release_file_slot(self, source_file_id): + source_file_id = str(source_file_id or "") + if source_file_id: + self._active_file_jobs.pop(source_file_id, None) + + def _emit_file_changed(self, source_file_id): + source_file_id = str(source_file_id or "") + if source_file_id: + self.file_job_changed.emit(source_file_id) + + @pyqtSlot(str, int) + def _on_progress_changed(self, job_id, progress): + job = self.jobs.get(job_id) + if not job: + return + if job.get("status") not in ("running", "canceling"): + return + job["progress"] = int(progress) + self.job_updated.emit(job_id, job.get("status"), int(progress)) + self._emit_file_changed(job.get("source_file_id", "")) + self.queue_changed.emit() + + @pyqtSlot(str, str) + def _on_progress_detail_changed(self, job_id, detail): + job = self.jobs.get(job_id) + if not job: + return + if job.get("status") not in ("running", "canceling"): + return + detail_text = str(detail or 
"").strip() + if str(job.get("progress_detail", "") or "") == detail_text: + return + job["progress_detail"] = detail_text + self.job_updated.emit(job_id, job.get("status"), int(job.get("progress", 0))) + self._emit_file_changed(job.get("source_file_id", "")) + self.queue_changed.emit() + + @pyqtSlot(str, int) + def _on_progress_sub_changed(self, job_id, progress): + job = self.jobs.get(job_id) + if not job: + return + if job.get("status") not in ("running", "canceling"): + return + p = int(max(0, min(99, progress))) + if int(job.get("sub_progress", 0)) == p: + return + job["sub_progress"] = p + self.job_updated.emit(job_id, job.get("status"), int(job.get("progress", 0))) + self._emit_file_changed(job.get("source_file_id", "")) + self.queue_changed.emit() + + @pyqtSlot(str, bool, bool, str, object) + def _on_job_finished(self, job_id, success, canceled, error, outputs): + job = self.jobs.get(job_id) + if not job: + return + + if canceled: + job["status"] = "canceled" + elif success: + job["status"] = "completed" + job["progress"] = 100 + job["outputs"] = list(outputs or []) + else: + job["status"] = "failed" + job["error"] = str(error or "") + + source_file_id = job.get("source_file_id", "") + self._release_file_slot(source_file_id) + + self.job_updated.emit(job_id, job["status"], int(job.get("progress", 0))) + self.job_finished.emit(job_id, job["status"]) + self._emit_file_changed(source_file_id) + self.queue_changed.emit() + + if self._running_job_id == job_id: + self._running_job_id = None + self._start_next_if_idle() diff --git a/src/classes/generation_service.py b/src/classes/generation_service.py new file mode 100644 index 000000000..31bf272a5 --- /dev/null +++ b/src/classes/generation_service.py @@ -0,0 +1,1586 @@ +""" + @file + @brief This file contains Comfy generation orchestration logic. + @author Jonathan Thomas + + @section LICENSE + + Copyright (c) 2008-2026 OpenShot Studios, LLC + (http://www.openshotstudios.com). 
This file is part of + OpenShot Video Editor (http://www.openshot.org), an open-source project + dedicated to delivering high quality video editing and animation solutions + to the world. + + OpenShot Video Editor is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + OpenShot Video Editor is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with OpenShot Library. If not, see . + """ + +import os +import re +import tempfile +import json +import random +from time import time +from urllib.parse import unquote +from fractions import Fraction + +import openshot +from PyQt5.QtWidgets import QMessageBox, QDialog + +from classes import info +from classes import time_parts +from classes.app import get_app +from classes.comfy_client import ComfyClient +from classes.comfy_templates import ComfyTemplateRegistry +from classes.comfy_pipelines import ( + build_workflow, + is_supported_img2img_path, + pipeline_requires_checkpoint, + pipeline_requires_svd_checkpoint, + pipeline_requires_stable_audio_clip, + pipeline_requires_rife_model, + pipeline_requires_upscale_model, + DEFAULT_RIFE_VFI_MODEL, + DEFAULT_SD_CHECKPOINT, + DEFAULT_SD_BASE_CHECKPOINT, + DEFAULT_STABLE_AUDIO_CHECKPOINT, + DEFAULT_STABLE_AUDIO_CLIP, + DEFAULT_SVD_CHECKPOINT, + DEFAULT_UPSCALE_MODEL, +) +from classes.logger import log +from classes.query import File +from windows.generate import GenerateMediaDialog + + +class GenerationService: + """Encapsulates generation-specific UI + workflow behavior.""" + + LEGACY_PIPELINE_IDS = { + "txt2audio-stable-open", + "img2img-basic", + 
"upscale-realesrgan-x4", + "video-segment-scenes-transnet", + "video-frame-interpolation-rife2x", + "video-upscale-gan", + "video2video-basic", + "video-whisper-srt", + } + SAM2_DEFAULT_TARGET_BATCH_BYTES = 4 * 1024 * 1024 * 1024 # 4 GiB + SAM2_ESTIMATED_BYTES_PER_PIXEL = 24.0 + SAM2_ESTIMATED_BYTES_PER_PIXEL_HIGHLIGHT = 64.0 + SAM2_ESTIMATED_BYTES_PER_PIXEL_BLUR = 40.0 + SAM2_MIN_FRAMES_PER_BATCH = 4 + SAM2_MAX_FRAMES_PER_BATCH = 192 + + def __init__(self, win): + self.win = win + self._generation_temp_files = [] + self._comfy_status_cache = {"checked_at": 0.0, "available": False, "url": ""} + self._last_logged_comfy_state = None + self.template_registry = ComfyTemplateRegistry() + + def cleanup_temp_files(self): + for tmp_path in list(self._generation_temp_files): + try: + if tmp_path and os.path.exists(tmp_path): + os.remove(tmp_path) + except OSError: + pass + self._generation_temp_files = [] + + def comfy_ui_url(self): + url = get_app().get_settings().get("comfy-ui-url") or "http://127.0.0.1:8188" + return str(url).strip().rstrip("/") + + def is_comfy_available(self, force=False): + now = time() + if not force and (now - self._comfy_status_cache["checked_at"]) < 2.0: + return self._comfy_status_cache["available"] + + url = self.comfy_ui_url() + available = False + error_text = "" + try: + available = ComfyClient(url).ping(timeout=0.5) + except Exception as ex: + available = False + error_text = str(ex) + + previous_available = bool(self._comfy_status_cache.get("available")) + previous_url = str(self._comfy_status_cache.get("url", "")) + self._comfy_status_cache["checked_at"] = now + self._comfy_status_cache["available"] = available + self._comfy_status_cache["url"] = url + + state = (url, bool(available)) + if force or state != self._last_logged_comfy_state or previous_url != url or previous_available != available: + if available: + log.info("ComfyUI check passed at %s", url) + else: + if error_text: + log.info("ComfyUI check failed at %s (%s)", url, 
error_text) + else: + log.info("ComfyUI check failed at %s", url) + self._last_logged_comfy_state = state + return available + + def can_open_generate_dialog(self): + return len(self.win.selected_file_ids()) <= 1 + + def _prepare_generation_source_path(self, source_file, template_id): + if not source_file: + return "" + + source_path = source_file.data.get("path", "") + media_type = source_file.data.get("media_type") + if template_id not in ("img2img-basic", "upscale-realesrgan-x4", "img2video-svd") or media_type != "image": + return source_path + + if is_supported_img2img_path(source_path): + return source_path + + tmp_fd, tmp_png = tempfile.mkstemp(prefix="openshot-comfy-", suffix=".png") + os.close(tmp_fd) + try: + clip = openshot.Clip(source_path) + frame = clip.Reader().GetFrame(1) + frame.Save(tmp_png, 1.0) + self._generation_temp_files.append(tmp_png) + return tmp_png + except Exception: + try: + os.remove(tmp_png) + except OSError: + pass + raise + + def _prepare_generation_video_input(self, source_file, client): + if not source_file: + raise ValueError("A source video is required.") + source_path = source_file.data.get("path", "") + if not source_path: + raise ValueError("Source video path is invalid.") + return client.upload_input_file(source_path) + + def _prepare_generation_image_input(self, local_image_path, client): + local_image_path = str(local_image_path or "").strip() + if not local_image_path: + raise ValueError("A source image is required.") + return client.upload_input_file(local_image_path) + + def _get_source_fps(self, source_file): + if not source_file: + return None + fps_data = source_file.data.get("fps") + if isinstance(fps_data, dict): + try: + num = float(fps_data.get("num", 0)) + den = float(fps_data.get("den", 0)) + except (TypeError, ValueError): + num = den = 0.0 + if num > 0 and den > 0: + return num / den + return None + + def _default_generation_name(self, source_file): + default_name = "generation" + if source_file: + path = 
source_file.data.get("path", "") + if path: + default_name = "{}_gen".format(os.path.splitext(os.path.basename(path))[0]) + return default_name + + def _get_source_dimensions(self, source_file): + if not source_file: + return (0, 0) + data = source_file.data if hasattr(source_file, "data") and isinstance(source_file.data, dict) else {} + try: + width = int(data.get("width", 0) or 0) + except Exception: + width = 0 + try: + height = int(data.get("height", 0) or 0) + except Exception: + height = 0 + return (max(0, width), max(0, height)) + + def _sam2_target_batch_bytes(self): + settings = get_app().get_settings() + raw_bytes = settings.get("comfy-sam2-target-batch-bytes") + if raw_bytes is not None: + try: + value = int(raw_bytes) + if value > 0: + return value + except Exception: + pass + raw_gb = settings.get("comfy-sam2-target-batch-gb") + if raw_gb is not None: + try: + value = float(raw_gb) + if value > 0.0: + return int(value * 1024 * 1024 * 1024) + except Exception: + pass + return int(self.SAM2_DEFAULT_TARGET_BATCH_BYTES) + + def _estimate_sam2_frames_per_batch(self, width, height, bytes_per_pixel=None): + width = int(max(0, width)) + height = int(max(0, height)) + if width <= 0 or height <= 0: + return self.SAM2_MIN_FRAMES_PER_BATCH + target_bytes = self._sam2_target_batch_bytes() + if bytes_per_pixel is None: + bytes_per_pixel = self.SAM2_ESTIMATED_BYTES_PER_PIXEL + try: + bytes_per_pixel = float(bytes_per_pixel) + except Exception: + bytes_per_pixel = float(self.SAM2_ESTIMATED_BYTES_PER_PIXEL) + bytes_per_frame = max( + 1.0, + float(width) * float(height) * bytes_per_pixel, + ) + frames = int(target_bytes / bytes_per_frame) + frames = max(self.SAM2_MIN_FRAMES_PER_BATCH, min(self.SAM2_MAX_FRAMES_PER_BATCH, frames)) + # Keep chunk sizes aligned for more stable batching behavior. 
+ frames = max(self.SAM2_MIN_FRAMES_PER_BATCH, int((frames // 4) * 4)) + return frames + + def _apply_dynamic_sam2_meta_batch(self, workflow, source_file, template_id=None): + template_id = str(template_id or "").strip().lower() + # Only adjust non-legacy SAM2 video tracking templates/workflows. + if template_id and template_id not in ( + "video-blur-anything-sam2", + "video-highlight-anything-sam2", + "video-mask-anything-sam2", + ): + return + if not isinstance(workflow, dict): + return + + width, height = self._get_source_dimensions(source_file) + if width <= 0 or height <= 0: + return + + has_sam2_chunked = False + for node in workflow.values(): + if not isinstance(node, dict): + continue + class_type = str(node.get("class_type", "")).strip().lower() + if class_type == "openshotsam2videosegmentationchunked": + has_sam2_chunked = True + break + if not has_sam2_chunked: + return + + # Account for downstream per-frame processing memory: + # - Highlight path is the heaviest (multiple full-frame tensor intermediates) + # - Blur path is moderately heavy + # - Mask-only path is closest to baseline SAM2 estimate + estimated_bpp = float(self.SAM2_ESTIMATED_BYTES_PER_PIXEL) + for node in workflow.values(): + if not isinstance(node, dict): + continue + class_type = str(node.get("class_type", "")).strip().lower() + if class_type == "openshotimagehighlightmasked": + estimated_bpp = max(estimated_bpp, float(self.SAM2_ESTIMATED_BYTES_PER_PIXEL_HIGHLIGHT)) + elif class_type == "openshotimageblurmasked": + estimated_bpp = max(estimated_bpp, float(self.SAM2_ESTIMATED_BYTES_PER_PIXEL_BLUR)) + + dynamic_frames = self._estimate_sam2_frames_per_batch(width, height, bytes_per_pixel=estimated_bpp) + updated_chunk_nodes = 0 + updated_batch_nodes = 0 + for node in workflow.values(): + if not isinstance(node, dict): + continue + class_type = str(node.get("class_type", "")).strip().lower() + inputs = node.get("inputs", {}) + if not isinstance(inputs, dict): + continue + if class_type == 
"openshotsam2videosegmentationchunked" and "chunk_size_frames" in inputs: + inputs["chunk_size_frames"] = int(dynamic_frames) + updated_chunk_nodes += 1 + if class_type == "vhs_batchmanager" and "frames_per_batch" in inputs: + inputs["frames_per_batch"] = int(dynamic_frames) + updated_batch_nodes += 1 + if updated_chunk_nodes or updated_batch_nodes: + log.info( + "Dynamic SAM2 batch size: %s frames (source=%sx%s, target_bytes=%s, est_bpp=%s, template=%s, chunk_nodes=%s, batch_nodes=%s)", + dynamic_frames, + width, + height, + self._sam2_target_batch_bytes(), + round(estimated_bpp, 2), + template_id or "unknown", + updated_chunk_nodes, + updated_batch_nodes, + ) + + def templates_for_context(self, source_file=None): + templates = self.template_registry.templates_for_context(source_file=source_file) + return [ + {"id": t.get("id"), "name": t.get("display_name"), "template": t} + for t in templates + ] + + def build_menu_templates(self, source_file=None): + grouped = {"create": [], "enhance": [], "unknown": []} + for template in self.template_registry.templates_for_context(source_file=source_file): + category = str(template.get("category", "unknown")) + if category not in grouped: + category = "unknown" + grouped[category].append(template) + return grouped + + def icon_for_template(self, template): + return self.template_registry.output_icon_name(template) + + def _prepare_nonlegacy_workflow( + self, + template, + payload_name, + prompt_text, + source_file, + source_path, + coordinates_positive_text="", + coordinates_negative_text="", + rectangles_positive_text="", + rectangles_negative_text="", + auto_mode=False, + tracking_selection=None, + highlight_color="", + highlight_opacity=0.0, + border_color="", + border_width=0, + mask_brightness=1.0, + background_brightness=1.0, + ): + workflow = self.template_registry.get_workflow_copy(template.get("id")) + if not workflow: + raise ValueError("Template workflow not found.") + + template_dir = "" + template_path = 
str((template or {}).get("path") or "").strip() + if template_path: + template_dir = os.path.dirname(template_path) + + def _resolve_template_local_file(path_text): + path_text = str(path_text or "").strip() + if not path_text: + return "" + if os.path.isabs(path_text): + return path_text if os.path.exists(path_text) else "" + if not template_dir: + return "" + candidate = os.path.abspath(os.path.join(template_dir, path_text)) + if os.path.exists(candidate): + return candidate + return "" + + template_id = str((template or {}).get("id") or "").strip().lower() + prompt_text = str(prompt_text or "").strip() + music_prompt_text = prompt_text + music_lyrics_text = "" + if template_id == "txt2music-ace-step" and prompt_text: + # Optional inline format: + #