From 57ec4e32c7ba854546b5c2b53c00d44b58788f8a Mon Sep 17 00:00:00 2001 From: E Date: Sun, 8 Feb 2026 15:45:13 +0100 Subject: [PATCH 1/3] Queue jobs until models available; fix progress tracking for UI - Backend: jobs wait in pending_model until required DiT is installed; promote to queued when model available - API: get_status returns pendingReason; cancel supports pending_model; POST /api/generate/retry-pending to promote and start after model download - Frontend: SongList shows 'Waiting for model' + reason and Open Settings; poll sets generationStatus/generationPendingReason; Settings onDownloadComplete calls retryPending; cancel and pending_model handling in poll - Progress: broaden tqdm regex so all progress lines match (INFO); add progress updater from log parser so job progress/ETA updates for UI even when callback path fails Co-authored-by: Cursor --- api/ace_step_models.py | 42 +++++--- api/generate.py | 179 ++++++++++++++++++++++++++++++-- cdmf_pipeline_ace_step.py | 15 ++- cdmf_state.py | 29 +++++- generate_ace.py | 123 ++++++++++++++++------ music_forge_ui.py | 73 ++++++++----- ui/App.tsx | 18 +++- ui/components/CreatePanel.tsx | 140 +++++++++++++++++++++++-- ui/components/SettingsModal.tsx | 101 ++++++++++++------ ui/components/SongList.tsx | 35 +++++-- ui/services/api.ts | 16 ++- ui/types.ts | 5 + 12 files changed, 643 insertions(+), 133 deletions(-) diff --git a/api/ace_step_models.py b/api/ace_step_models.py index 843a5b2..df4e045 100644 --- a/api/ace_step_models.py +++ b/api/ace_step_models.py @@ -5,6 +5,7 @@ """ from pathlib import Path +import shutil import subprocess import sys import threading @@ -24,22 +25,22 @@ def _bundled_downloader_available() -> bool: bp = Blueprint("api_ace_step_models", __name__) -# DiT variants from Tutorial (DiT Selection Summary) +# DiT variants from Tutorial (DiT Selection Summary). size_gb: approximate for download confirmation. 
DIT_MODELS = [ - {"id": "turbo", "label": "Turbo (default)", "description": "Best balance, 8 steps", "steps": 8, "cfg": False}, - {"id": "turbo-shift1", "label": "Turbo shift=1", "description": "Richer details", "steps": 8, "cfg": False}, - {"id": "turbo-shift3", "label": "Turbo shift=3", "description": "Clearer timbre", "steps": 8, "cfg": False}, - {"id": "turbo-continuous", "label": "Turbo continuous", "description": "Flexible shift 1–5", "steps": 8, "cfg": False}, - {"id": "sft", "label": "SFT", "description": "50 steps, CFG", "steps": 50, "cfg": True}, - {"id": "base", "label": "Base", "description": "50 steps, CFG; lego/extract/complete", "steps": 50, "cfg": True, "exclusive_tasks": ["lego", "extract", "complete"]}, + {"id": "turbo", "label": "Turbo (default)", "description": "Best balance, 8 steps", "steps": 8, "cfg": False, "size_gb": 8}, + {"id": "turbo-shift1", "label": "Turbo shift=1", "description": "Richer details", "steps": 8, "cfg": False, "size_gb": 0.5}, + {"id": "turbo-shift3", "label": "Turbo shift=3", "description": "Clearer timbre", "steps": 8, "cfg": False, "size_gb": 0.5}, + {"id": "turbo-continuous", "label": "Turbo continuous", "description": "Flexible shift 1–5", "steps": 8, "cfg": False, "size_gb": 0.5}, + {"id": "sft", "label": "SFT", "description": "50 steps, CFG", "steps": 50, "cfg": True, "size_gb": 8}, + {"id": "base", "label": "Base", "description": "50 steps, CFG; lego/extract/complete", "steps": 50, "cfg": True, "exclusive_tasks": ["lego", "extract", "complete"], "size_gb": 8}, ] -# LM planner options from Tutorial +# LM planner options from Tutorial. size_gb: approximate for download confirmation. 
LM_MODELS = [ - {"id": "none", "label": "No LM"}, - {"id": "0.6B", "label": "0.6B"}, - {"id": "1.7B", "label": "1.7B (default)"}, - {"id": "4B", "label": "4B"}, + {"id": "none", "label": "No LM", "size_gb": 0}, + {"id": "0.6B", "label": "0.6B", "size_gb": 2}, + {"id": "1.7B", "label": "1.7B (default)", "size_gb": 4}, + {"id": "4B", "label": "4B", "size_gb": 10}, ] # ACE-Step 1.5 CLI model ids (for acestep-download --model) @@ -333,6 +334,23 @@ def _do_download_worker(model: str, root: Path) -> None: _download_cancel_requested = False +@bp.route("/models/disk-space", methods=["GET"]) +def disk_space(): + """ + GET /api/ace-step/models/disk-space + Returns free and total disk space for the models/checkpoints path (for download confirmation). + """ + try: + root = _checkpoint_root() + root.mkdir(parents=True, exist_ok=True) + usage = shutil.disk_usage(str(root)) + free_gb = round(usage.free / (1024 ** 3), 2) + total_gb = round(usage.total / (1024 ** 3), 2) + return jsonify({"free_gb": free_gb, "total_gb": total_gb, "path": str(root)}) + except Exception as e: + return jsonify({"error": str(e), "free_gb": None, "total_gb": None, "path": ""}), 500 + + @bp.route("/models/download", methods=["POST"]) def download_model(): """ diff --git a/api/generate.py b/api/generate.py index 814da55..3f8d7cc 100644 --- a/api/generate.py +++ b/api/generate.py @@ -21,7 +21,7 @@ def _uppercase_track_in_instruction(instruction): return instruction[: m.start(2)] + m.group(2).upper() + instruction[m.end(2) :] return instruction -from cdmf_paths import get_output_dir, get_user_data_dir, load_config +from cdmf_paths import get_output_dir, get_user_data_dir, get_models_folder, load_config, save_config from cdmf_tracks import get_audio_duration, list_lora_adapters, load_track_meta, save_track_meta from cdmf_generation_job import GenerationCancelled import cdmf_state @@ -59,6 +59,38 @@ def _is_cancel_requested(job_id: str) -> bool: return job_id in _cancel_requested +def 
_is_model_available(dit_tag: str) -> bool: + """Return True if the given DiT model is installed and ready (no download needed). Used to promote pending_model jobs.""" + if not dit_tag or not isinstance(dit_tag, str): + return False + dit = dit_tag.strip().lower() + DIT_15_FOLDERS = { + "turbo": "acestep-v15-turbo", + "base": "acestep-v15-base", + "sft": "acestep-v15-sft", + "turbo-shift1": "acestep-v15-turbo-shift1", + "turbo-shift3": "acestep-v15-turbo-shift3", + "turbo-continuous": "acestep-v15-turbo-continuous", + } + REQUIRED_SUBDIRS = ("music_dcae_f8c8", "music_vocoder", "ace_step_transformer", "umt5-base") + folder = DIT_15_FOLDERS.get(dit) + models_root = Path(get_models_folder()) / "checkpoints" + if folder: + candidate = models_root / folder + if not candidate.exists(): + return False + for sub in REQUIRED_SUBDIRS: + if not (candidate / sub).exists(): + return False + return True + # Legacy v1 + try: + from ace_model_setup import ace_models_present + return ace_models_present() + except Exception: + return False + + def _refs_dir() -> Path: d = get_user_data_dir() / "references" d.mkdir(parents=True, exist_ok=True) @@ -115,10 +147,45 @@ def _on_job_progress( register_job_progress_callback(_on_job_progress) +def _update_job_progress_from_log( + percent: int, current: int, total: int, eta_seconds: float | None +) -> None: + """Update current job progress from parsed tqdm log line (log handler runs in same thread as worker).""" + with _jobs_lock: + jid = cdmf_state.get_current_generation_job_id() + if not jid: + return + job = _jobs.get(jid) + if not job: + return + job["progressPercent"] = round(float(percent), 1) + job["progressSteps"] = f"{current}/{total}" + if eta_seconds is not None: + job["progressEta"] = round(float(eta_seconds), 1) + + def _run_generation(job_id: str) -> None: """Background: run generate_track_ace and update job.""" global _generation_busy, _current_job_id + prev_config = None + config_switched = False try: + with _jobs_lock: + job = 
_jobs.get(job_id) + if not job or job.get("status") != "queued": + return + job_dit = job.get("dit_model") or "turbo" + # If required model is not installed, leave job as pending_model so it runs after user installs it. + if not _is_model_available(job_dit): + logging.info("[API generate] Job %s waiting for model %s (not installed)", job_id, job_dit) + with _jobs_lock: + job = _jobs.get(job_id) + if job and job.get("status") == "queued": + job["status"] = "pending_model" + job["pendingReason"] = ( + f"Model '{job_dit}' is not installed. Install it from Settings → Models to run this job." + ) + return with _jobs_lock: job = _jobs.get(job_id) if not job or job.get("status") != "queued": @@ -131,8 +198,20 @@ def _run_generation(job_id: str) -> None: _current_job_id = job_id cdmf_state.set_current_generation_job_id(job_id) + cdmf_state.set_progress_updater(_update_job_progress_from_log) cancel_check = lambda: _is_cancel_requested(job_id) - from generate_ace import generate_track_ace + from generate_ace import generate_track_ace, clear_ace_pipeline + + # Use job's dit_model (e.g. base for cover). Temporarily switch config so pipeline loads the right model. 
+ with _jobs_lock: + j = _jobs.get(job_id) + job_dit = (j.get("dit_model") or "turbo") if j else "turbo" + prev_config = load_config() or {} + prev_config = dict(prev_config) + config_switched = job_dit != (prev_config.get("ace_step_dit_model") or "turbo") + if config_switched: + save_config({**prev_config, "ace_step_dit_model": job_dit}) + clear_ace_pipeline() params = job.get("params") or {} if not isinstance(params, dict): @@ -451,20 +530,43 @@ def _run_generation(job_id: str) -> None: job["status"] = "cancelled" job["error"] = "Cancelled by user" except Exception as e: - logging.exception("Generation job %s failed", job_id) - with _jobs_lock: - job = _jobs.get(job_id) - if job: - job["status"] = "failed" - job["error"] = str(e) + err_msg = str(e) + # Keep job queued as pending_model when model is missing so it can run after user installs it. + if "not installed" in err_msg.lower() or "Settings → Models" in err_msg: + logging.info("[API generate] Job %s waiting for model: %s", job_id, err_msg[:120]) + with _jobs_lock: + job = _jobs.get(job_id) + if job: + job["status"] = "pending_model" + job["pendingReason"] = err_msg + job["error"] = None + else: + logging.exception("Generation job %s failed", job_id) + with _jobs_lock: + job = _jobs.get(job_id) + if job: + job["status"] = "failed" + job["error"] = err_msg finally: + cdmf_state.set_progress_updater(None) + if config_switched and prev_config: + save_config(prev_config) + clear_ace_pipeline() cdmf_state.set_current_generation_job_id(None) _generation_busy = False with _jobs_lock: _current_job_id = None _cancel_requested.discard(job_id) - # Start next queued job (skips cancelled: they are no longer "queued") + # Promote pending_model jobs to queued when their model is now available; then start first queued job. 
with _jobs_lock: + for jid in _job_order: + j = _jobs.get(jid) + if j and j.get("status") == "pending_model": + dit = j.get("dit_model") or "turbo" + if _is_model_available(dit): + j["status"] = "queued" + j["pendingReason"] = None + logging.info("[API generate] Job %s promoted to queued (model %s now available)", jid, dit) for jid in _job_order: j = _jobs.get(jid) if j and j.get("status") == "queued": @@ -472,6 +574,20 @@ def _run_generation(job_id: str) -> None: break +@bp.route("/model-download-status", methods=["GET"]) +def get_model_download_status(): + """GET /api/generate/model-download-status — whether pipeline is loading (may be downloading model files).""" + try: + st = getattr(cdmf_state, "GENERATION_MODEL_LOADING", {}) + return jsonify({ + "in_progress": bool(st.get("in_progress")), + "message": st.get("message") or "Preparing model (downloading if needed)...", + }) + except Exception as e: + logging.warning("[API generate] model-download-status failed: %s", e) + return jsonify({"in_progress": False, "message": ""}) + + @bp.route("/lora_adapters", methods=["GET"]) def get_lora_adapters(): """GET /api/generate/lora_adapters — list LoRA adapters (e.g. from Training or custom_lora).""" @@ -554,7 +670,15 @@ def _str(v): except (TypeError, ValueError): params_copy = {} config = load_config() - dit_tag = config.get("ace_step_dit_model") or params_copy.get("aceStepDitModel") or "turbo" + # User override from Generation tab model selector takes precedence; else auto base for cover (per docs). 
+ task_for_dit = (params_copy.get("task_type") or params_copy.get("taskType") or "text2music").strip().lower() + explicit_dit = (params_copy.get("aceStepDitModel") or params_copy.get("ace_step_dit_model") or "").strip() + if explicit_dit: + dit_tag = explicit_dit + elif task_for_dit == "cover": + dit_tag = "base" + else: + dit_tag = config.get("ace_step_dit_model") or "turbo" lm_tag = config.get("ace_step_lm") or params_copy.get("aceStepLm") or "1.7B" with _jobs_lock: _jobs[job_id] = { @@ -608,10 +732,42 @@ def get_status(job_id: str): "progressStage": job.get("progressStage"), "result": job.get("result"), "error": job.get("error"), + "pendingReason": job.get("pendingReason") if status == "pending_model" else None, } return jsonify(out) +@bp.route("/retry-pending", methods=["POST"]) +def retry_pending(): + """POST /api/generate/retry-pending — promote pending_model jobs to queued if model is now available, start first queued job. Call after model download completes.""" + global _generation_busy + promoted = 0 + started = None + with _jobs_lock: + for jid in _job_order: + j = _jobs.get(jid) + if j and j.get("status") == "pending_model": + dit = j.get("dit_model") or "turbo" + if _is_model_available(dit): + j["status"] = "queued" + j["pendingReason"] = None + promoted += 1 + logging.info("[API generate] Job %s promoted to queued (model %s now available)", jid, dit) + if not _generation_busy: + for jid in _job_order: + j = _jobs.get(jid) + if j and j.get("status") == "queued": + _generation_busy = True + threading.Thread(target=_run_generation, args=(jid,), daemon=True).start() + started = jid + break + return jsonify({ + "ok": True, + "promoted": promoted, + "startedJobId": started, + }) + + @bp.route("/unstick", methods=["POST"]) def unstick_queue(): """POST /api/generate/unstick — clear stuck worker state and start the next queued job (if any).""" @@ -641,9 +797,10 @@ def cancel_job(job_id: str): if not job: return jsonify({"error": "Job not found"}), 404 status = 
job.get("status", "unknown") - if status == "queued": + if status in ("queued", "pending_model"): job["status"] = "cancelled" job["error"] = "Cancelled by user" + job["pendingReason"] = None return jsonify({"cancelled": True, "jobId": job_id, "message": "Job removed from queue."}) if status == "running": _cancel_requested.add(job_id) diff --git a/cdmf_pipeline_ace_step.py b/cdmf_pipeline_ace_step.py index 5780a74..0e3aa57 100644 --- a/cdmf_pipeline_ace_step.py +++ b/cdmf_pipeline_ace_step.py @@ -1195,7 +1195,8 @@ def add_latents_noise( sigma_max=sigma_max ) - infer_steps = int(sigma_max * infer_steps) + # Ensure enough steps for cover/audio2audio so reference is audible (INFERENCE.md: base 32-64 recommended). + infer_steps = max(16, int(sigma_max * infer_steps)) timesteps, num_inference_steps = retrieve_timesteps( scheduler, num_inference_steps=infer_steps, @@ -1295,6 +1296,17 @@ def text2music_diffusion_process( if ref_latents is not None: frame_length = ref_latents.shape[-1] + # Cap ref length for cover/audio2audio so each diffusion step stays fast (avoids 80s+ per step on long refs) + max_cover_sec = float(os.environ.get("ACE_COVER_MAX_REF_SECONDS", "90")) + max_cover_frames = int(max_cover_sec * 44100 / 512 / 8) + if frame_length > max_cover_frames: + ref_latents = ref_latents[:, :, :, :max_cover_frames].contiguous() + frame_length = max_cover_frames + logger.info( + "Capped ref_latents to %d frames (~%.0fs) for faster cover/audio2audio generation (set ACE_COVER_MAX_REF_SECONDS to override).", + max_cover_frames, + max_cover_sec, + ) if len(oss_steps) > 0: infer_steps = max(oss_steps) @@ -2087,6 +2099,7 @@ def __call__( ref_latents = None if ref_audio_input is not None and audio2audio_enable: + # For cover mode: ref_audio_input = source song (song to cover), per docs/ACE-Step-INFERENCE.md. 
assert ref_audio_input is not None, "ref_audio_input is required for audio2audio task" assert os.path.exists( ref_audio_input diff --git a/cdmf_state.py b/cdmf_state.py index 4517d3d..3776cc3 100644 --- a/cdmf_state.py +++ b/cdmf_state.py @@ -4,7 +4,7 @@ import threading import time -from typing import Optional, Dict, Any +from typing import Optional, Dict, Any, Callable from ace_model_setup import ace_models_present @@ -26,6 +26,24 @@ def get_current_generation_job_id() -> Optional[str]: return getattr(_current_job_id_holder, "job_id", None) +# --------------------------------------------------------------------------- +# Progress updater (called from log handler when tqdm progress is parsed) +# --------------------------------------------------------------------------- + +_progress_updater: Optional[Callable[[int, int, int, Optional[float]], None]] = None + + +def set_progress_updater(cb: Optional[Callable[[int, int, int, Optional[float]], None]]) -> None: + """Set a callback(percent, current, total, eta_seconds) used to update API job from parsed log progress.""" + global _progress_updater + _progress_updater = cb + + +def get_progress_updater() -> Optional[Callable[[int, int, int, Optional[float]], None]]: + """Return the current progress updater, or None.""" + return _progress_updater + + # --------------------------------------------------------------------------- # Generation progress (shared with /progress endpoint and model downloads) # --------------------------------------------------------------------------- @@ -57,6 +75,15 @@ def get_current_generation_job_id() -> Optional[str]: "message": "", } +# --------------------------------------------------------------------------- +# Generation pipeline loading (may trigger HuggingFace model download on first use) +# --------------------------------------------------------------------------- + +GENERATION_MODEL_LOADING: Dict[str, Any] = { + "in_progress": False, + "message": "Preparing model (downloading if 
needed)...", +} + # --------------------------------------------------------------------------- # MuFun-ACEStep analysis model availability # --------------------------------------------------------------------------- diff --git a/generate_ace.py b/generate_ace.py index 73f0c24..3420ab3 100644 --- a/generate_ace.py +++ b/generate_ace.py @@ -180,6 +180,7 @@ def _candy_torchaudio_load( # ----------------------------------------------------------------------------- import cdmf_paths +import cdmf_state # Default target length + fades (UI can override) DEFAULT_TARGET_SECONDS = 150.0 @@ -437,32 +438,91 @@ def _get_ace_pipeline() -> "ACEStepPipeline": if _ACE_PIPELINE is not None: return _ACE_PIPELINE - print( - "[ACE] Initializing ACEStepPipeline (first time will download/load checkpoints)...", - flush=True, - ) - _report_progress(0.05, "ace_load") - - # Make sure our dedicated ACE cache under ace_models/checkpoints is ready. + # Notify UI that model may be downloading (pipeline load can trigger HuggingFace fetch). try: - checkpoint_root = ensure_ace_models() - except Exception as exc: - raise RuntimeError( - "Failed to prepare ACE-Step checkpoints. " - "See the console logs above for details." - ) from exc + cdmf_state.GENERATION_MODEL_LOADING["in_progress"] = True + cdmf_state.GENERATION_MODEL_LOADING["message"] = "Preparing model (downloading if needed)..." + except Exception: + pass - # Wire ACE's internal progress bars into our callback before heavy work starts. - _monkeypatch_ace_tqdm() + try: + print( + "[ACE] Initializing ACEStepPipeline (first time will download/load checkpoints)...", + flush=True, + ) + _report_progress(0.05, "ace_load") + + # Resolve checkpoint path: use ACE-Step 1.5 model folder from config when present (e.g. base for cover). + # Never trigger downloads from generation; require model to be installed via Settings → Models. 
+ DIT_15_FOLDERS = { + "turbo": "acestep-v15-turbo", + "base": "acestep-v15-base", + "sft": "acestep-v15-sft", + "turbo-shift1": "acestep-v15-turbo-shift1", + "turbo-shift3": "acestep-v15-turbo-shift3", + "turbo-continuous": "acestep-v15-turbo-continuous", + } + REQUIRED_SUBDIRS = ("music_dcae_f8c8", "music_vocoder", "ace_step_transformer", "umt5-base") + config = cdmf_paths.load_config() + dit = (config.get("ace_step_dit_model") or "turbo").strip().lower() + folder = DIT_15_FOLDERS.get(dit) + models_root = Path(cdmf_paths.get_models_folder()) / "checkpoints" + checkpoint_root = None + if folder: + candidate = models_root / folder + if not candidate.exists(): + raise RuntimeError( + f"Model '{dit}' is not installed. Please install it from Settings → Models " + "(do not start generation to trigger downloads)." + ) + for sub in REQUIRED_SUBDIRS: + if not (candidate / sub).exists(): + raise RuntimeError( + f"Model '{dit}' is not fully installed (missing {sub}). " + "Please install or re-download it from Settings → Models." + ) + checkpoint_root = candidate + print(f"[ACE] Using DiT model '{dit}' at {checkpoint_root}", flush=True) + if checkpoint_root is None: + # Legacy v1 path: only use if already present; never download from here. + from ace_model_setup import ace_models_present + if not ace_models_present(): + raise RuntimeError( + "ACE-Step model is not installed. Please install it from Settings → Models " + "(do not start generation to trigger downloads)." + ) + try: + checkpoint_root = ensure_ace_models() + except Exception as exc: + raise RuntimeError( + "Failed to prepare ACE-Step checkpoints. " + "See the console logs above for details." + ) from exc + + # Wire ACE's internal progress bars into our callback before heavy work starts. + _monkeypatch_ace_tqdm() + + # Tell ACE-Step to use our cache root as its checkpoint_dir so it + # doesn't try to re-download into ~/.cache/ace-step/checkpoints. 
+ pipeline = ACEStepPipeline(checkpoint_dir=str(checkpoint_root)) + _ACE_PIPELINE = pipeline + + print("[ACE] ACEStepPipeline ready.", flush=True) + return _ACE_PIPELINE + finally: + try: + cdmf_state.GENERATION_MODEL_LOADING["in_progress"] = False + except Exception: + pass - # Tell ACE-Step to use our cache root as its checkpoint_dir so it - # doesn't try to re-download into ~/.cache/ace-step/checkpoints. - pipeline = ACEStepPipeline(checkpoint_dir=str(checkpoint_root)) - _ACE_PIPELINE = pipeline + return _ACE_PIPELINE - print("[ACE] ACEStepPipeline ready.", flush=True) - return _ACE_PIPELINE +def clear_ace_pipeline() -> None: + """Clear the cached pipeline so the next call to _get_ace_pipeline() loads fresh (e.g. after switching DiT model).""" + global _ACE_PIPELINE + with _ACE_PIPELINE_LOCK: + _ACE_PIPELINE = None # ----------------------------------------------------------------------------- @@ -569,11 +629,10 @@ def _prepare_reference_audio( """ Normalise the ACE-Step edit / audio2audio mode (task_type, reference_audio, src_audio per Tutorial/INFERENCE): - - Task (task_type) is clamped to one of: text2music / retake / repaint / extend. - - UI tasks "cover" and "audio2audio" are mapped to "retake" (ACE-Step - then uses ref_audio_input and sets task to "audio2audio" internally). - - If Audio2Audio is enabled while task is still 'text2music', we - internally flip it to 'retake' (this is how ACE-Step expects edits). + - Task (task_type) is clamped to one of: text2music / audio2audio / retake / repaint / extend. + - UI tasks "cover" and "audio2audio" are passed as task="audio2audio" (INFERENCE.md: + cover uses src_audio = song to cover; we pass it as ref_audio_input, no retake path). + - If Audio2Audio is enabled while task is still 'text2music', we set task to 'audio2audio'. - For any edit mode (retake/repaint/extend) we prefer to have a reference audio file and make sure ACE-Step sees a .wav path. 
If no reference is provided, we *gracefully* fall back to @@ -582,19 +641,19 @@ def _prepare_reference_audio( task_norm = (task or "text2music").strip().lower() if task_norm not in ("text2music", "retake", "repaint", "extend", "cover", "audio2audio", "lego", "extract", "complete"): task_norm = "text2music" - # Map UI task names to pipeline task: cover and audio2audio both run as retake - # (pipeline will set task to "audio2audio" when ref_audio_input is passed). + # Per docs/ACE-Step-INFERENCE.md: cover uses src_audio (song to cover) + caption (target style). + # Pass task="audio2audio" so the pipeline uses ref_audio_input only (no retake/repaint path). if task_norm in ("cover", "audio2audio"): - task_norm = "retake" + task_norm = "audio2audio" # Audio2Audio is effectively an edit of an existing clip. If the user - # left the task on "Text → music", run it as a retake under the hood. + # left the task on "Text → music" but provided ref audio, run as audio2audio. if audio2audio_enable and task_norm == "text2music": - task_norm = "retake" + task_norm = "audio2audio" # Any of the edit-style tasks imply some form of Audio2Audio or source-backed (lego/extract/complete). audio2audio_flag = bool( - audio2audio_enable or task_norm in ("retake", "repaint", "extend") + audio2audio_enable or task_norm in ("audio2audio", "retake", "repaint", "extend") ) needs_src_path = audio2audio_flag or task_norm in ("lego", "extract", "complete") diff --git a/music_forge_ui.py b/music_forge_ui.py index 369af35..d3722c7 100644 --- a/music_forge_ui.py +++ b/music_forge_ui.py @@ -265,22 +265,30 @@ def _should_filter(self, line): return False + def _parse_eta_seconds(self, time_info): + """Parse tqdm time_info like '01:34<23:38, 94.54s/it' -> remaining seconds (23*60+38).""" + if not time_info: + return None + # Match prev.map(s => { if (s.id === tempId) { return { ...s, - queuePosition: status.status === 'queued' ? 
status.queuePosition : undefined, + queuePosition: (status.status === 'queued' || status.status === 'pending_model') ? status.queuePosition : undefined, generationPercent: status.status === 'running' ? status.progressPercent : undefined, generationSteps: status.status === 'running' ? status.progressSteps : undefined, generationEtaSeconds: status.status === 'running' && status.etaSeconds != null ? status.etaSeconds : undefined, + generationStatus: status.status as Song['generationStatus'], + generationPendingReason: status.pendingReason ?? undefined, }; } return s; @@ -709,7 +712,14 @@ export default function App() { cleanupJob(job.jobId, tempId); console.error(`Job ${job.jobId} failed:`, status.error); showToast(`Generation failed: ${status.error || 'Unknown error'}`, 'error'); + } else if (status.status === 'cancelled') { + cleanupJob(job.jobId, tempId); + setSongs(prev => prev.filter(song => song.id !== tempId)); + } else if (status.status === 'pending_model') { + // Promote to queued when model becomes available (e.g. 
user installed from Settings) + generateApi.retryPending().catch(() => {}); } + // pending_model and queued: keep polling; job stays in list } catch (pollError) { console.error(`Polling error for job ${job.jobId}:`, pollError); cleanupJob(job.jobId, tempId); @@ -1057,6 +1067,7 @@ export default function App() { onNavigateToProfile={handleNavigateToProfile} onReusePrompt={handleReuse} onDelete={handleDeleteSong} + onOpenSettings={() => setShowSettingsModal(true)} /> {showRightSidebar && ( @@ -1104,6 +1115,7 @@ export default function App() { isGenerating={isGenerating} initialData={reuseData} onOpenSettings={() => setShowSettingsModal(true)} + onOpenConsoleLogs={() => setShowConsole(true)} /> @@ -1130,6 +1142,7 @@ export default function App() { onNavigateToProfile={handleNavigateToProfile} onReusePrompt={handleReuse} onDelete={handleDeleteSong} + onOpenSettings={() => setShowSettingsModal(true)} /> @@ -1265,6 +1278,7 @@ export default function App() { theme={theme} onToggleTheme={toggleTheme} onNavigateToProfile={handleNavigateToProfile} + onDownloadComplete={() => generateApi.retryPending().catch(() => {})} /> {/* Mobile Details Modal */} diff --git a/ui/components/CreatePanel.tsx b/ui/components/CreatePanel.tsx index b902906..c7ec20d 100644 --- a/ui/components/CreatePanel.tsx +++ b/ui/components/CreatePanel.tsx @@ -5,7 +5,7 @@ import { useAuth } from '../context/AuthContext'; import { generateApi, preferencesApi, aceStepModelsApi, type LoraAdapter } from '../services/api'; /** Tasks that require ACE-Step Base model only (see docs/ACE-Step-Tutorial.md). 
*/ -const TASKS_REQUIRING_BASE = ['lego', 'extract', 'complete'] as const; +const TASKS_REQUIRING_BASE = ['cover', 'lego', 'extract', 'complete'] as const; function taskRequiresBase(taskType: string): boolean { return TASKS_REQUIRING_BASE.includes(taskType as typeof TASKS_REQUIRING_BASE[number]); } @@ -31,6 +31,8 @@ interface CreatePanelProps { initialData?: { song: Song, timestamp: number } | null; /** Open Settings modal (e.g. to download required model). */ onOpenSettings?: () => void; + /** Open Console logs panel (e.g. when model is downloading in background). */ + onOpenConsoleLogs?: () => void; } /** Visible tooltip on hover (native title has delay and is unreliable). */ @@ -155,13 +157,18 @@ const VOCAL_LANGUAGES = [ // Create panel mode: Simple (description), Custom (full controls), Cover (pure cover: source + caption), Lego (add-instrument tracks) type CreateMode = 'simple' | 'custom' | 'cover' | 'lego'; -export const CreatePanel: React.FC = ({ onGenerate, isGenerating, initialData, onOpenSettings }) => { +export const CreatePanel: React.FC = ({ onGenerate, isGenerating, initialData, onOpenSettings, onOpenConsoleLogs }) => { const { isAuthenticated, token } = useAuth(); // Mode: simple | custom | cover | lego const [createMode, setCreateMode] = useState('custom'); const customMode = createMode === 'custom'; + // ACE-Step model for this generation (only installed models). Updated by workflow (e.g. base for cover) or user override. 
+ const [generationDitModel, setGenerationDitModel] = useState('turbo'); + const [installedDitModels, setInstalledDitModels] = useState>([]); + const [modelDownloadInProgress, setModelDownloadInProgress] = useState(false); + // Cover tab: pure cover (source + caption) or blend (source + style audio) const [coverCaption, setCoverCaption] = useState(''); const [coverStrength, setCoverStrength] = useState(0.8); @@ -356,6 +363,42 @@ export const CreatePanel: React.FC = ({ onGenerate, isGenerati // Fetch LoRA adapters on mount (Training output + custom_lora) useEffect(() => { fetchLoraAdapters(); }, [fetchLoraAdapters]); + // Load installed ACE-Step DiT models and sync generation model from preferences (or default base for cover/lego). + useEffect(() => { + aceStepModelsApi.list().then((list) => { + const installed = (list.dit_models || []).filter((m) => m.installed).map((m) => ({ id: m.id, label: m.label, description: m.description })); + setInstalledDitModels(installed); + preferencesApi.get().then((prefs) => { + const prefDit = (prefs.ace_step_dit_model || 'turbo').trim(); + const valid = installed.some((m) => m.id === prefDit); + setGenerationDitModel(valid ? prefDit : (installed[0]?.id || 'turbo')); + }).catch(() => { + if (installed.length) setGenerationDitModel(installed[0].id); + }); + }).catch(() => setInstalledDitModels([])); + }, []); + + // When switching to Cover or Lego, default model to base (user can override via selector). + useEffect(() => { + if ((createMode === 'cover' || createMode === 'lego') && installedDitModels.some((m) => m.id === 'base')) { + setGenerationDitModel((prev) => (prev === 'turbo' ? 'base' : prev)); + } + }, [createMode, installedDitModels]); + + // Poll model-download status when generating so we can show banner + link to console. 
+ useEffect(() => { + if (!isGenerating) { + setModelDownloadInProgress(false); + return; + } + const poll = () => { + generateApi.modelDownloadStatus().then((st) => setModelDownloadInProgress(st.in_progress)).catch(() => setModelDownloadInProgress(false)); + }; + poll(); + const t = setInterval(poll, 2000); + return () => clearInterval(t); + }, [isGenerating]); + useEffect(() => { const handleMouseMove = (e: MouseEvent) => { if (!isResizing) return; @@ -704,16 +747,23 @@ export const CreatePanel: React.FC = ({ onGenerate, isGenerati const effectiveTaskType = createMode === 'lego' ? 'lego' : createMode === 'cover' ? 'cover' : (customMode ? taskType : (sourceAudioUrl?.trim() ? 'cover' : 'text2music')); if (taskRequiresBase(effectiveTaskType)) { setLegoValidationError(''); + setCoverValidationError(''); try { const list = await aceStepModelsApi.list(); const baseInstalled = list.dit_models.some((m) => m.id === 'base' && m.installed); if (!baseInstalled) { - setLegoValidationError('Lego (and Extract/Complete) require the Base model. Open Settings to download it, then try again.'); + const msg = effectiveTaskType === 'cover' + ? 'Cover requires the Base model. Open Settings to download it, then try again.' + : 'Lego (and Extract/Complete) require the Base model. Open Settings to download it, then try again.'; + if (effectiveTaskType === 'cover') setCoverValidationError(msg); + else setLegoValidationError(msg); onOpenSettings?.(); return; } } catch (e) { - setLegoValidationError('Could not check models. Open Settings to ensure the Base model is installed.'); + const msg = 'Could not check models. 
Open Settings to ensure the Base model is installed.'; + if (effectiveTaskType === 'cover') setCoverValidationError(msg); + else setLegoValidationError(msg); onOpenSettings?.(); return; } @@ -784,6 +834,7 @@ export const CreatePanel: React.FC = ({ onGenerate, isGenerati lmBatchChunkSize, negativePrompt: negativePrompt.trim() || undefined, isFormatCaption, + aceStepDitModel: generationDitModel, }); return; } @@ -852,6 +903,7 @@ export const CreatePanel: React.FC = ({ onGenerate, isGenerati lmBatchChunkSize, negativePrompt: negativePrompt.trim() || undefined, isFormatCaption, + aceStepDitModel: generationDitModel, }); return; } @@ -927,6 +979,7 @@ export const CreatePanel: React.FC = ({ onGenerate, isGenerati lmBatchChunkSize, negativePrompt: negativePrompt.trim() || undefined, isFormatCaption, + aceStepDitModel: generationDitModel, }); } @@ -958,6 +1011,46 @@ export const CreatePanel: React.FC = ({ onGenerate, isGenerati onLoadedMetadata={(e) => setSourceDuration(e.currentTarget.duration || 0)} /> + {/* Model selector: only installed models; workflow (e.g. Cover→base) updates this; user can override. */} +
+
+ + {installedDitModels.length > 0 ? ( + + ) : ( + No models installed + )} + {(createMode === 'cover' || createMode === 'lego') && generationDitModel === 'base' && ( + (recommended for this mode) + )} +
+ {installedDitModels.length === 0 && ( + + )} +
+ + {/* Notify when pipeline is loading (may be downloading model files in background). */} + {modelDownloadInProgress && ( +
+ Model files are being downloaded or prepared. This may take a while. Do not close the app. + {onOpenConsoleLogs && ( + + )} +
+ )} + {/* Header - Mode Toggle */}
@@ -974,7 +1067,12 @@ export const CreatePanel: React.FC = ({ onGenerate, isGenerati Custom
+ {/* Inference steps (cover): up to 80 for base model (docs) */} +
+
+ Inference steps + +
+
+
+ { setInferenceSteps(Number(e.target.value)); setQualityPreset('custom'); }} + className="flex-1 h-2 bg-zinc-200 dark:bg-zinc-700 rounded-lg appearance-none cursor-pointer accent-pink-500" + /> + {inferenceSteps} +
+

1–80 (Base model used for Cover; 32–64 recommended)

+
+
+ {/* Quality preset */}
@@ -2262,20 +2384,20 @@ export const CreatePanel: React.FC = ({ onGenerate, isGenerati
- + {inferenceSteps}
{ setInferenceSteps(Number(e.target.value)); setQualityPreset('custom'); }} className="w-full h-2 bg-zinc-200 dark:bg-zinc-700 rounded-lg appearance-none cursor-pointer accent-pink-500" /> -

65 recommended for quality; base/SFT can use up to 75 steps

+

65 recommended for quality; base/cover can use up to 80 steps (INFERENCE.md)

{/* Guidance Scale */} @@ -2589,7 +2711,7 @@ export const CreatePanel: React.FC = ({ onGenerate, isGenerati {(taskType === 'cover' || taskType === 'audio2audio') && 'Transform an existing track: set a source/cover audio and describe the new style. Use Cover Strength to control how much to follow the original.'} {taskType === 'repaint' && 'Regenerate only a time segment of the source. Set start/end (seconds; -1 = end of file) and style for that section.'} {taskType === 'extend' && 'Extend the source audio. Use source audio and optional style for the continuation.'} - {(taskType === 'lego' || taskType === 'extract' || taskType === 'complete') && 'Requires ACE-Step 1.5 Base model. Lego: add new tracks to existing. Extract: separate stems. Complete: add accompaniment to a single track.'} + {(taskType === 'cover' || taskType === 'lego' || taskType === 'extract' || taskType === 'complete') && 'Requires ACE-Step 1.5 Base model. Cover: style transfer. Lego: add new tracks. Extract: separate stems. Complete: add accompaniment.'}

diff --git a/ui/components/SettingsModal.tsx b/ui/components/SettingsModal.tsx index 7414d18..1377cbd 100644 --- a/ui/components/SettingsModal.tsx +++ b/ui/components/SettingsModal.tsx @@ -1,4 +1,4 @@ -import React, { useState, useEffect } from 'react'; +import React, { useState, useEffect, useRef } from 'react'; import { X, User as UserIcon, Palette, Info, Edit3, ExternalLink, Github, FolderOpen, HardDrive, ZoomIn, Box } from 'lucide-react'; import { useAuth } from '../context/AuthContext'; import { EditProfileModal } from './EditProfileModal'; @@ -11,6 +11,8 @@ interface SettingsModalProps { theme: 'light' | 'dark'; onToggleTheme: () => void; onNavigateToProfile?: (username: string) => void; + /** Called when an ACE-Step model download finishes (so pending generation jobs can be retried). */ + onDownloadComplete?: () => void; } const ZOOM_OPTIONS = [80, 90, 100, 110, 125] as const; @@ -33,7 +35,7 @@ const ACE_STEP_LM_OPTIONS = [ { value: '4B', label: '4B' }, ] as const; -export const SettingsModal: React.FC = ({ isOpen, onClose, theme, onToggleTheme, onNavigateToProfile }) => { +export const SettingsModal: React.FC = ({ isOpen, onClose, theme, onToggleTheme, onNavigateToProfile, onDownloadComplete }) => { const { user } = useAuth(); const [isEditProfileOpen, setIsEditProfileOpen] = useState(false); const [modelsFolder, setModelsFolder] = useState(''); @@ -45,10 +47,13 @@ export const SettingsModal: React.FC = ({ isOpen, onClose, t const [aceStepDitModel, setAceStepDitModel] = useState('turbo'); const [aceStepLm, setAceStepLm] = useState('1.7B'); const [modelsSaved, setModelsSaved] = useState(false); - const [aceStepList, setAceStepList] = useState<{ dit_models: Array<{ id: string; label: string; description?: string; installed: boolean }>; lm_models: Array<{ id: string; label: string; installed: boolean }>; discovered_models?: Array<{ id: string; label: string; path: string; custom: boolean }>; acestep_download_available: boolean } | null>(null); + const 
[aceStepList, setAceStepList] = useState<{ dit_models: Array<{ id: string; label: string; description?: string; installed: boolean; size_gb?: number }>; lm_models: Array<{ id: string; label: string; installed: boolean; size_gb?: number }>; discovered_models?: Array<{ id: string; label: string; path: string; custom: boolean }>; acestep_download_available: boolean } | null>(null); const [downloadingModel, setDownloadingModel] = useState(null); const [downloadError, setDownloadError] = useState(null); const [downloadStatus, setDownloadStatus] = useState(null); + const [pendingDownload, setPendingDownload] = useState<{ id: string; label: string; sizeGb: number } | null>(null); + const [diskSpace, setDiskSpace] = useState<{ free_gb: number; total_gb: number } | null>(null); + const wasDownloadingRef = useRef(false); useEffect(() => { if (isOpen) { @@ -72,6 +77,7 @@ export const SettingsModal: React.FC = ({ isOpen, onClose, t // Poll download status while a download is running (so we show progress and know when it finishes) useEffect(() => { if (!isOpen || !downloadStatus?.running) return; + wasDownloadingRef.current = true; const interval = setInterval(() => { aceStepModelsApi.downloadStatus() .then((s) => { @@ -80,12 +86,14 @@ export const SettingsModal: React.FC = ({ isOpen, onClose, t setDownloadingModel(null); if (s.error && !s.cancelled) setDownloadError(s.error); aceStepModelsApi.list().then(setAceStepList).catch(() => {}); + if (wasDownloadingRef.current && !s.error) onDownloadComplete?.(); + wasDownloadingRef.current = false; } }) .catch(() => {}); }, 1500); return () => clearInterval(interval); - }, [isOpen, downloadStatus?.running]); + }, [isOpen, downloadStatus?.running, onDownloadComplete]); // Restrict selection to installed or discovered models: if current choice not in list, switch to first available useEffect(() => { @@ -345,20 +353,9 @@ export const SettingsModal: React.FC = ({ isOpen, onClose, t disabled={downloadStatus?.running === true} onClick={() 
=> { setDownloadError(null); - setDownloadingModel(m.id); - aceStepModelsApi.download(m.id) - .then((r) => { - if (r.error) { - setDownloadError(r.hint ? `${r.error} ${r.hint}` : r.error); - setDownloadingModel(null); - } else { - aceStepModelsApi.downloadStatus().then(setDownloadStatus); - } - }) - .catch((err) => { - setDownloadError(err?.message || 'Download failed'); - setDownloadingModel(null); - }); + const sizeGb = (m as { size_gb?: number }).size_gb ?? 8; + setPendingDownload({ id: m.id, label: m.label, sizeGb }); + aceStepModelsApi.diskSpace().then((d) => setDiskSpace(d)).catch(() => setDiskSpace(null)); }} className="text-xs px-2 py-1 rounded bg-pink-500 text-white hover:bg-pink-600 disabled:opacity-50" > @@ -381,20 +378,9 @@ export const SettingsModal: React.FC = ({ isOpen, onClose, t disabled={downloadStatus?.running === true} onClick={() => { setDownloadError(null); - setDownloadingModel(m.id); - aceStepModelsApi.download(m.id) - .then((r) => { - if (r.error) { - setDownloadError(r.error); - setDownloadingModel(null); - } else { - aceStepModelsApi.downloadStatus().then(setDownloadStatus); - } - }) - .catch((err) => { - setDownloadError(err?.message || 'Download failed'); - setDownloadingModel(null); - }); + const sizeGb = (m as { size_gb?: number }).size_gb ?? 4; + setPendingDownload({ id: m.id, label: m.label, sizeGb }); + aceStepModelsApi.diskSpace().then((d) => setDiskSpace(d)).catch(() => setDiskSpace(null)); }} className="text-xs px-2 py-1 rounded bg-pink-500 text-white hover:bg-pink-600 disabled:opacity-50" > @@ -604,6 +590,57 @@ export const SettingsModal: React.FC = ({ isOpen, onClose, t
+ {/* Confirm model download: ensure user intends to download and has space */} + {pendingDownload && ( +
setPendingDownload(null)}> +
e.stopPropagation()}> +

Download model?

+

+ {pendingDownload.label} will use approximately {pendingDownload.sizeGb} GB. +

+ {diskSpace != null && ( +

+ You have {diskSpace.free_gb} GB free. Ensure you have enough space before continuing. +

+ )} + {diskSpace != null && pendingDownload.sizeGb > 0 && diskSpace.free_gb < pendingDownload.sizeGb && ( +

Low disk space. Free at least {pendingDownload.sizeGb - diskSpace.free_gb} GB more.

+ )} +
+ + +
+
+
+ )} + setIsEditProfileOpen(false)} diff --git a/ui/components/SongList.tsx b/ui/components/SongList.tsx index d52d525..3599742 100644 --- a/ui/components/SongList.tsx +++ b/ui/components/SongList.tsx @@ -1,6 +1,6 @@ import React, { useState, useMemo, useRef, useEffect } from 'react'; import { Song } from '../types'; -import { Play, MoreHorizontal, Heart, ThumbsDown, ListPlus, Pause, Search, Filter, Check, Globe, Lock, Loader2, ThumbsUp, Share2, Video, Info, Clock } from 'lucide-react'; +import { Play, MoreHorizontal, Heart, ThumbsDown, ListPlus, Pause, Search, Filter, Check, Globe, Lock, Loader2, ThumbsUp, Share2, Video, Info, Clock, Settings } from 'lucide-react'; import { useAuth } from '../context/AuthContext'; import { SongDropdownMenu } from './SongDropdownMenu'; import { ShareModal } from './ShareModal'; @@ -21,6 +21,8 @@ interface SongListProps { onNavigateToProfile?: (username: string) => void; onReusePrompt?: (song: Song) => void; onDelete?: (song: Song) => void; + /** Open Settings (e.g. to install model when job is pending_model). */ + onOpenSettings?: () => void; } // ... existing code ... @@ -51,7 +53,8 @@ export const SongList: React.FC = ({ onShowDetails, onNavigateToProfile, onReusePrompt, - onDelete + onDelete, + onOpenSettings }) => { const { user } = useAuth(); const [searchQuery, setSearchQuery] = useState(''); @@ -283,7 +286,27 @@ const SongItem: React.FC = ({ {song.isGenerating ? (
- {song.queuePosition ? ( + {song.generationStatus === 'pending_model' ? ( + /* Waiting for model */ + <> +
+ +
+ Waiting for model + {song.generationPendingReason && ( + {song.generationPendingReason} + )} + {onOpenSettings && ( + + )} + + ) : song.queuePosition ? ( /* Queue indicator */ <>
@@ -336,7 +359,7 @@ const SongItem: React.FC = ({

- {song.title || (song.isGenerating ? (song.queuePosition ? "Queued..." : (song.generationPercent != null ? `Creating... ${Math.round(song.generationPercent)}%` : "Creating...")) : "Untitled")} + {song.title || (song.isGenerating ? (song.generationStatus === 'pending_model' ? "Waiting for model..." : song.queuePosition ? "Queued..." : (song.generationPercent != null ? `Creating... ${Math.round(song.generationPercent)}%` : "Creating...")) : "Untitled")}

v1.5 @@ -450,8 +473,8 @@ const SongItem: React.FC = ({ {/* Timestamp */}
{song.isGenerating ? ( - - {song.queuePosition ? `#${song.queuePosition}` : (song.generationPercent != null ? `${Math.round(song.generationPercent)}%` : 'Creating...')} + + {song.generationStatus === 'pending_model' ? 'Waiting for model' : song.queuePosition ? `#${song.queuePosition}` : (song.generationPercent != null ? `${Math.round(song.generationPercent)}%` : 'Creating...')} ) : song.duration}
diff --git a/ui/services/api.ts b/ui/services/api.ts index b4c4802..a3c2ce3 100644 --- a/ui/services/api.ts +++ b/ui/services/api.ts @@ -280,6 +280,8 @@ export interface AceStepModelItem { steps?: number; cfg?: boolean; exclusive_tasks?: string[]; + /** Approximate size in GB for download confirmation. */ + size_gb?: number; } export interface AceStepDiscoveredModel { @@ -312,6 +314,9 @@ export interface AceStepDownloadStatus { export const aceStepModelsApi = { list: (): Promise => api('/api/ace-step/models') as Promise, + /** Free/total disk space for models directory (for download confirmation). */ + diskSpace: (): Promise<{ free_gb: number; total_gb: number; path: string }> => + api('/api/ace-step/models/disk-space') as Promise<{ free_gb: number; total_gb: number; path: string }>, download: (model: string): Promise<{ ok?: boolean; started?: boolean; error?: string; path?: string; hint?: string }> => api('/api/ace-step/models/download', { method: 'POST', body: { model } }), downloadStatus: (): Promise => @@ -322,8 +327,9 @@ export const aceStepModelsApi = { export interface GenerationJob { jobId: string; - status: 'pending' | 'queued' | 'running' | 'succeeded' | 'failed'; + status: 'pending' | 'queued' | 'running' | 'succeeded' | 'failed' | 'pending_model'; queuePosition?: number; + pendingReason?: string | null; etaSeconds?: number; progressPercent?: number; progressSteps?: string; @@ -355,9 +361,17 @@ export const generateApi = { cancelJob: (jobId: string, token: string): Promise<{ cancelled: boolean; jobId: string; message: string }> => api(`/api/generate/cancel/${jobId}`, { method: 'POST', token }), + /** Promote pending_model jobs to queued when model is available; start first queued job. Call after model download. 
*/ + retryPending: (): Promise<{ ok: boolean; promoted: number; startedJobId?: string }> => + api('/api/generate/retry-pending', { method: 'POST' }) as Promise<{ ok: boolean; promoted: number; startedJobId?: string }>, + getHistory: (token: string): Promise<{ jobs: GenerationJob[] }> => api('/api/generate/history', { token }), + /** Whether the generation pipeline is loading (may be downloading model files). Show banner + link to console. */ + modelDownloadStatus: (): Promise<{ in_progress: boolean; message?: string }> => + api('/api/generate/model-download-status') as Promise<{ in_progress: boolean; message?: string }>, + /** List LoRA adapters (Training output and custom_lora folder). */ getLoraAdapters: (): Promise<{ adapters: LoraAdapter[] }> => api('/api/generate/lora_adapters'), diff --git a/ui/types.ts b/ui/types.ts index f0e0364..2c34a7e 100644 --- a/ui/types.ts +++ b/ui/types.ts @@ -11,6 +11,9 @@ export interface Song { generationPercent?: number; generationSteps?: string; generationEtaSeconds?: number; + /** When status is pending_model: job is waiting for the required model to be installed. */ + generationStatus?: 'queued' | 'running' | 'succeeded' | 'failed' | 'pending_model' | 'cancelled'; + generationPendingReason?: string | null; tags: string[]; audioUrl?: string; isPublic?: boolean; @@ -116,6 +119,8 @@ export interface GenerationParams { isFormatCaption?: boolean; loraNameOrPath?: string; loraWeight?: number; + /** Override DiT model for this job (e.g. from Generation tab selector). Only installed models. 
*/ + aceStepDitModel?: string; } export interface PlayerState { From 26aa3e768bf78c6aba248026d4d89b313449ddc0 Mon Sep 17 00:00:00 2001 From: E Date: Sun, 8 Feb 2026 16:05:37 +0100 Subject: [PATCH 2/3] Lego mode: defaults from ACE-Step-1.5 #117, caption/instruction handling, docs - Backend: ref_audio_strength=1.0 for lego (avoid MPS crash), force thinking=False, shift=3.0 for lego; instruction + caption prompt build (no duplicate/trailing comma) - generate_ace: plumb shift param (3.0 lego / 6.0 default) - UI: lego backing default 1.0, caption optional with tooltip, send style=caption only - docs/LEGO-MODE.md: known limits and defaults; docs/ace-step-skills: ACE-Step skills reference (SKILL.md, music-creation-guide.md) Co-authored-by: Cursor --- api/generate.py | 28 +- docs/LEGO-MODE.md | 32 ++ docs/ace-step-skills/README.md | 17 + docs/ace-step-skills/SKILL.md | 356 +++++++++++++++++++ docs/ace-step-skills/music-creation-guide.md | 350 ++++++++++++++++++ generate_ace.py | 6 +- ui/components/CreatePanel.tsx | 22 +- 7 files changed, 795 insertions(+), 16 deletions(-) create mode 100644 docs/LEGO-MODE.md create mode 100644 docs/ace-step-skills/README.md create mode 100644 docs/ace-step-skills/SKILL.md create mode 100644 docs/ace-step-skills/music-creation-guide.md diff --git a/api/generate.py b/api/generate.py index 3f8d7cc..99699d1 100644 --- a/api/generate.py +++ b/api/generate.py @@ -298,13 +298,17 @@ def _run_generation(job_id: str) -> None: bpm = None except (TypeError, ValueError): bpm = None - # Lego/extract/complete: instruction (uppercase track) + caption appended with comma. + # Lego/extract/complete: instruction (uppercase track) + optional caption. Instruction is auto (e.g. "Generate the GUITAR track..."); caption is user style (key, BPM, tone). # No metas — BPM/key/timesignature should match the input backing. 
if task in ("lego", "extract", "complete"): instruction = _uppercase_track_in_instruction( instruction or "Generate an instrument track based on the audio context:" ) - prompt = (instruction.rstrip(":").strip() + ", " + (caption or "").strip()).strip() if (instruction or caption) else instruction + cap = (caption or "").strip() + if not cap: + prompt = instruction.rstrip(":").strip() + else: + prompt = (instruction.rstrip(":").strip() + ", " + cap).strip() if not prompt: prompt = instruction or "Generate an instrument track based on the audio context" title = (params.get("title") or "Untitled").strip() or "Track" @@ -343,12 +347,13 @@ def _run_generation(job_id: str) -> None: # When reference/source audio is provided, enable Audio2Audio so ACE-Step uses it (cover/retake/repaint/lego). # Defaults aligned with ACE-Step-MCP (ref_audio_strength 0.5) and cover/retake UX (strong source → 0.8). - # Lego/extract/complete: low ref_audio_strength so output follows prompt (new instrument), not copy of backing. + # Lego/extract/complete: default ref_audio_strength=1.0 to avoid MPS crash (batch dim mismatch when <1.0). + # See https://github.com/ace-step/ACE-Step-1.5/issues/117 — lower values can improve "new instrument" feel but crash on Apple Silicon. # See docs/ACE-Step-INFERENCE.md: audio_cover_strength 1.0 = strong adherence; lower = more prompt influence. 
audio2audio_enable = bool(src_audio_path) ref_default = 0.8 if task in ("cover", "retake") else (0.5 if task == "audio2audio" else 0.7) if task in ("lego", "extract", "complete"): - ref_default = 0.25 # low strength so output follows prompt (instrument) while matching backing timing + ref_default = 1.0 # 1.0 avoids MPS crash; user can lower via legoBackingInfluence if not on Apple Silicon # audio_cover_strength per ACE-Step; lego/cover blend use specific overrides when set ref_audio_strength = params.get("legoBackingInfluence") if task in ("lego", "extract", "complete") else None if ref_audio_strength is None and cover_blend: @@ -381,6 +386,15 @@ def _run_generation(job_id: str) -> None: retake_variance = 0.2 retake_variance = max(0.0, min(1.0, retake_variance)) + # Shift (timestep): 3.0 recommended for lego/timing; 6.0 pipeline default for others. See ACE-Step-1.5 issue #117. + try: + shift_val = float(params.get("shift") or params.get("shiftFactor") or 0) + except (TypeError, ValueError): + shift_val = 0.0 + if shift_val <= 0: + shift_val = 3.0 if task in ("lego", "extract", "complete") else 6.0 + shift_val = max(0.1, min(10.0, shift_val)) + # LoRA adapter (optional): path or folder name under custom_lora lora_name_or_path = (params.get("loraNameOrPath") or params.get("lora_name_or_path") or "").strip() try: @@ -389,8 +403,11 @@ def _run_generation(job_id: str) -> None: lora_weight = 0.75 lora_weight = max(0.0, min(2.0, lora_weight)) - # Thinking / LM / CoT (passed through so pipeline or future LM path can use them) + # Thinking / LM / CoT (passed through so pipeline or future LM path can use them). + # Lego/extract/complete: force thinking=False so src_audio drives context; thinking=True overrides with LLM codes (issue #117). 
thinking = bool(params.get("thinking", False)) + if task in ("lego", "extract", "complete"): + thinking = False use_cot_metas = bool(params.get("useCotMetas", True)) use_cot_caption = bool(params.get("useCotCaption", True)) # Lego/extract/complete: instruction must stay verbatim ("Generate the X track based on the audio context:"). @@ -466,6 +483,7 @@ def _run_generation(job_id: str) -> None: lora_weight=lora_weight, cancel_check=cancel_check, vocal_language=vocal_lang or "", + shift=shift_val, thinking=thinking, use_cot_metas=use_cot_metas, use_cot_caption=use_cot_caption, diff --git a/docs/LEGO-MODE.md b/docs/LEGO-MODE.md new file mode 100644 index 0000000..326cf90 --- /dev/null +++ b/docs/LEGO-MODE.md @@ -0,0 +1,32 @@ +# Lego Mode (ACE-Step 1.5) + +Lego mode adds a new instrument track on top of backing audio (e.g. add guitar to a beat). It requires the **Base** DiT model. + +## Known limitations and workarounds + +We align with findings from [ACE-Step-1.5 issue #117](https://github.com/ace-step/ACE-Step-1.5/issues/117) (BPM/timing drift and MPS crashes): + +1. **Timing drift** + Generated tracks are not strictly BPM-locked to the source; onsets can drift (20–80 ms). Workarounds that help: + - Match **duration** to the source (e.g. 4 bars at 135 BPM → duration ≈ 7.1 s: `4 * (60/135) * 4` for 4/4). + - Use **shorter segments** (e.g. 4 bars) then duplicate if needed; less time for drift. + - We use **shift=3.0** for lego (recommended in the issue for better timing vs shift=1.0). + +2. **Apple Silicon (MPS)** + On Mac: + - **`ref_audio_strength` (backing influence) < 1.0** can crash with a batch dimension mismatch at the cover→text2music transition. We **default to 1.0** for lego so Apple Silicon users don’t hit this. Lower values (0.2–0.5) can improve “new instrument” feel on non-MPS. + - **Thinking (LM)** is **disabled for lego** so the backing drives context; with thinking on, LLM-generated codes can override the source and hurt timing/context. + +3. 
**Caption and BPM** + Include style, key, and BPM in the caption (e.g. “electric guitar, C major, 135 BPM, 4 bars”) and set **BPM** in the API so metadata matches the backing. + +## AceForge defaults (lego) + +| Parameter | Default | Note | +|------------------------|--------|------| +| `ref_audio_strength` | 1.0 | Avoids MPS crash; UI “Backing influence” | +| `thinking` | false | Forced off for lego so src_audio drives context | +| `shift` | 3.0 | Better timing than 1.0/6.0 for lego | +| `use_cot_caption` | false | Keep instruction verbatim (“Generate the X track…”) | + +Users can still lower backing influence on non-Apple Silicon if they want more “new instrument” and accept the risk of drift or (on MPS) crash. diff --git a/docs/ace-step-skills/README.md b/docs/ace-step-skills/README.md new file mode 100644 index 0000000..64ec767 --- /dev/null +++ b/docs/ace-step-skills/README.md @@ -0,0 +1,17 @@ +# ACE-Step Skills (reference knowledge) + +This folder contains reference material from the official **ACE-Step Skills** repository, used as knowledge for AceForge development and for aligning with ACE-Step concepts (caption, lyrics, task types, API parameters). + +**Source:** [ace-step/ace-step-skills](https://github.com/ace-step/ace-step-skills) — `skills/acestep/` +**License:** See the upstream repository. + +## Contents + +| File | Description | +|------|-------------| +| [SKILL.md](./SKILL.md) | ACE-Step skill definition: API usage, generation modes, parameters, config. | +| [music-creation-guide.md](./music-creation-guide.md) | Music creation guide: caption, lyrics, structure tags, metadata, duration. | + +## Note for AceForge + +AceForge runs its own backend and API (Flask, `api/generate.py`, etc.), not the standalone ACE-Step API server on port 8001. The *concepts* (caption vs lyrics, task types, parameters, music-creation practices) still apply and are referenced when implementing or documenting AceForge features. 
diff --git a/docs/ace-step-skills/SKILL.md b/docs/ace-step-skills/SKILL.md new file mode 100644 index 0000000..141ed68 --- /dev/null +++ b/docs/ace-step-skills/SKILL.md @@ -0,0 +1,356 @@ +--- +name: acestep +description: Use ACE-Step API to generate music, edit songs, and remix music. Supports text-to-music, lyrics generation, audio continuation, and audio repainting. Use this skill when users mention generating music, creating songs, music production, remix, or audio continuation. +allowed-tools: Read, Write, Bash, Skill +--- + +# ACE-Step Music Generation Skill + +Use ACE-Step V1.5 API for music generation. Script: `scripts/acestep.sh` (requires curl + jq). + +## Prerequisites - ACE-Step API Service + +**IMPORTANT**: This skill requires the ACE-Step API server to be running. + +### Required Dependencies + +The `scripts/acestep.sh` script requires the following tools: + +**1. curl** - For making HTTP requests to the API +**2. jq** - For parsing JSON responses + +#### Check Dependencies + +Before using this skill, verify that the required tools are installed: + +```bash +# Check curl +curl --version + +# Check jq +jq --version +``` + +#### Installing jq + +If jq is not installed, the script will attempt to install it automatically. If automatic installation fails, install manually: + +**Windows:** +```bash +# Using Chocolatey +choco install jq + +# Or download from: https://jqlang.github.io/jq/download/ +# Extract jq.exe and add to PATH +``` + +**macOS:** +```bash +# Using Homebrew +brew install jq + +# Using MacPorts +port install jq +``` + +**Linux:** +```bash +# Debian/Ubuntu +sudo apt-get install jq + +# Fedora/RHEL/CentOS +sudo yum install jq +# or +sudo dnf install jq + +# Arch Linux +sudo pacman -S jq +``` + +**Verification:** +```bash +jq --version +# Should output: jq-1.x +``` + +If user reports jq installation issues, guide them through manual installation for their platform. + +### Before First Use + +**Ask the user about their setup:** + +1. 
**"Do you have ACE-Step API service configured and running?"** + + If **YES**: + - Verify the API endpoint: `curl -s http://127.0.0.1:8001/health` + - If using remote service, ask for the API URL and update `scripts/config.json` + - Proceed with music generation + + If **NO** or **NOT SURE**: + - Ask: "Do you have ACE-Step installed?" + + **If installed but not running**: + - Use the acestep-docs skill to help them start the service + - Guide them through startup process + + **If not installed**: + - Offer to help download and install ACE-Step + - Ask: "Would you like to use the Windows portable package or install from source?" + - Use acestep-docs skill to guide through installation + +### Service Configuration + +**Local Service (Default):** +```json +{ + "api_url": "http://127.0.0.1:8001", + "api_key": "" +} +``` + +**Remote Service:** +```json +{ + "api_url": "http://your-server-ip:8001", + "api_key": "your-api-key-if-needed" +} +``` + +To configure remote service, update `scripts/config.json` or use: +```bash +cd {skill_directory}/scripts/ +./acestep.sh config --set api_url "http://remote-server:8001" +./acestep.sh config --set api_key "your-key" +``` + +### Using acestep-docs Skill for Setup Help + +**IMPORTANT**: For installation and startup, always use the acestep-docs skill to get complete and accurate guidance. + +When user needs help with installation or startup, invoke the acestep-docs skill: + +``` +Use the Skill tool to invoke: acestep-docs +``` + +**DO NOT provide simplified startup commands** - each user's environment may be different. Always guide them to use acestep-docs for proper setup. + +### Health Check + +**To verify if service is running:** +```bash +curl http://127.0.0.1:8001/health +# Should return: {"status":"ok",...} +``` + +If health check fails, use acestep-docs skill to help user start the service properly. + +--- + +**WORKFLOW**: For user requests requiring vocals, you should: +1. 
Consult [Music Creation Guide](./music-creation-guide.md) for lyrics writing, caption creation, duration/BPM/key selection +2. Write complete, well-structured lyrics yourself based on the guide +3. Generate using Caption mode with `-c` and `-l` parameters + +Only use Simple/Random mode (`-d` or `random`) for quick inspiration or instrumental exploration. + +## Output Files + +After generation, the script automatically saves results to the `acestep_output` folder in the project root (same level as `.claude`): + +``` +project_root/ +├── .claude/ +│ └── skills/acestep/... +├── acestep_output/ # Output directory +│ ├──.json # Complete task result (JSON) +│ ├── _1.mp3 # First audio file +│ ├── _2.mp3 # Second audio file (if batch_size > 1) +│ └── ... +└── ... +``` + +### JSON Result Structure + +**Important**: When LM enhancement is enabled (`use_format=true`), the final synthesized content may differ from your input. Check the JSON file for actual values: + +| Field | Description | +|-------|-------------| +| `prompt` | **Actual caption** used for synthesis (may be LM-enhanced) | +| `lyrics` | **Actual lyrics** used for synthesis (may be LM-enhanced) | +| `metas.prompt` | Original input caption | +| `metas.lyrics` | Original input lyrics | +| `metas.bpm` | BPM used | +| `metas.keyscale` | Key scale used | +| `metas.duration` | Duration in seconds | +| `generation_info` | Detailed timing and model info | +| `seed_value` | Seeds used (for reproducibility) | +| `lm_model` | LM model name | +| `dit_model` | DiT model name | + +To get the actual synthesized lyrics, parse the JSON and read the top-level `lyrics` field, not `metas.lyrics`. 
+ +## Script Commands + +**CRITICAL - Complete Lyrics Input**: When providing lyrics via the `-l` parameter, you MUST pass ALL lyrics content WITHOUT any omission: +- If user provides lyrics, pass the ENTIRE text they give you +- If you generate lyrics yourself, pass the COMPLETE lyrics you created +- NEVER truncate, shorten, or pass only partial lyrics +- Missing lyrics will result in incomplete or incoherent songs + +**Music Parameters**: Refer to [Music Creation Guide](./music-creation-guide.md) for how to calculate duration, choose BPM, key scale, and time signature. + +```bash +# need to cd skills path +cd {project_root}/{.claude or .codex}/skills/acestep/ + +# Caption mode - RECOMMENDED: Write lyrics first, then generate +./scripts/acestep.sh generate -c "Electronic pop, energetic synths" -l "[Verse] Your complete lyrics +[Chorus] Full chorus here..." --duration 120 --bpm 128 + +# Instrumental only +./scripts/acestep.sh generate "Jazz with saxophone" + +# Quick exploration (Simple/Random mode) +./scripts/acestep.sh generate -d "A cheerful song about spring" +./scripts/acestep.sh random + +# Options +./scripts/acestep.sh generate "Rock" --duration 60 --batch 2 +./scripts/acestep.sh generate "EDM" --no-thinking # Faster + +# Other commands +./scripts/acestep.sh status +./scripts/acestep.sh health +./scripts/acestep.sh models +``` + +## Configuration + +**Important**: Configuration follows this priority (high to low): + +1. **Command line arguments** > **config.json defaults** +2. User-specified parameters **temporarily override** defaults but **do not modify** config.json +3. 
Only `config --set` command **permanently modifies** config.json + +### Default Config File (`scripts/config.json`) + +```json +{ + "api_url": "http://127.0.0.1:8001", + "api_key": "", + "generation": { + "thinking": true, + "use_format": false, + "use_cot_caption": true, + "use_cot_language": false, + "batch_size": 1, + "audio_format": "mp3", + "vocal_language": "en" + } +} +``` + +| Option | Default | Description | +|--------|---------|-------------| +| `api_url` | `http://127.0.0.1:8001` | API server address | +| `api_key` | `""` | API authentication key (optional) | +| `generation.thinking` | `true` | Enable 5Hz LM (higher quality, slower) | +| `generation.audio_format` | `mp3` | Output format (mp3/wav/flac) | +| `generation.vocal_language` | `en` | Vocal language | + +## API Reference + +All responses wrapped: `{"data":, "code": 200, "error": null, "timestamp": ...}` + +| Endpoint | Method | Description | +|----------|--------|-------------| +| `/health` | GET | Health check | +| `/release_task` | POST | Create generation task | +| `/query_result` | POST | Query task status, body: `{"task_id_list": ["id"]}` | +| `/v1/models` | GET | List available models | +| `/v1/audio?path={path}` | GET | Download audio file | + +### Query Result Response + +```json +{ + "data": [{ + "task_id": "xxx", + "status": 1, + "result": "[{\"file\":\"/v1/audio?path=...\",\"metas\":{\"bpm\":120,\"duration\":60,\"keyscale\":\"C Major\"}}]" + }] +} +``` + +Status codes: `0` = processing, `1` = success, `2` = failed + +## Request Parameters (`/release_task`) + +Parameters can be placed in `param_obj` object. 
+ +### Generation Modes + +| Mode | Usage | When to Use | +|------|-------|-------------| +| **Caption** (Recommended) | `generate -c "style" -l "lyrics"` | For vocal songs - write lyrics yourself first | +| **Simple** | `generate -d "description"` | Quick exploration, LM generates everything | +| **Random** | `random` | Random generation for inspiration | + +### Core Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `prompt` | string | "" | Music style description (Caption mode) | +| `lyrics` | string | "" | **Full lyrics content** - Pass ALL lyrics without omission. Use `[inst]` for instrumental. Partial/truncated lyrics = incomplete songs | +| `sample_mode` | bool | false | Enable Simple/Random mode | +| `sample_query` | string | "" | Description for Simple mode | +| `thinking` | bool | false | Enable 5Hz LM for audio code generation | +| `use_format` | bool | false | Use LM to enhance caption/lyrics | +| `model` | string | - | DiT model name | +| `batch_size` | int | 1 | Number of audio files to generate | + +### Music Attributes + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `audio_duration` | float | - | Duration in seconds | +| `bpm` | int | - | Tempo (beats per minute) | +| `key_scale` | string | "" | Key (e.g. "C Major") | +| `time_signature` | string | "" | Time signature (e.g. "4/4") | +| `vocal_language` | string | "en" | Language code (en, zh, ja, etc.) 
| +| `audio_format` | string | "mp3" | Output format (mp3/wav/flac) | + +### Generation Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `inference_steps` | int | 8 | Diffusion steps | +| `guidance_scale` | float | 7.0 | CFG scale | +| `seed` | int | -1 | Random seed (-1 for random) | +| `infer_method` | string | "ode" | Diffusion method (ode/sde) | + +### Audio Task Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `task_type` | string | "text2music" | text2music / continuation / repainting | +| `src_audio_path` | string | - | Source audio for continuation | +| `repainting_start` | float | 0.0 | Repainting start position (seconds) | +| `repainting_end` | float | - | Repainting end position (seconds) | + +### Example Request (Simple Mode) + +```json +{ + "sample_mode": true, + "sample_query": "A cheerful pop song about spring", + "thinking": true, + "param_obj": { + "duration": 60, + "bpm": 120, + "language": "en" + }, + "batch_size": 2 +} +``` diff --git a/docs/ace-step-skills/music-creation-guide.md b/docs/ace-step-skills/music-creation-guide.md new file mode 100644 index 0000000..3a52f6a --- /dev/null +++ b/docs/ace-step-skills/music-creation-guide.md @@ -0,0 +1,350 @@ +# ACE-Step Music Creation Guide + +> This guide contains professional music creation knowledge extracted from ACE-Step Tutorial. Use this as reference when creating music with ACE-Step. + +--- + +## Input Control: What Do You Want? + +This is the part where you communicate "creative intent" with the model—what kind of music you want to generate. + +| Category | Parameter | Function | +|----------|-----------|----------| +| **Task Type** | `task_type` | Determines generation mode: text2music, cover, repaint, lego, extract, complete | +| **Text Input** | `caption` | Description of overall music elements: style, instruments, emotion, atmosphere, timbre, vocal gender, progression, etc. 
| +| | `lyrics` | Temporal element description: lyric content, music structure evolution, vocal changes, vocal/instrument performance style, start/end style, articulation, etc. (use `[Instrumental]` for instrumental music) | +| **Music Metadata** | `bpm` | Tempo (30–300) | +| | `keyscale` | Key (e.g., C Major, Am) | +| | `timesignature` | Time signature (4/4, 3/4, 6/8) | +| | `vocal_language` | Vocal language | +| | `duration` | Target duration (seconds) | +| **Audio Reference** | `reference_audio` | Global reference for timbre or style (for cover, style transfer) | +| | `src_audio` | Source audio for non-text2music tasks (text2music defaults to silence, no input needed) | +| | `audio_codes` | Semantic codes input to model in Cover mode (advanced: reuse codes for variants, convert songs to codes for extension, combine like DJ mixing) | +| **Interval Control** | `repainting_start/end` | Time interval for operations (repaint redraw area / lego new track area) | + +--- + +## About Caption: The Most Important Input + +**Caption is the most important factor affecting generated music.** + +It supports multiple input formats: simple style words, comma-separated tags, complex natural language descriptions. The model has been trained to be compatible with various formats, ensuring text format doesn't significantly affect model performance. 
+ +### Common Dimensions for Caption Writing + +| Dimension | Examples | +|-----------|----------| +| **Style/Genre** | pop, rock, jazz, electronic, hip-hop, R&B, folk, classical, lo-fi, synthwave | +| **Emotion/Atmosphere** | melancholic, uplifting, energetic, dreamy, dark, nostalgic, euphoric, intimate | +| **Instruments** | acoustic guitar, piano, synth pads, 808 drums, strings, brass, electric bass | +| **Timbre Texture** | warm, bright, crisp, muddy, airy, punchy, lush, raw, polished | +| **Era Reference** | 80s synth-pop, 90s grunge, 2010s EDM, vintage soul, modern trap | +| **Production Style** | lo-fi, high-fidelity, live recording, studio-polished, bedroom pop | +| **Vocal Characteristics** | female vocal, male vocal, breathy, powerful, falsetto, raspy, choir | +| **Speed/Rhythm** | slow tempo, mid-tempo, fast-paced, groovy, driving, laid-back | +| **Structure Hints** | building intro, catchy chorus, dramatic bridge, fade-out ending | + +### Practical Principles for Caption Writing + +1. **Specific beats vague** — "sad piano ballad with female breathy vocal" works better than "a sad song." + +2. **Combine multiple dimensions** — Single-dimension descriptions give the model too much room to play; combining style+emotion+instruments+timbre can more precisely anchor your desired direction. + +3. **Use references well** — "in the style of 80s synthwave" or "reminiscent of Bon Iver" can quickly convey complex aesthetic preferences. + +4. **Texture words are useful** — Adjectives like warm, crisp, airy, punchy can influence mixing and timbre tendencies. + +5. **Don't pursue perfect descriptions** — Caption is a starting point, not an endpoint. Write a general direction first, then iterate based on results. + +6. **Description granularity determines freedom** — More omitted descriptions give the model more room to play, more random factor influence; more detailed descriptions constrain the model more. Decide specificity based on your needs—want surprises? 
Write less. Want control? Write more details. + +7. **Avoid conflicting words** — Conflicting style combinations easily lead to degraded output. For example, wanting both "classical strings" and "hardcore metal" simultaneously—the model will try to fuse but usually not ideal. + + **Ways to resolve conflicts:** + - **Repetition reinforcement** — Strengthen the elements you want more in mixed styles by repeating certain words + - **Conflict to evolution** — Transform style conflicts into temporal style evolution. For example: "Start with soft strings, middle becomes noisy dynamic metal rock, end turns to hip-hop"—this gives the model clear guidance on how to handle different styles, rather than mixing them into a mess + +--- + +## About Lyrics: The Temporal Script + +If Caption describes the music's "overall portrait"—style, atmosphere, timbre—then **Lyrics is the music's "temporal script"**, controlling how music unfolds over time. + +Lyrics is not just lyric content. It carries: +- The lyric text itself +- **Structure tags** ([Verse], [Chorus], [Bridge]...) +- **Vocal style hints** ([raspy vocal], [whispered]...) +- **Instrumental sections** ([guitar solo], [drum break]...) +- **Energy changes** ([building energy], [explosive drop]...) 
+ +### Common Structure Tags + +| Category | Tag | Description | +|----------|-----|-------------| +| **Basic Structure** | `[Intro]` | Opening, establish atmosphere | +| | `[Verse]` / `[Verse 1]` | Verse, narrative progression | +| | `[Pre-Chorus]` | Pre-chorus, build energy | +| | `[Chorus]` | Chorus, emotional climax | +| | `[Bridge]` | Bridge, transition or elevation | +| | `[Outro]` | Ending, conclusion | +| **Dynamic Sections** | `[Build]` | Energy gradually rising | +| | `[Drop]` | Electronic music energy release | +| | `[Breakdown]` | Reduced instrumentation, space | +| **Instrumental Sections** | `[Instrumental]` | Pure instrumental, no vocals | +| | `[Guitar Solo]` | Guitar solo | +| | `[Piano Interlude]` | Piano interlude | +| **Special Tags** | `[Fade Out]` | Fade out ending | +| | `[Silence]` | Silence | + +### Combining Tags: Use Moderately + +Structure tags can be combined with `-` for finer control: + +``` +[Chorus - anthemic] +This is the chorus lyrics +Dreams are burning + +[Bridge - whispered] +Whisper those words softly +``` + +⚠️ **Note: Don't stack too many tags.** + +``` +❌ Not recommended: +[Chorus - anthemic - stacked harmonies - high energy - powerful - epic] + +✅ Recommended: +[Chorus - anthemic] +``` + +**Principle**: Keep structure tags concise; put complex style descriptions in Caption. + +### ⚠️ Key: Maintain Consistency Between Caption and Lyrics + +**Models are not good at resolving conflicts.** If descriptions in Caption and Lyrics contradict, the model gets confused and output quality decreases. + +**Checklist:** +- Instruments in Caption ↔ Instrumental section tags in Lyrics +- Emotion in Caption ↔ Energy tags in Lyrics +- Vocal description in Caption ↔ Vocal control tags in Lyrics + +Think of Caption as "overall setting" and Lyrics as "shot script"—they should tell the same story. 
+ +### Vocal Control Tags + +| Tag | Effect | +|-----|--------| +| `[raspy vocal]` | Raspy, textured vocals | +| `[whispered]` | Whispered | +| `[falsetto]` | Falsetto | +| `[powerful belting]` | Powerful, high-pitched singing | +| `[spoken word]` | Rap/recitation | +| `[harmonies]` | Layered harmonies | +| `[call and response]` | Call and response | +| `[ad-lib]` | Improvised embellishments | + +### Energy and Emotion Tags + +| Tag | Effect | +|-----|--------| +| `[high energy]` | High energy, passionate | +| `[low energy]` | Low energy, restrained | +| `[building energy]` | Increasing energy | +| `[explosive]` | Explosive energy | +| `[melancholic]` | Melancholic | +| `[euphoric]` | Euphoric | +| `[dreamy]` | Dreamy | +| `[aggressive]` | Aggressive | + +### Lyric Text Writing Tips + +**1. Control Syllable Count** + +**6-10 syllables per line** usually works best. The model aligns syllables to beats—if one line has 6 syllables and the next has 14, rhythm becomes strange. + +**Tip**: Keep similar syllable counts for lines in the same position (e.g., first line of each verse) (±1-2 deviation). + +**2. Use Case to Control Intensity** + +Uppercase indicates stronger vocal intensity: + +``` +[Verse] +walking through the empty streets (normal intensity) + +[Chorus] +WE ARE THE CHAMPIONS! (high intensity, shouting) +``` + +**3. Use Parentheses for Background Vocals** + +``` +[Chorus] +We rise together (together) +Into the light (into the light) +``` + +Content in parentheses is processed as background vocals or harmonies. + +**4. Extend Vowels** + +You can extend sounds by repeating vowels: + +``` +Feeeling so aliiive +``` + +But use cautiously—effects are unstable, sometimes ignored or mispronounced. + +**5. 
Clear Section Separation** + +Separate each section with blank lines: + +``` +[Verse 1] +First verse lyrics +Continue first verse + +[Chorus] +Chorus lyrics +Chorus continues +``` + +### Avoiding "AI-flavored" Lyrics + +These characteristics make lyrics seem mechanical and lack human touch: + +| Red Flag 🚩 | Description | +|-------------|-------------| +| **Adjective stacking** | "neon skies, electric hearts, endless dreams"—filling a section with vague imagery | +| **Rhyme chaos** | Inconsistent rhyme patterns, or forced rhymes causing semantic breaks | +| **Blurred section boundaries** | Lyric content crosses structure tags, Verse content "flows" into Chorus | +| **No breathing room** | Each line too long, can't sing in one breath | +| **Mixed metaphors** | First verse uses water imagery, second suddenly becomes fire, third is flying—listeners can't anchor | + +**Metaphor discipline**: Stick to one core metaphor per song, exploring its multiple aspects. + +--- + +## About Music Metadata: Optional Fine Control + +**Most of the time, you don't need to manually set metadata.** + +When you enable `thinking` mode (or enable `use_cot_metas`), LM automatically infers appropriate BPM, key, time signature, etc. based on your Caption and Lyrics. This is usually good enough. + +But if you have clear ideas, you can also manually control them: + +| Parameter | Control Range | Description | +|-----------|--------------|-------------| +| `bpm` | 30–300 | Tempo. Common distribution: slow songs 60–80, mid-tempo 90–120, fast songs 130–180 | +| `keyscale` | Key | e.g., `C Major`, `Am`, `F# Minor`. Affects overall pitch and emotional color | +| `timesignature` | Time signature | `4/4` (most common), `3/4` (waltz), `6/8` (swing feel) | +| `vocal_language` | Language | Vocal language. LM usually auto-detects from lyrics | +| `duration` | Seconds | Target duration. 
Actual generation may vary slightly | + +### Understanding Control Boundaries + +These parameters are **guidance** rather than **precise commands**: + +- **BPM**: Common range (60–180) works well; extreme values (like 30 or 280) have less training data, may be unstable +- **Key**: Common keys (C, G, D, Am, Em) are stable; rare keys may be ignored or shifted +- **Time signature**: `4/4` is most reliable; `3/4`, `6/8` usually OK; complex signatures (like `5/4`, `7/8`) are advanced, effects vary by style +- **Duration**: Short songs (30–60s) and medium length (2–4min) are stable; very long generation may have repetition or structure issues + +### When Do You Need Manual Settings? + +| Scenario | Suggestion | +|----------|------------| +| Daily generation | Don't worry, let LM auto-infer | +| Clear tempo requirement | Manually set `bpm` | +| Specific style (e.g., waltz) | Manually set `timesignature=3/4` | +| Need to match other material | Manually set `bpm` and `duration` | +| Pursue specific key color | Manually set `keyscale` | + +**Tip**: If you manually set metadata but generation results clearly don't match—check if there's conflict with Caption/Lyrics. For example, Caption says "slow ballad" but `bpm=160`, the model gets confused. + +**Recommended Practice**: Don't write tempo, BPM, key, and other metadata information in Caption. These should be set through dedicated metadata parameters (`bpm`, `keyscale`, `timesignature`, etc.), not described in Caption. Caption should focus on style, emotion, instruments, timbre, and other musical characteristics, while metadata information is handled by corresponding parameters. 
+ +--- + +## Duration Calculation Guidelines + +When creating music, you MUST calculate appropriate duration based on lyrics content and song structure: + +### Estimation Method + +- **Per line of lyrics**: 3-5 seconds +- **Intro/Outro**: 5-10 seconds each +- **Instrumental sections**: 5-15 seconds each +- **Typical song structures**: + - 2 verses + 2 choruses: 120-150 seconds minimum + - 2 verses + 2 choruses + bridge: 180-240 seconds minimum + - Full song with intro/outro: 210-270 seconds (3.5-4.5 minutes) + +### Common Pitfalls + +❌ **DON'T**: Set duration too short for the lyrics amount +- Example: 10 lines of lyrics with 30 seconds → rushed, compressed + +✅ **DO**: Calculate realistic duration +- Example: 10 lines of lyrics → ~40 seconds of vocals + 20 seconds intro/outro = 60 seconds minimum + +### BPM and Duration Relationship + +The BPM affects how quickly lyrics are sung: +- **Slower BPM (60-80)**: Need MORE duration for same lyrics +- **Medium BPM (100-130)**: Standard duration +- **Faster BPM (150-180)**: Can fit more lyrics in less time, but still need breathing room + +**Rule of thumb**: When in doubt, estimate longer rather than shorter. A song that's too short will feel rushed and incomplete. + +--- + +## Complete Example + +Assuming Caption is: `female vocal, piano ballad, emotional, intimate atmosphere, strings, building to powerful chorus` + +``` +[Intro - piano] + +[Verse 1] +月光洒在窗台上 +我听见你的呼吸 +城市在远处沉睡 +只有我们还醒着 + +[Pre-Chorus] +这一刻如此安静 +却藏着汹涌的心 + +[Chorus - powerful] +让我们燃烧吧 +像夜空中的烟火 +短暂却绚烂 +这就是我们的时刻 + +[Verse 2] +时间在指尖流过 +我们抓不住什么 +但至少此刻拥有 +彼此眼中的火焰 + +[Bridge - whispered] +如果明天一切消散 +至少我们曾经闪耀 + +[Final Chorus] +让我们燃烧吧 +像夜空中的烟火 +短暂却绚烂 +THIS IS OUR MOMENT! + +[Outro - fade out] +``` + +Note: In this example, Lyrics tags (piano, powerful, whispered) are consistent with Caption descriptions (piano ballad, building to powerful chorus, intimate), with no conflicts. 
+ +--- diff --git a/generate_ace.py b/generate_ace.py index 3420ab3..4dc4cec 100644 --- a/generate_ace.py +++ b/generate_ace.py @@ -898,6 +898,7 @@ def _run_ace_text2music( lora_weight: float = 0.75, cancel_check: Optional[Callable[[], bool]] = None, vocal_language: str | None = None, + shift: float = 6.0, # Timestep shift; 3.0 for lego (ACE-Step-1.5 #117) # Thinking / LM / CoT (passed to pipeline; used when LM path is integrated) thinking: bool = False, use_cot_metas: bool = True, @@ -1016,7 +1017,7 @@ def _run_ace_text2music( "batch_size": 1, "save_path": str(output_path), "debug": False, - "shift": 6.0, + "shift": float(shift), } if vocal_language is not None and (vocal_language or "").strip(): call_kwargs["vocal_language"] = (vocal_language or "").strip() @@ -1189,6 +1190,7 @@ def generate_track_ace( lora_weight: float = 0.75, cancel_check: Optional[Callable[[], bool]] = None, vocal_language: str = "", + shift: float = 6.0, # Timestep shift; 3.0 recommended for lego (better timing per ACE-Step-1.5 #117) # Thinking / LM / CoT (forwarded to pipeline for when LM path is integrated) thinking: bool = False, use_cot_metas: bool = True, @@ -1358,6 +1360,7 @@ def generate_track_ace( lora_weight=float(lora_weight), cancel_check=cancel_check, vocal_language=(vocal_language or "").strip() or None, + shift=float(shift), thinking=thinking, use_cot_metas=use_cot_metas, use_cot_caption=use_cot_caption, @@ -1368,6 +1371,7 @@ def generate_track_ace( lm_top_p=lm_top_p, lm_negative_prompt=(lm_negative_prompt or "").strip() or "NO USER INPUT", lm_checkpoint_path=lm_checkpoint_path, + shift=shift, ) _report_progress(0.90, "fades") diff --git a/ui/components/CreatePanel.tsx b/ui/components/CreatePanel.tsx index c7ec20d..c73a5ed 100644 --- a/ui/components/CreatePanel.tsx +++ b/ui/components/CreatePanel.tsx @@ -179,7 +179,7 @@ export const CreatePanel: React.FC = ({ onGenerate, isGenerati // Lego tab only const [legoTrackName, setLegoTrackName] = useState('guitar'); const 
[legoCaption, setLegoCaption] = useState(''); - const [legoBackingInfluence, setLegoBackingInfluence] = useState(0.25); + const [legoBackingInfluence, setLegoBackingInfluence] = useState(1.0); const [legoValidationError, setLegoValidationError] = useState(''); // Shared between Simple and Custom: description/style (genre, mood, etc.) and title @@ -846,6 +846,7 @@ export const CreatePanel: React.FC = ({ onGenerate, isGenerati setLegoValidationError('Please select backing audio (required for Lego).'); return; } + // Instruction is auto from track name; caption (style) is optional user description (key, BPM, tone). Backend builds prompt = instruction + ", " + caption when caption present. const instruction = `Generate the ${legoTrackName} track based on the audio context:`; const effGuidance = guidanceScale; const effAudioCover = legoBackingInfluence; @@ -853,9 +854,9 @@ export const CreatePanel: React.FC = ({ onGenerate, isGenerati onGenerate({ customMode: false, songDescription: undefined, - prompt: instruction + (legoCaption.trim() ? ', ' + legoCaption.trim() : ''), + prompt: legoCaption.trim() ? instruction + ', ' + legoCaption.trim() : instruction, lyrics: '', - style: legoCaption.trim() || instruction, + style: legoCaption.trim(), // caption only; do not send instruction as style (backend uses instruction + style for prompt) title: title.trim() || `Lego ${legoTrackName}`, instrumental: true, vocalLanguage: 'en', @@ -1677,24 +1678,25 @@ export const CreatePanel: React.FC = ({ onGenerate, isGenerati
- {/* Describe the track (caption) */} + {/* Caption: style/key/BPM (instruction above is auto from track name) */}
-
- Describe the track +
+ Caption (optional) +