diff --git a/.claude/CLAUDE.md b/.claude/CLAUDE.md new file mode 100644 index 0000000..d9fd84c --- /dev/null +++ b/.claude/CLAUDE.md @@ -0,0 +1,25 @@ +# Voicebox Project Notes + +## CLI — voicebox-cli vs cli.py + +**`voicebox/voicebox-cli`** is the real CLI. It is stdlib-only (no pip deps), self-contained, and is what users actually run. It has all commands: `server`, `voices`, `import`, `generate`/`say`, `health`, `config`, `transcribe`, `create-voice`. Config persists to `~/.config/voicebox/config.json`. + +**`voicebox/backend/cli.py`** is dead code. It predates `voicebox-cli` and was superseded. Its only live reference is the launcher line in `setup-linux.sh` which is intentionally left as-is. **Do not modify cli.py.** + +When the user asks for CLI changes, always work on `voicebox-cli`. + +## Key Architecture + +- **Backend**: FastAPI (`backend/main.py`) served by uvicorn on port 17493 +- **Entry points**: `server.py` (PyInstaller binary), `backend/main.py __main__` (dev) +- **Dev script**: `scripts/dev-backend-watch.sh` — loads `.env` from `voicebox/` and `../` then runs uvicorn with `--reload` +- **MLX backend**: `backend/backends/mlx_backend.py` — Apple Silicon only, uses mlx-audio. Models: `mlx-community/Qwen3-TTS-12Hz-{1.7B,0.6B}-Base-4bit`. Uses `Base` variants (not `CustomVoice` — those require a named speaker, not ref_audio). +- **PyTorch backend**: `backend/backends/pytorch_backend.py` — CUDA/CPU, uses qwen-tts +- **Logging**: stdlib `logging`. Set `LOG_LEVEL=DEBUG` env var for verbose output. 
+ +## MLX Gotchas + +- `transformers` verbosity is suppressed at module-level import in `mlx_backend.py` — do not restore or move this +- Concurrent MLX loads crash Metal (`commit an already committed command buffer`) — serialized via `_MLX_LOAD_LOCK` threading lock in `load_model_async` +- `CustomVoice` model variants require a named speaker arg; `Base` variants support arbitrary voice cloning via `ref_audio`/`ref_text` +- On 16GB unified memory, bf16 models cause swap pressure — use 4-bit quantized variants diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..b3042dc --- /dev/null +++ b/.dockerignore @@ -0,0 +1,27 @@ +data/ +backend/venv/ +node_modules/ +__pycache__/ +*.pyc +*.egg-info/ +.claude/ +.git/ +.github/ +.vscode/ +*.md +docs/ +mlx-test/ +scripts/ +tauri/ +web/ +landing/ +.DS_Store +*.log +*.cache +dist/ +build/ +.env +.env.* +*.swp +*.swo +*~ diff --git a/.githooks/pre-commit b/.githooks/pre-commit new file mode 100755 index 0000000..5dca125 --- /dev/null +++ b/.githooks/pre-commit @@ -0,0 +1,48 @@ +#!/usr/bin/env bash +# Pre-commit hook: lint staged files +# Install: git config core.hooksPath .githooks +set -euo pipefail + +REPO_ROOT="$(git rev-parse --show-toplevel)" +STAGED=$(git diff --cached --name-only --diff-filter=ACMR) + +if [[ -z "$STAGED" ]]; then + exit 0 +fi + +FAILED=0 + +# ── Python (ruff) ───────────────────────────────────────────────────────────── +PY_FILES=$(echo "$STAGED" | grep '\.py$' || true) +if [[ -n "$PY_FILES" ]]; then + RUFF="$REPO_ROOT/backend/venv/bin/ruff" + if [[ -x "$RUFF" ]]; then + echo "→ ruff: checking Python files..." + if ! echo "$PY_FILES" | xargs "$RUFF" check --quiet; then + echo " ruff found issues. 
Run: backend/venv/bin/ruff check --fix " + FAILED=1 + fi + else + echo " (ruff not found in backend/venv — skipping Python lint)" + fi +fi + +# ── JS/TS (biome) ───────────────────────────────────────────────────────────── +JS_FILES=$(echo "$STAGED" | grep -E '\.(js|jsx|ts|tsx|json)$' | grep -v 'node_modules' || true) +if [[ -n "$JS_FILES" ]]; then + BIOME=$(command -v biome 2>/dev/null \ + || ls "$REPO_ROOT"/node_modules/.bin/biome 2>/dev/null \ + || ls "$REPO_ROOT"/app/node_modules/.bin/biome 2>/dev/null \ + || true) + if [[ -x "$BIOME" ]]; then + echo "→ biome: checking JS/TS files..." + if ! echo "$JS_FILES" | xargs "$BIOME" check --no-errors-on-unmatched; then + echo " biome found issues. Run: biome check --write " + FAILED=1 + fi + else + echo " (biome not found — skipping JS/TS lint)" + fi +fi + +exit $FAILED diff --git a/.gitignore b/.gitignore index 05f7ef0..4fbee19 100644 --- a/.gitignore +++ b/.gitignore @@ -39,6 +39,8 @@ data/profiles/* data/generations/* data/projects/* data/voicebox.db +data/huggingface +data/model_prefs.json !data/.gitkeep # Logs @@ -57,3 +59,7 @@ tauri/src-tauri/binaries/* tmp/ temp/ *.tmp +output*.m4a +package-lock.json +.claude +tauri/src-tauri/gen/Assets.car diff --git a/.python-version b/.python-version new file mode 100644 index 0000000..763b626 --- /dev/null +++ b/.python-version @@ -0,0 +1 @@ +3.12.12 diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..f3d7c9a --- /dev/null +++ b/Dockerfile @@ -0,0 +1,88 @@ +# syntax=docker/dockerfile:1.4 +# (parser directive: must stay on the very first line of the Dockerfile to take effect) +# Voicebox TTS Server +# CUDA 12.9 + Python 3.12 on Ubuntu 24.04 +# +# Build: +# DOCKER_BUILDKIT=1 docker build -t voicebox . +# DOCKER_BUILDKIT=1 docker build --build-arg CUDA=0 -t voicebox-cpu . +# DOCKER_BUILDKIT=1 docker build --build-arg SERVERLESS=1 -t voicebox-serverless . 
+# +# Run: +# docker compose up -d + +ARG CUDA=1 +ARG SERVERLESS=0 + +# --- Base stage --- +FROM nvidia/cuda:12.9.1-runtime-ubuntu24.04 AS base-cuda +FROM ubuntu:24.04 AS base-cpu + +# --- Pick base based on CUDA arg -- +FROM base-cuda AS base-1 +FROM base-cpu AS base-0 +FROM base-${CUDA} AS base + +ENV DEBIAN_FRONTEND=noninteractive +ENV PYTHONUNBUFFERED=1 + +RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \ + --mount=type=cache,target=/var/lib/apt,sharing=locked \ + apt-get update && apt-get install -y --no-install-recommends \ + python3 \ + python3-venv \ + python3-dev \ + python3-pip \ + libsndfile1 \ + ffmpeg \ + curl \ + sox \ + && rm -rf /var/lib/apt/lists/* + +# --- Dependencies stage (cached layer) --- +FROM base AS deps + +ARG CUDA +WORKDIR /app + +# Create virtual environment outside /app to survive volume mount +RUN python3 -m venv /opt/venv +ENV PATH="/opt/venv/bin:$PATH" + +COPY backend/requirements.txt ./requirements.txt + +RUN --mount=type=cache,target=/root/.cache/pip \ + pip install --upgrade pip && \ + if [ "$CUDA" = "1" ]; then \ + pip install torch torchaudio torchvision --index-url https://download.pytorch.org/whl/cu124 && \ + pip install -r requirements.txt --extra-index-url https://download.pytorch.org/whl/cu124; \ + else \ + pip install torch torchaudio torchvision --index-url https://download.pytorch.org/whl/cpu && \ + pip install -r requirements.txt --extra-index-url https://download.pytorch.org/whl/cpu; \ + fi + +# Source is volume-mounted at runtime (local dev) or COPYed below (serverless) +ENV HF_HOME=/app/data/huggingface +ENV PATH="/opt/venv/bin:$PATH" + +# Copy source into image for non-volume-mount deployments (e.g. 
RunPod) +COPY backend/ /app/backend/ + +# --- Normal mode: FastAPI server on port 17493 --- +FROM deps AS final-0 +EXPOSE 17493 +HEALTHCHECK --interval=60s --timeout=5s --start-period=30s --retries=3 \ + CMD curl -f http://localhost:17493/health || exit 1 +ENTRYPOINT ["/opt/venv/bin/python3", "-m", "backend.main"] +CMD ["--host", "0.0.0.0", "--port", "17493", "--data-dir", "/app/data"] + +# --- Serverless mode: RunPod handler --- +FROM deps AS final-1 +ENV SERVERLESS=1 +HEALTHCHECK NONE +ENTRYPOINT ["/opt/venv/bin/python3", "-u", "-m", "backend.serverless_handler"] +CMD [] + +# --- Pick final stage based on SERVERLESS arg --- +ARG SERVERLESS +FROM final-${SERVERLESS} AS final diff --git a/Makefile b/Makefile index 620f6c8..bd082ad 100644 --- a/Makefile +++ b/Makefile @@ -41,19 +41,29 @@ setup: setup-js setup-python ## Full project setup (all dependencies) @echo -e " Run $(YELLOW)make dev$(NC) to start development servers" setup-js: ## Install JavaScript dependencies (bun) + @command -v bun >/dev/null 2>&1 || { \ + echo -e "$(YELLOW)bun not found — installing...$(NC)"; \ + curl -fsSL https://bun.sh/install | bash; \ + } @echo -e "$(BLUE)Installing JavaScript dependencies...$(NC)" bun install setup-python: $(VENV)/bin/activate ## Set up Python virtual environment and dependencies @echo -e "$(BLUE)Installing Python dependencies...$(NC)" $(PIP) install --upgrade pip - $(PIP) install -r $(BACKEND_DIR)/requirements.txt @if [ "$$(uname -m)" = "arm64" ] && [ "$$(uname)" = "Darwin" ]; then \ - echo -e "$(BLUE)Detected Apple Silicon - installing MLX dependencies...$(NC)"; \ + echo -e "$(BLUE)Detected Apple Silicon - using MLX-compatible dependency resolution...$(NC)"; \ $(PIP) install -r $(BACKEND_DIR)/requirements-mlx.txt; \ + grep -v -E "^transformers" $(BACKEND_DIR)/requirements.txt > /tmp/voicebox-requirements-filtered.txt; \ + $(PIP) install -r /tmp/voicebox-requirements-filtered.txt; \ + rm /tmp/voicebox-requirements-filtered.txt; \ + $(PIP) install --no-deps 
git+https://github.com/QwenLM/Qwen3-TTS.git; \ echo -e "$(GREEN)✓ MLX backend enabled (native Metal acceleration)$(NC)"; \ + echo -e "$(YELLOW)Note: Using transformers 5.0.0rc3 (required by MLX)$(NC)"; \ + else \ + $(PIP) install -r $(BACKEND_DIR)/requirements.txt; \ + $(PIP) install git+https://github.com/QwenLM/Qwen3-TTS.git; \ fi - $(PIP) install git+https://github.com/QwenLM/Qwen3-TTS.git @echo -e "$(GREEN)✓ Python environment ready$(NC)" $(VENV)/bin/activate: @@ -72,7 +82,7 @@ setup-rust: ## Install Rust toolchain (if not present) # DEVELOPMENT # ============================================================================= -.PHONY: dev dev-backend dev-frontend dev-web kill-dev +.PHONY: dev dev-backend dev-backend-watch dev-frontend dev-web kill-dev dev: ## Start backend + desktop app (parallel) @echo -e "$(BLUE)Starting development servers...$(NC)" @@ -82,9 +92,11 @@ dev: ## Start backend + desktop app (parallel) sleep 2 && $(MAKE) dev-frontend & \ wait -dev-backend: ## Start FastAPI backend server +dev-backend: dev-backend-watch ## Start FastAPI backend server (venv-verified, auto-reload) + +dev-backend-watch: ## Start backend with venv verification + Python file watching @echo -e "$(BLUE)Starting backend server on http://localhost:17493$(NC)" - $(VENV_BIN)/uvicorn backend.main:app --reload --port 17493 + ./scripts/dev-backend-watch.sh dev-frontend: ## Start Tauri desktop app @echo -e "$(BLUE)Starting Tauri desktop app...$(NC)" diff --git a/SERVERLESS.md b/SERVERLESS.md new file mode 100644 index 0000000..cb003c1 --- /dev/null +++ b/SERVERLESS.md @@ -0,0 +1,161 @@ +# RunPod Serverless Deployment + +Voicebox can run as a [RunPod Serverless](https://docs.runpod.io/serverless/quickstart) worker. Workers spin up on demand, process requests, and shut down automatically — you only pay while they're running. + +## How it works + +The serverless image starts a RunPod handler (`serverless_handler.py`) which: + +1. 
Launches the existing FastAPI server in a background thread +2. Waits for `/health` to respond (up to 5 min on cold start for model downloads) +3. Proxies each RunPod job as an HTTP request to the local server +4. Returns the response — JSON as-is, audio files as base64 + +Model idle-unloading is disabled in serverless mode (`SERVERLESS=1`). The model stays loaded for the worker's lifetime. RunPod shuts down the entire worker after the configured idle timeout. + +## Build the image + +```bash +# From the voicebox/ directory +./scripts/serverless-build.sh + +# Build + push to Docker Hub +./scripts/serverless-build.sh --push --tag youruser/voicebox-serverless:latest + +# Build + push to GHCR +./scripts/serverless-build.sh --push --tag ghcr.io/youruser/voicebox-serverless:latest +``` + +Or manually: + +```bash +DOCKER_BUILDKIT=1 docker build \ + --build-arg CUDA=1 \ + --build-arg SERVERLESS=1 \ + -t voicebox-serverless \ + . +``` + +## RunPod endpoint settings + +When creating your endpoint on [runpod.io](https://runpod.io): + +| Setting | Recommended | +| ----------------- | -------------------------------- | +| Container image | your pushed image tag | +| GPU | RTX 4090 or similar (16GB+ VRAM) | +| Idle timeout | **60 seconds** | +| Execution timeout | 600 seconds | +| Active workers | 0 (pure on-demand) | +| Max workers | 1 (increase for production) | +| FlashBoot | enabled | + +**On idle timeout:** The GPU stays allocated (and billed) for the full idle window regardless of VRAM usage. Keeping the model hot and using a short idle timeout (60s) is more cost-effective than unloading the model and using a long idle timeout. + +## Authentication + +Add your RunPod API key to the root `.env`: + +``` +RUNPOD_API_KEY=your_runpod_api_key_here +``` + +All requests to the RunPod endpoint require this key as a bearer token: + +``` +Authorization: Bearer $RUNPOD_API_KEY +``` + +## Sending requests + +RunPod wraps requests in a job envelope. 
The handler accepts: + +| Field | Type | Description | +| --------- | ------ | -------------------------------------- | +| `method` | string | HTTP method (default: `"POST"`) | +| `path` | string | Required. API path, e.g. `"/generate"` | +| `body` | object | JSON body for POST/PUT requests | +| `params` | object | Query string parameters | +| `headers` | object | Additional HTTP headers | + +### Health check + +```bash +curl -X POST https://api.runpod.ai/v2/$ENDPOINT_ID/runsync \ + -H "Authorization: Bearer $RUNPOD_API_KEY" \ + -H "Content-Type: application/json" \ + -d '{"input": {"method": "GET", "path": "/health"}}' +``` + +### Generate speech + +```bash +curl -X POST https://api.runpod.ai/v2/$ENDPOINT_ID/runsync \ + -H "Authorization: Bearer $RUNPOD_API_KEY" \ + -H "Content-Type: application/json" \ + -d '{ + "input": { + "method": "POST", + "path": "/generate", + "body": { + "profile_id": "your-profile-id", + "text": "Hello from RunPod." + } + } + }' +``` + +### Download audio + +Audio endpoints return base64-encoded content with `"is_binary": true`: + +```json +{ + "output": { + "status_code": 200, + "is_binary": true, + "body_base64": "UklGRi..." 
+ } +} +``` + +Decode it: + +```bash +echo "$BODY_BASE64" | base64 -d > output.wav +``` + +### Async jobs (long generations) + +For long texts, use `/run` instead of `/runsync` to avoid the 90s sync timeout: + +```bash +# Submit +JOB=$(curl -s -X POST https://api.runpod.ai/v2/$ENDPOINT_ID/run \ + -H "Authorization: Bearer $RUNPOD_API_KEY" \ + -H "Content-Type: application/json" \ + -d '{"input": {"path": "/generate", "body": {"profile_id": "...", "text": "..."}}}') + +JOB_ID=$(echo $JOB | jq -r '.id') + +# Poll +curl https://api.runpod.ai/v2/$ENDPOINT_ID/status/$JOB_ID \ + -H "Authorization: Bearer $RUNPOD_API_KEY" +``` + +## Local testing + +The RunPod SDK includes a local test server: + +```bash +cd voicebox/ +SERVERLESS=1 python3 -m backend.serverless_handler --rp_serve_api +``` + +This starts a local HTTP server that simulates the RunPod job protocol at `http://localhost:8000`. + +## Limitations + +- **SSE streaming not supported** — RunPod jobs return a single response. Generation still works, just without real-time progress events. Use `/generate` (non-streaming) via the job body. +- **Ephemeral storage** — The SQLite database (profiles, history) is lost when the worker shuts down. Voice profiles need to be re-imported each cold start, or use a RunPod network volume for persistence. +- **Cold start time** — First start downloads model weights (~3–5 GB from HuggingFace). Subsequent starts with FlashBoot are much faster. 
diff --git a/app/src/App.tsx b/app/src/App.tsx index fbe2911..f9a1bb6 100644 --- a/app/src/App.tsx +++ b/app/src/App.tsx @@ -82,7 +82,6 @@ function App() { console.log('Dev mode: Skipping auto-start of server (run it separately)'); setServerReady(true); // Mark as ready so UI doesn't show loading screen // Mark that server was not started by app (so we don't try to stop it on close) - // @ts-expect-error - adding property to window window.__voiceboxServerStartedByApp = false; return; } @@ -93,25 +92,48 @@ function App() { } serverStartingRef.current = true; - console.log('Production mode: Starting bundled server...'); - - platform.lifecycle - .startServer(false) - .then((serverUrl) => { - console.log('Server is ready at:', serverUrl); - // Update the server URL in the store with the dynamically assigned port - useServerStore.getState().setServerUrl(serverUrl); - setServerReady(true); - // Mark that we started the server (so we know to stop it on close) - // @ts-expect-error - adding property to window - window.__voiceboxServerStartedByApp = true; - }) - .catch((error) => { - console.error('Failed to auto-start server:', error); - serverStartingRef.current = false; - // @ts-expect-error - adding property to window - window.__voiceboxServerStartedByApp = false; - }); + + const SERVER_URL = 'http://127.0.0.1:17493'; + + // Check if a server is already running before trying to start one. + // This handles the case where a dev server (or a previous instance) is + // already listening — we can skip the sidecar startup entirely. 
+ const tryExistingServer = async (): Promise<boolean> => { + try { + const res = await fetch(`${SERVER_URL}/health`, { signal: AbortSignal.timeout(1500) }); + if (res.ok) { + console.log('Production mode: Found server already running, reusing it.'); + useServerStore.getState().setServerUrl(SERVER_URL); + setServerReady(true); + window.__voiceboxServerStartedByApp = false; + return true; + } + } catch { + // Not running — fall through to sidecar startup + } + return false; + }; + + tryExistingServer().then((alreadyRunning) => { + if (alreadyRunning) return; + + console.log('Production mode: Starting bundled server...'); + platform.lifecycle + .startServer(false) + .then((serverUrl) => { + console.log('Server is ready at:', serverUrl); + // Update the server URL in the store with the dynamically assigned port + useServerStore.getState().setServerUrl(serverUrl); + setServerReady(true); + // Mark that we started the server (so we know to stop it on close) + window.__voiceboxServerStartedByApp = true; + }) + .catch((error) => { + console.error('Failed to auto-start server:', error); + serverStartingRef.current = false; + window.__voiceboxServerStartedByApp = false; + }); + }); // Cleanup: stop server on actual unmount (not StrictMode remount) // Note: Window close is handled separately in Tauri Rust code diff --git a/app/src/components/AudioTab/AudioTab.tsx b/app/src/components/AudioTab/AudioTab.tsx index f76e99d..e55374b 100644 --- a/app/src/components/AudioTab/AudioTab.tsx +++ b/app/src/components/AudioTab/AudioTab.tsx @@ -1,6 +1,6 @@ import { useMutation, useQuery, useQueryClient } from '@tanstack/react-query'; -import { Check, CheckCircle2, Edit, Plus, Speaker, Trash2 } from 'lucide-react'; -import { useState } from 'react'; +import { Check, CheckCircle2, Edit, Mic, Plus, RefreshCw, Speaker, Trash2 } from 'lucide-react'; +import { useEffect, useRef, useState } from 'react'; import { Badge } from '@/components/ui/badge'; import { Button } from '@/components/ui/button'; 
import { @@ -25,6 +25,7 @@ import { BOTTOM_SAFE_AREA_PADDING } from '@/lib/constants/ui'; import { cn } from '@/lib/utils/cn'; import { usePlayerStore } from '@/stores/playerStore'; import { usePlatform } from '@/platform/PlatformContext'; +import { useToast } from '@/components/ui/use-toast'; interface AudioDevice { id: string; @@ -37,16 +38,19 @@ export function AudioTab() { const [createDialogOpen, setCreateDialogOpen] = useState(false); const [editingChannel, setEditingChannel] = useState(null); const [selectedChannelId, setSelectedChannelId] = useState(null); + const [isRefreshing, setIsRefreshing] = useState(false); const queryClient = useQueryClient(); const audioUrl = usePlayerStore((state) => state.audioUrl); const isPlayerVisible = !!audioUrl; + const { toast } = useToast(); + const prevDefaultInputRef = useRef<string | null>(null); const { data: channels, isLoading: channelsLoading } = useQuery({ queryKey: ['channels'], queryFn: () => apiClient.listChannels(), }); - const { data: devices, isLoading: devicesLoading } = useQuery({ + const { data: devices, isLoading: devicesLoading, refetch: refetchDevices } = useQuery({ queryKey: ['audio-devices'], queryFn: async () => { if (!platform.metadata.isTauri) { @@ -60,8 +64,44 @@ export function AudioTab() { } }, enabled: platform.metadata.isTauri, + refetchOnWindowFocus: true, + staleTime: 5000, }); + const { data: inputDevices } = useQuery({ + queryKey: ['audio-input-devices'], + queryFn: async () => { + if (!platform.metadata.isTauri) return []; + try { + return await platform.audio.listInputDevices(); + } catch (error) { + console.error('Failed to list input devices:', error); + return []; + } + }, + enabled: platform.metadata.isTauri, + refetchOnWindowFocus: true, + // Poll every 3 seconds so default input changes are noticed automatically + refetchInterval: 3000, + staleTime: 2000, + }); + + const defaultInputDevice = inputDevices?.find((d) => d.is_default); + + // Toast when default input device changes + useEffect(() => 
{ + if (!defaultInputDevice) return; + const prev = prevDefaultInputRef.current; + if (prev !== null && prev !== defaultInputDevice.name) { + toast({ + title: 'Default input changed', + description: defaultInputDevice.name, + duration: 3000, + }); + } + prevDefaultInputRef.current = defaultInputDevice.name; + }, [defaultInputDevice?.name, toast]); + const { data: profiles } = useQuery({ queryKey: ['profiles'], queryFn: () => apiClient.listProfiles(), @@ -83,6 +123,27 @@ export function AudioTab() { channelId: string; data: { name?: string; device_ids?: string[] }; }) => apiClient.updateChannel(channelId, data), + onMutate: async ({ channelId, data }) => { + // Optimistically update channels cache so checkmarks appear immediately + await queryClient.cancelQueries({ queryKey: ['channels'] }); + const previous = queryClient.getQueryData(['channels']); + if (data.device_ids !== undefined) { + queryClient.setQueryData( + ['channels'], + (old: Array<{ id: string; device_ids: string[]; [key: string]: unknown }> | undefined) => + old?.map((ch) => + ch.id === channelId ? { ...ch, device_ids: data.device_ids! 
} : ch, + ), + ); + } + return { previous }; + }, + onError: (_err, _vars, context) => { + // Roll back on error + if (context?.previous) { + queryClient.setQueryData(['channels'], context.previous); + } + }, onSuccess: () => { queryClient.invalidateQueries({ queryKey: ['channels'] }); queryClient.invalidateQueries({ queryKey: ['profile-channels'] }); @@ -110,8 +171,20 @@ export function AudioTab() { const setChannelVoices = useMutation({ mutationFn: ({ channelId, profileIds }: { channelId: string; profileIds: string[] }) => apiClient.setChannelVoices(channelId, profileIds), - onSuccess: () => { - queryClient.invalidateQueries({ queryKey: ['channel-voices'] }); + onMutate: async ({ channelId, profileIds }) => { + // Optimistically update channel-voices cache so the list updates immediately + await queryClient.cancelQueries({ queryKey: ['channel-voices', channelId] }); + const previous = queryClient.getQueryData(['channel-voices', channelId]); + queryClient.setQueryData(['channel-voices', channelId], { profile_ids: profileIds }); + return { previous, channelId }; + }, + onError: (_err, _vars, context) => { + if (context?.previous !== undefined) { + queryClient.setQueryData(['channel-voices', context.channelId], context.previous); + } + }, + onSuccess: (_data, { channelId }) => { + queryClient.invalidateQueries({ queryKey: ['channel-voices', channelId] }); queryClient.invalidateQueries({ queryKey: ['profile-channels'] }); }, }); @@ -132,13 +205,17 @@ export function AudioTab() { return (
-
+

Audio Channels

+

+ Route different voices to dedicated speakers — ideal for story mode, museum displays, or + events where each character plays through a separate device. +

{/* Left Column - Channels */} @@ -268,7 +345,23 @@ export function AudioTab() { )} >
-

Available Devices

+
+

Available Devices

+ +

{selectedChannelId ? selectedChannel?.is_default @@ -276,6 +369,12 @@ export function AudioTab() { : 'Click devices to add or remove them from the selected channel' : 'Select a channel to assign devices'}

+ {defaultInputDevice && ( +
+ + Default input: {defaultInputDevice.name} +
+ )}
{allDevices.length > 0 ? (
diff --git a/app/src/components/Generation/FloatingGenerateBox.tsx b/app/src/components/Generation/FloatingGenerateBox.tsx index a8d556a..f61526f 100644 --- a/app/src/components/Generation/FloatingGenerateBox.tsx +++ b/app/src/components/Generation/FloatingGenerateBox.tsx @@ -34,7 +34,7 @@ export function FloatingGenerateBox({ const setSelectedProfileId = useUIStore((state) => state.setSelectedProfileId); const { data: selectedProfile } = useProfile(selectedProfileId || ''); const { data: profiles } = useProfiles(); - const [isExpanded, setIsExpanded] = useState(false); + const [isExpanded, setIsExpanded] = useState(true); const [isInstructMode, setIsInstructMode] = useState(false); const containerRef = useRef(null); const textareaRef = useRef(null); @@ -49,9 +49,9 @@ export function FloatingGenerateBox({ // Calculate if track editor is visible (on stories route with items) const hasTrackEditor = isStoriesRoute && currentStory && currentStory.items.length > 0; - const { form, handleSubmit, isPending } = useGenerationForm({ + const { form, handleSubmit, isPending, isQueueLimitReached } = useGenerationForm({ onSuccess: async (generationId) => { - setIsExpanded(false); + setIsInstructMode(false); // If on stories route and a story is selected, add generation to story if (isStoriesRoute && selectedStoryId && generationId) { try { @@ -75,36 +75,6 @@ export function FloatingGenerateBox({ }, }); - // Click away handler to collapse the box - useEffect(() => { - function handleClickOutside(event: MouseEvent) { - const target = event.target as HTMLElement; - - // Don't collapse if clicking inside the container - if (containerRef.current?.contains(target)) { - return; - } - - // Don't collapse if clicking on a Select dropdown (which renders in a portal) - if ( - target.closest('[role="listbox"]') || - target.closest('[data-radix-popper-content-wrapper]') - ) { - return; - } - - setIsExpanded(false); - } - - if (isExpanded) { - document.addEventListener('mousedown', 
handleClickOutside); - } - - return () => { - document.removeEventListener('mousedown', handleClickOutside); - }; - }, [isExpanded]); - // Set first voice as default if none selected useEffect(() => { if (!selectedProfileId && profiles && profiles.length > 0) { @@ -116,10 +86,10 @@ export function FloatingGenerateBox({ useEffect(() => { if (!isExpanded) { // Reset textarea height after collapse animation completes - const timeoutId = setTimeout(() => { - const textarea = textareaRef.current; - if (textarea) { - textarea.style.height = '32px'; + const timeoutId = setTimeout(() => { + const textarea = textareaRef.current; + if (textarea) { + textarea.style.height = '37px'; textarea.style.overflowY = 'hidden'; } }, 200); // Wait for animation to complete @@ -132,7 +102,7 @@ export function FloatingGenerateBox({ const adjustHeight = () => { textarea.style.height = 'auto'; const scrollHeight = textarea.scrollHeight; - const minHeight = 100; // Expanded minimum + const minHeight = 115; // Expanded minimum (+15%) const maxHeight = 300; // Max height in pixels const targetHeight = Math.max(minHeight, Math.min(scrollHeight, maxHeight)); textarea.style.height = `${targetHeight}px`; @@ -207,7 +177,7 @@ export function FloatingGenerateBox({ setIsExpanded(true)} onFocus={() => setIsExpanded(true)} /> @@ -257,7 +227,7 @@ export function FloatingGenerateBox({ setIsExpanded(true)} onFocus={() => setIsExpanded(true)} /> @@ -297,7 +267,7 @@ export function FloatingGenerateBox({
+ {isQueueLimitReached && ( + + Queue full: 3 active jobs max for this user. + + )} ( Language - @@ -130,7 +130,7 @@ export function GenerationForm() { render={({ field }) => ( Model Size - diff --git a/app/src/components/History/HistoryTable.tsx b/app/src/components/History/HistoryTable.tsx index e572f69..abecb80 100644 --- a/app/src/components/History/HistoryTable.tsx +++ b/app/src/components/History/HistoryTable.tsx @@ -1,5 +1,6 @@ import { AudioWaveform, + Copy, Download, FileArchive, Loader2, @@ -251,8 +252,11 @@ export function HistoryTable() { {history.map((gen) => { const isCurrentlyPlaying = currentAudioId === gen.id && isPlaying; return ( + // biome-ignore lint/a11y/useSemanticElements: Complex flex layout requires div wrapper
{ + if (e.key === 'Enter' || e.key === ' ') { + e.preventDefault(); + const target = e.target as HTMLElement; + if (target.closest('textarea') || window.getSelection()?.toString()) { + return; + } + handlePlay(gen.id, gen.text, gen.profile_id); + } + }} > {/* Waveform icon */}
@@ -288,17 +302,35 @@ export function HistoryTable() {
{/* Right side - Transcript textarea */} -
+