diff --git a/CHANGELOG.md b/CHANGELOG.md index b7116d3..0cd93e6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -62,9 +62,18 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Self-documenting help system with `make help` - Colored output for better readability - Supports parallel development server execution +- **Audiobook Tab** - New long-form narration workflow in the app + - Import/paste `.txt` book content and review/edit before generation + - Generate a quick 5-sentence preview before full run + - Chunk long text automatically and process chunk-by-chunk with retry support + - Auto-create and update a Story during generation, with export shortcut +- **Text chunking utility** - Added reusable sentence-aware chunking for large text inputs (`app/src/lib/utils/textChunking.ts`) ### Changed - **README** - Added Makefile reference and updated Quick Start with Makefile-based setup instructions alongside manual setup +- **Navigation** - Added Audiobook route/tab to the app sidebar +- **Generation API types** - Added optional `instruct` field to `GenerationRequest` +- **App styling** - Added `scrollbar-visible` utility styles for long-scroll panels/editors --- diff --git a/app/src/components/AudiobookTab/AudiobookTab.tsx b/app/src/components/AudiobookTab/AudiobookTab.tsx new file mode 100644 index 0000000..323d29f --- /dev/null +++ b/app/src/components/AudiobookTab/AudiobookTab.tsx @@ -0,0 +1,1173 @@ +import { useQueryClient } from '@tanstack/react-query'; +import { useNavigate } from '@tanstack/react-router'; +import { + AlertTriangle, + BookOpen, + ChevronDown, + ChevronRight, + Download, + FlaskConical, + Loader2, + Pause, + Play, + RotateCcw, + Square, + Upload, +} from 'lucide-react'; +import { useCallback, useEffect, useMemo, useRef, useState } from 'react'; +import { Button } from '@/components/ui/button'; +import { Card, CardContent, CardHeader, CardTitle } from '@/components/ui/card'; +import { Input } from 
'@/components/ui/input';
import { Label } from '@/components/ui/label';
import {
  Select,
  SelectContent,
  SelectItem,
  SelectTrigger,
  SelectValue,
} from '@/components/ui/select';
import { Textarea } from '@/components/ui/textarea';
import { useToast } from '@/components/ui/use-toast';
import { apiClient } from '@/lib/api/client';
import { LANGUAGE_OPTIONS, type LanguageCode } from '@/lib/constants/languages';
import { BOTTOM_SAFE_AREA_PADDING } from '@/lib/constants/ui';
import { useProfiles } from '@/lib/hooks/useProfiles';
import { useExportStoryAudio } from '@/lib/hooks/useStories';
import { cn } from '@/lib/utils/cn';
import { chunkText, type TextChunk } from '@/lib/utils/textChunking';
import { useGenerationStore } from '@/stores/generationStore';
import { usePlayerStore } from '@/stores/playerStore';
import { useStoryStore } from '@/stores/storyStore';
import { useUIStore } from '@/stores/uiStore';

/** Upper bound on characters per chunk; passed to `chunkText` and re-checked before each generation request. */
const HARD_MAX_CHUNK_SIZE = 4500;
/** Initial value of the user-adjustable target chunk size. */
const DEFAULT_TARGET_CHUNK_SIZE = 4500;
/** Maximum number of generation attempts per chunk (the first try counts as attempt 1). */
const MAX_CHUNK_RETRIES = 3;
/** File size above which a "large file" notice is shown when loading a text file. */
const LARGE_FILE_WARNING_BYTES = 2 * 1024 * 1024;
/** Number of leading sentences used for the quick preview. */
const QUICK_PREVIEW_SENTENCE_LIMIT = 5;

/** Lifecycle of a single chunk within a run. */
type ChunkStatus = 'pending' | 'running' | 'done' | 'failed';

/** Lifecycle of a whole audiobook run. */
type AudiobookRunStatus =
  | 'running'
  | 'paused'
  | 'stopping'
  | 'stopped'
  | 'completed'
  | 'completed_with_errors';

/** A text chunk plus the bookkeeping needed to generate it and retry on failure. */
interface AudiobookChunk extends TextChunk {
  status: ChunkStatus;
  /** Number of generation attempts made so far for this chunk. */
  attempts: number;
  /** Id of the generation produced for this chunk, once it succeeded. */
  generationId?: string;
  /** Message of the most recent failure, if any. */
  error?: string;
}

/** Snapshot of one audiobook generation run and the settings it was started with. */
interface AudiobookRun {
  storyId: string;
  storyName: string;
  profileId: string;
  language: LanguageCode;
  modelSize: '1.7B' | '0.6B';
  instruct?: string;
  chunks: AudiobookChunk[];
  status: AudiobookRunStatus;
  /** ISO-8601 timestamp taken when the run was created. */
  startedAt: string;
  /** ISO-8601 timestamp taken when the run stopped or completed. */
  finishedAt?: string;
}

/**
 * Turns an arbitrary thrown value into a message suitable for display.
 *
 * @param error - Whatever was caught; may be an `Error`, a string, or anything else.
 * @returns The `Error` message, the thrown string when it is non-blank, or a
 *   generic fallback for every other value.
 */
function getErrorMessage(error: unknown): string {
  if (error instanceof Error) {
    return error.message;
  }
  // Rejections are not guaranteed to be Error instances; keep a thrown string
  // instead of hiding it behind the generic fallback.
  if (typeof error === 'string' && error.trim()) {
    return error;
  }
  return 'Unknown generation error';
}

/**
 * Resolves after the given delay.
 *
 * @param ms - Delay in milliseconds.
 * @returns A promise that resolves (with no value) once the timer fires.
 */
function wait(ms: number): Promise<void> {
  return new Promise((resolve) => {
    window.setTimeout(resolve, ms);
  });
}

/**
 * Wraps freshly computed text chunks in their initial run state.
 *
 * @param chunks - Chunks to queue.
 * @returns One entry per input chunk, each `pending` with zero attempts.
 */
function buildRunChunks(chunks: TextChunk[]): AudiobookChunk[] {
  return chunks.map((chunk) => ({
    ...chunk,
    status: 'pending',
    attempts: 0,
  }));
}

/**
 * Splits raw text into trimmed sentences.
 *
 * Line endings are normalized to `\n`, the text is split into non-empty
 * paragraphs on line breaks, and each paragraph is split after runs of
 * `.`, `!` or `?` (plus any closing quote or bracket that follows). A trailing
 * fragment without terminal punctuation counts as a sentence.
 *
 * @param rawText - Text to split.
 * @returns The sentences in document order; empty when the text is blank.
 */
function splitTextIntoSentences(rawText: string): string[] {
  const normalized = rawText.replace(/\r\n/g, '\n').replace(/\r/g, '\n').trim();
  if (!normalized) {
    return [];
  }

  const paragraphs = normalized
    .split(/\n+/)
    .map((paragraph) => paragraph.trim())
    .filter(Boolean);

  const sentencePattern = /[^.!?]+[.!?]+(?:["')\]]+)?|[^.!?]+$/g;
  const sentences: string[] = [];

  for (const paragraph of paragraphs) {
    const matches = paragraph.match(sentencePattern);
    if (!matches || matches.length === 0) {
      // Nothing matched (e.g. the paragraph is punctuation only): keep it whole.
      sentences.push(paragraph);
      continue;
    }
    sentences.push(...matches.map((sentence) => sentence.trim()).filter(Boolean));
  }

  return sentences;
}

/**
 * Builds the short excerpt used for the quick preview.
 *
 * @param rawText - Full editor text.
 * @returns The first `QUICK_PREVIEW_SENTENCE_LIMIT` sentences joined with single
 *   spaces, together with how many sentences were actually included.
 */
function buildQuickPreviewText(rawText: string): { text: string; sentenceCount: number } {
  const sentences = splitTextIntoSentences(rawText);
  const previewSentences = sentences.slice(0, QUICK_PREVIEW_SENTENCE_LIMIT);
  return {
    text: previewSentences.join(' ').trim(),
    sentenceCount: previewSentences.length,
  };
}

/**
 * Derives a story name from the loaded file name and the current date.
 *
 * @param fileName - Name of the loaded file, or `null` when the text was entered manually.
 * @returns `"<basename> (<YYYY-MM-DD>)"`, or `"Audiobook <YYYY-MM-DD>"` when there
 *   is no usable file name. The date is the UTC date taken from `toISOString()`.
 */
function createStoryName(fileName: string | null): string {
  const dateStamp = new Date().toISOString().slice(0, 10);
  if (!fileName) {
    return `Audiobook ${dateStamp}`;
  }

  const basename = fileName.replace(/\.txt$/i, '').trim();
  if (!basename) {
    return `Audiobook ${dateStamp}`;
  }

  return `${basename} (${dateStamp})`;
}

export function AudiobookTab() {
  const { toast } = useToast();
  const navigate = useNavigate();
  const queryClient = useQueryClient();

  // NOTE(review): the `useRef(null)` calls below carry no type argument, yet the
  // refs are later used for `.click()` and for holding the current run —
  // presumably the generics were lost in extraction; confirm against the original file.
  const fileInputRef = useRef(null);
  const processingRef = useRef(false);
  const pauseRequestedRef = useRef(false);
  const stopRequestedRef = useRef(false);
  const runRef = useRef(null);

  const setIsGenerating = useGenerationStore((state) => state.setIsGenerating);
  const audioUrl =
usePlayerStore((state) => state.audioUrl); + const setAudioWithAutoPlay = usePlayerStore((state) => state.setAudioWithAutoPlay); + const isPlayerVisible = !!audioUrl; + + const selectedProfileId = useUIStore((state) => state.selectedProfileId); + const setSelectedProfileId = useUIStore((state) => state.setSelectedProfileId); + const setSelectedStoryId = useStoryStore((state) => state.setSelectedStoryId); + const { data: profiles } = useProfiles(); + const exportStoryAudio = useExportStoryAudio(); + + const [fileName, setFileName] = useState(null); + const [originalText, setOriginalText] = useState(''); + const [text, setText] = useState(''); + const [targetChunkSize, setTargetChunkSize] = useState(DEFAULT_TARGET_CHUNK_SIZE); + const [language, setLanguage] = useState('en'); + const [modelSize, setModelSize] = useState<'1.7B' | '0.6B'>('1.7B'); + const [instruct, setInstruct] = useState(''); + const [run, setRun] = useState(null); + const [isPreviewGenerating, setIsPreviewGenerating] = useState(false); + const [lastPreviewFingerprint, setLastPreviewFingerprint] = useState(null); + const [isSummaryOpen, setIsSummaryOpen] = useState(true); + const [isSetupOpen, setIsSetupOpen] = useState(true); + const [isChunksOpen, setIsChunksOpen] = useState(true); + + useEffect(() => { + runRef.current = run; + }, [run]); + + useEffect(() => { + if (!selectedProfileId && profiles && profiles.length > 0) { + setSelectedProfileId(profiles[0].id); + } + }, [selectedProfileId, profiles, setSelectedProfileId]); + + useEffect(() => { + if (run?.status === 'running' || run?.status === 'stopping' || run?.status === 'paused') { + setIsSetupOpen(false); + } + }, [run?.status]); + + const preparedChunks = useMemo( + () => chunkText(text, targetChunkSize, HARD_MAX_CHUNK_SIZE), + [text, targetChunkSize], + ); + const oversizedPreparedChunks = useMemo( + () => preparedChunks.filter((chunk) => chunk.charCount > HARD_MAX_CHUNK_SIZE), + [preparedChunks], + ); + + const previewChunks = useMemo(() 
=> { + if (run) { + return run.chunks; + } + + return preparedChunks.map( + (chunk): AudiobookChunk => ({ + ...chunk, + status: 'pending', + attempts: 0, + generationId: undefined, + error: undefined, + }), + ); + }, [preparedChunks, run]); + + const chunkStats = useMemo(() => { + const chunks = run ? run.chunks : previewChunks; + const total = chunks.length; + const completed = chunks.filter((chunk) => chunk.status === 'done').length; + const failed = chunks.filter((chunk) => chunk.status === 'failed').length; + const running = chunks.filter((chunk) => chunk.status === 'running').length; + const pending = chunks.filter((chunk) => chunk.status === 'pending').length; + const progress = total > 0 ? Math.round((completed / total) * 100) : 0; + + return { + total, + completed, + failed, + running, + pending, + progress, + }; + }, [previewChunks, run]); + + const charCount = text.length; + const wordCount = text.split(/\s+/).filter(Boolean).length; + const lineCount = text ? text.split('\n').length : 0; + const isDirty = text !== originalText; + const hasText = text.trim().length > 0; + + const hasPendingRunChunks = useMemo( + () => + !!run && run.chunks.some((chunk) => chunk.status === 'pending' || chunk.status === 'running'), + [run], + ); + const quickPreview = useMemo(() => buildQuickPreviewText(text), [text]); + const previewFingerprint = useMemo( + () => + [selectedProfileId || '', language, modelSize, instruct.trim(), quickPreview.text].join('::'), + [selectedProfileId, language, modelSize, instruct, quickPreview.text], + ); + const isPreviewCurrent = + !!lastPreviewFingerprint && lastPreviewFingerprint === previewFingerprint; + + const processRun = useCallback(async () => { + if (processingRef.current) { + return; + } + + processingRef.current = true; + setIsGenerating(true); + + try { + while (true) { + const current = runRef.current; + if (!current) { + break; + } + + if (stopRequestedRef.current) { + setRun((prev) => { + if (!prev) { + return prev; + } + 
return { + ...prev, + status: 'stopped', + finishedAt: new Date().toISOString(), + }; + }); + break; + } + + if (pauseRequestedRef.current) { + await wait(250); + continue; + } + + const nextChunkIndex = current.chunks.findIndex((chunk) => chunk.status === 'pending'); + if (nextChunkIndex === -1) { + const hasFailures = current.chunks.some((chunk) => chunk.status === 'failed'); + setRun((prev) => { + if (!prev) { + return prev; + } + return { + ...prev, + status: hasFailures ? 'completed_with_errors' : 'completed', + finishedAt: new Date().toISOString(), + }; + }); + break; + } + + for ( + let attempt = current.chunks[nextChunkIndex].attempts + 1; + attempt <= MAX_CHUNK_RETRIES; + attempt += 1 + ) { + setRun((prev) => { + if (!prev) { + return prev; + } + + const chunks = [...prev.chunks]; + const chunk = chunks[nextChunkIndex]; + if (!chunk) { + return prev; + } + + chunks[nextChunkIndex] = { + ...chunk, + status: 'running', + attempts: attempt, + error: undefined, + }; + + return { + ...prev, + status: 'running', + chunks, + }; + }); + + try { + const latest = runRef.current; + if (!latest) { + break; + } + + const chunk = latest.chunks[nextChunkIndex]; + if (chunk.charCount > HARD_MAX_CHUNK_SIZE) { + setRun((prev) => { + if (!prev) { + return prev; + } + + const chunks = [...prev.chunks]; + const target = chunks[nextChunkIndex]; + if (!target) { + return prev; + } + + chunks[nextChunkIndex] = { + ...target, + status: 'failed', + error: `Chunk exceeds ${HARD_MAX_CHUNK_SIZE} characters. 
Edit text and retry.`, + }; + + return { + ...prev, + chunks, + }; + }); + break; + } + + const generation = await apiClient.generateSpeech({ + profile_id: latest.profileId, + text: chunk.text, + language: latest.language, + model_size: latest.modelSize, + instruct: latest.instruct || undefined, + }); + + await apiClient.addStoryItem(latest.storyId, { + generation_id: generation.id, + }); + + setRun((prev) => { + if (!prev) { + return prev; + } + + const chunks = [...prev.chunks]; + const target = chunks[nextChunkIndex]; + if (!target) { + return prev; + } + + chunks[nextChunkIndex] = { + ...target, + status: 'done', + generationId: generation.id, + error: undefined, + }; + + return { + ...prev, + chunks, + }; + }); + break; + } catch (error) { + const errorMessage = getErrorMessage(error); + const isLastAttempt = attempt >= MAX_CHUNK_RETRIES; + + setRun((prev) => { + if (!prev) { + return prev; + } + + const chunks = [...prev.chunks]; + const target = chunks[nextChunkIndex]; + if (!target) { + return prev; + } + + chunks[nextChunkIndex] = { + ...target, + status: isLastAttempt ? 
'failed' : 'pending', + attempts: attempt, + error: errorMessage, + }; + + return { + ...prev, + chunks, + }; + }); + + if (isLastAttempt) { + break; + } + + await wait(1000 * 2 ** (attempt - 1)); + } + } + } + } finally { + processingRef.current = false; + setIsGenerating(false); + + const latest = runRef.current; + if (latest?.storyId) { + await queryClient.invalidateQueries({ queryKey: ['history'] }); + await queryClient.invalidateQueries({ queryKey: ['stories'] }); + await queryClient.invalidateQueries({ queryKey: ['stories', latest.storyId] }); + } + } + }, [queryClient, setIsGenerating]); + + const handlePickTextFile = () => { + fileInputRef.current?.click(); + }; + + const handleTextFileChange = async (event: React.ChangeEvent) => { + const file = event.target.files?.[0]; + event.target.value = ''; + + if (!file) { + return; + } + + if (!file.name.toLowerCase().endsWith('.txt') && file.type !== 'text/plain') { + toast({ + title: 'Invalid file type', + description: 'Please pick a .txt file.', + variant: 'destructive', + }); + return; + } + + if (file.size > LARGE_FILE_WARNING_BYTES) { + toast({ + title: 'Large text file', + description: 'Large files are supported, but initial parsing may take a moment.', + }); + } + + try { + const loadedText = (await file.text()).replace(/\r\n/g, '\n').replace(/\r/g, '\n'); + if (!loadedText.trim()) { + toast({ + title: 'Empty text file', + description: 'The selected file has no readable text.', + variant: 'destructive', + }); + return; + } + + setFileName(file.name); + setOriginalText(loadedText); + setText(loadedText); + setRun(null); + pauseRequestedRef.current = false; + stopRequestedRef.current = false; + + toast({ + title: 'Text loaded', + description: `${file.name} is ready. 
Review the beginning and end before starting generation.`, + }); + } catch (error) { + toast({ + title: 'Failed to read file', + description: getErrorMessage(error), + variant: 'destructive', + }); + } + }; + + const handleResetText = () => { + setText(originalText); + }; + + const handleStartGeneration = async () => { + if (!selectedProfileId) { + toast({ + title: 'No voice selected', + description: 'Select a voice profile before generating.', + variant: 'destructive', + }); + return; + } + + if (!text.trim()) { + toast({ + title: 'No text', + description: 'Load or enter text before starting generation.', + variant: 'destructive', + }); + return; + } + + const chunks = chunkText(text, targetChunkSize, HARD_MAX_CHUNK_SIZE); + if (chunks.length === 0) { + toast({ + title: 'No chunks created', + description: 'Adjust text or chunk size and try again.', + variant: 'destructive', + }); + return; + } + + try { + const storyName = createStoryName(fileName); + const story = await apiClient.createStory({ + name: storyName, + description: `Generated from ${fileName || 'manual text'} in Audiobook tab`, + }); + + const nextRun: AudiobookRun = { + storyId: story.id, + storyName: story.name, + profileId: selectedProfileId, + language, + modelSize, + instruct: instruct.trim() || undefined, + chunks: buildRunChunks(chunks), + status: 'running', + startedAt: new Date().toISOString(), + }; + + stopRequestedRef.current = false; + pauseRequestedRef.current = false; + runRef.current = nextRun; + setRun(nextRun); + setIsSetupOpen(false); + setIsSummaryOpen(true); + setIsChunksOpen(true); + + toast({ + title: 'Generation started', + description: `Created story "${story.name}" and queued ${chunks.length} chunks.`, + }); + + void processRun(); + } catch (error) { + toast({ + title: 'Failed to start generation', + description: getErrorMessage(error), + variant: 'destructive', + }); + } + }; + + const handleQuickPreview = async () => { + if (!selectedProfileId) { + toast({ + title: 'No voice 
selected', + description: 'Select a voice profile before previewing.', + variant: 'destructive', + }); + return; + } + + if (!quickPreview.text) { + toast({ + title: 'No preview text', + description: 'Add text first. Preview uses the first 5 sentences.', + variant: 'destructive', + }); + return; + } + + try { + setIsPreviewGenerating(true); + setIsGenerating(true); + + const generation = await apiClient.generateSpeech({ + profile_id: selectedProfileId, + text: quickPreview.text, + language, + model_size: modelSize, + instruct: instruct.trim() || undefined, + }); + + setAudioWithAutoPlay( + apiClient.getAudioUrl(generation.id), + generation.id, + selectedProfileId, + `Preview (${quickPreview.sentenceCount} sentences)`, + ); + setLastPreviewFingerprint(previewFingerprint); + await queryClient.invalidateQueries({ queryKey: ['history'] }); + + toast({ + title: 'Preview ready', + description: `Generated ${quickPreview.sentenceCount} sentence preview. This is not added to a Story.`, + }); + } catch (error) { + toast({ + title: 'Preview failed', + description: getErrorMessage(error), + variant: 'destructive', + }); + } finally { + setIsPreviewGenerating(false); + setIsGenerating(false); + } + }; + + const handlePause = () => { + if (!run || run.status !== 'running') { + return; + } + pauseRequestedRef.current = true; + setRun((prev) => (prev ? { ...prev, status: 'paused' } : prev)); + setIsGenerating(false); + }; + + const handleResume = () => { + if (!run || (run.status !== 'paused' && run.status !== 'stopped')) { + return; + } + pauseRequestedRef.current = false; + stopRequestedRef.current = false; + setRun((prev) => (prev ? { ...prev, status: 'running' } : prev)); + setIsGenerating(true); + void processRun(); + }; + + const handleStopAfterCurrent = () => { + if (!run || (run.status !== 'running' && run.status !== 'paused')) { + return; + } + pauseRequestedRef.current = false; + stopRequestedRef.current = true; + setRun((prev) => (prev ? 
{ ...prev, status: 'stopping' } : prev)); + }; + + const handleRetryFailed = () => { + if (!run) { + return; + } + + setRun((prev) => { + if (!prev) { + return prev; + } + + const chunks = prev.chunks.map((chunk) => + chunk.status === 'failed' + ? { + ...chunk, + status: 'pending' as ChunkStatus, + attempts: 0, + error: undefined, + } + : chunk, + ); + + return { + ...prev, + chunks, + status: 'stopped', + }; + }); + }; + + const handleOpenStory = () => { + if (!run?.storyId) { + return; + } + setSelectedStoryId(run.storyId); + navigate({ to: '/stories' }); + }; + + const handleExportStory = () => { + if (!run?.storyId) { + return; + } + exportStoryAudio.mutate({ + storyId: run.storyId, + storyName: run.storyName, + }); + }; + + const canStart = + !run || + run.status === 'completed' || + run.status === 'completed_with_errors' || + (run.status === 'stopped' && !hasPendingRunChunks); + const canResume = + !!run && (run.status === 'paused' || run.status === 'stopped') && hasPendingRunChunks; + const canRetryFailed = + !!run && run.chunks.some((chunk) => chunk.status === 'failed') && run.status !== 'running'; + const isRunActive = run?.status === 'running' || run?.status === 'stopping'; + + return ( +
+
+
+

Audiobook

+

+ Load a long TXT, edit it, then generate chunk-by-chunk into a Story. +

+

+ Generation is automatically saved as a Story while chunks are processed. +

+
+
+ + + +
+
+ +
+ + + Source Text +
+ {fileName || 'No file selected'} + {isDirty && Edited} +
+
+ {charCount.toLocaleString()} chars • {wordCount.toLocaleString()} words •{' '} + {lineCount.toLocaleString()} lines +
+
+ + {hasText && ( +
+ +
+

+ Everything visible in this editor will be converted to audio. +

+

+ Cleanup tip: check the beginning and end for title pages, legal notes, + appendices, or trailing metadata you do not want narrated. +

+
+
+ )} +