From 2e45e174fa6118edad773999ce1db67ca56e1f6f Mon Sep 17 00:00:00 2001 From: Julien Veyssier Date: Wed, 21 Jan 2026 13:33:13 +0100 Subject: [PATCH] feat(audio-translation): add audio translation task type and provider, factorize translation logic in a service use the correct user language for text translations happening in the task Signed-off-by: Julien Veyssier --- composer.lock | 8 +- lib/AppInfo/Application.php | 4 + lib/Service/TranslationService.php | 127 ++++++++++++ .../AudioToAudioTranslateProvider.php | 195 ++++++++++++++++++ .../AudioToAudioTranslateTaskType.php | 87 ++++++++ lib/TaskProcessing/TranslateProvider.php | 119 +---------- tests/unit/Providers/OpenAiProviderTest.php | 21 +- 7 files changed, 442 insertions(+), 119 deletions(-) create mode 100644 lib/Service/TranslationService.php create mode 100644 lib/TaskProcessing/AudioToAudioTranslateProvider.php create mode 100644 lib/TaskProcessing/AudioToAudioTranslateTaskType.php diff --git a/composer.lock b/composer.lock index 722e65f1..1a99a873 100644 --- a/composer.lock +++ b/composer.lock @@ -202,12 +202,12 @@ "source": { "type": "git", "url": "https://github.com/nextcloud-deps/ocp.git", - "reference": "2ce5bef7efc76907b708860d472ccaf401d3bf0c" + "reference": "0992cfc56e143a27b3b8311cf69cad83e7956beb" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/nextcloud-deps/ocp/zipball/2ce5bef7efc76907b708860d472ccaf401d3bf0c", - "reference": "2ce5bef7efc76907b708860d472ccaf401d3bf0c", + "url": "https://api.github.com/repos/nextcloud-deps/ocp/zipball/0992cfc56e143a27b3b8311cf69cad83e7956beb", + "reference": "0992cfc56e143a27b3b8311cf69cad83e7956beb", "shasum": "" }, "require": { @@ -243,7 +243,7 @@ "issues": "https://github.com/nextcloud-deps/ocp/issues", "source": "https://github.com/nextcloud-deps/ocp/tree/master" }, - "time": "2025-11-25T00:51:14+00:00" + "time": "2026-01-21T00:58:31+00:00" }, { "name": "psr/clock", diff --git a/lib/AppInfo/Application.php b/lib/AppInfo/Application.php index dad578b2..f0e18501 100644 --- a/lib/AppInfo/Application.php +++ b/lib/AppInfo/Application.php @@ -11,6 +11,8 @@ use OCA\OpenAi\Notification\Notifier; use OCA\OpenAi\OldProcessing\Translation\TranslationProvider as OldTranslationProvider; use OCA\OpenAi\TaskProcessing\AudioToAudioChatProvider; +use OCA\OpenAi\TaskProcessing\AudioToAudioTranslateProvider; +use OCA\OpenAi\TaskProcessing\AudioToAudioTranslateTaskType; use OCA\OpenAi\TaskProcessing\AudioToTextProvider; use OCA\OpenAi\TaskProcessing\ChangeToneProvider; use OCA\OpenAi\TaskProcessing\ChangeToneTaskType; @@ -104,6 +106,8 @@ public function register(IRegistrationContext $context): void { // Task processing if ($this->appConfig->getValueString(Application::APP_ID, 'translation_provider_enabled', '1') === '1') { $context->registerTaskProcessingProvider(TranslateProvider::class); + $context->registerTaskProcessingTaskType(AudioToAudioTranslateTaskType::class); + $context->registerTaskProcessingProvider(AudioToAudioTranslateProvider::class); } if ($this->appConfig->getValueString(Application::APP_ID, 'stt_provider_enabled', '1') === '1') { $context->registerTaskProcessingProvider(AudioToTextProvider::class); diff --git a/lib/Service/TranslationService.php b/lib/Service/TranslationService.php new file mode 100644 index 00000000..46e2b162 --- /dev/null +++ b/lib/Service/TranslationService.php @@ -0,0 +1,127 @@ +" } .'; + public const JSON_RESPONSE_FORMAT = [ + 'response_format' => [ + 'type' => 'json_schema', + 'json_schema' => [ + 'name' => 'TranslationResponse', + 'description' => 'A JSON object containing the translated text', + 'strict' => true, + 'schema' => [ + 'type' => 'object', + 'properties' => [ + 'translation' => [ + 'type' => 'string', + 'description' => 'The translated text', + ], + ], + 'required' => [ 'translation' ], + 'additionalProperties' => false, + ], + ], + ], + ]; + + public function __construct( + private OpenAiSettingsService $openAiSettingsService, + private LoggerInterface $logger, + private OpenAiAPIService $openAiAPIService, + private ChunkService $chunkService, + private ICacheFactory $cacheFactory, + private IFactory $l10nFactory, + ) { + } + + private function getCoreLanguagesByCode(): array { + $coreL = $this->l10nFactory->getLanguages(); + $coreLanguages = array_reduce(array_merge($coreL['commonLanguages'], $coreL['otherLanguages']), function ($carry, $val) { + $carry[$val['code']] = $val['name']; + return $carry; + }); + return $coreLanguages; + } + + public function translate( + string $inputText, string $sourceLanguageCode, string $targetLanguageCode, string $model, ?int $maxTokens, + ?string $userId, ?callable $reportProgress = null, + ): string { + $chunks = $this->chunkService->chunkSplitPrompt($inputText, true, $maxTokens); + $translation = ''; + $increase = 1.0 / (float)count($chunks); + $progress = 0.0; + $coreLanguages = $this->getCoreLanguagesByCode(); + + $toLanguage = $coreLanguages[$targetLanguageCode] ?? $targetLanguageCode; + + if ($sourceLanguageCode !== 'detect_language') { + $fromLanguage = $coreLanguages[$sourceLanguageCode] ?? $sourceLanguageCode; + $promptStart = 'Translate the following text from ' . $fromLanguage . ' to ' . $toLanguage . ': '; + } else { + $promptStart = 'Translate the following text to ' . $toLanguage . ': '; + } + + foreach ($chunks as $chunk) { + $progress += $increase; + $cacheKey = $sourceLanguageCode . '/' . $targetLanguageCode . '/' . md5($chunk); + + $cache = $this->cacheFactory->createDistributed('integration_openai'); + if ($cached = $cache->get($cacheKey)) { + $this->logger->debug('Using cached translation', ['cached' => $cached, 'cacheKey' => $cacheKey]); + $translation .= $cached; + if ($reportProgress !== null) { + $reportProgress($progress); + } + continue; + } + $prompt = $promptStart . PHP_EOL . PHP_EOL . $chunk; + + if ($this->openAiAPIService->isUsingOpenAi() || $this->openAiSettingsService->getChatEndpointEnabled()) { + $completionsObj = $this->openAiAPIService->createChatCompletion( + $userId, $model, $prompt, self::SYSTEM_PROMPT, null, 1, $maxTokens, self::JSON_RESPONSE_FORMAT + ); + $completions = $completionsObj['messages']; + } else { + $completions = $this->openAiAPIService->createCompletion( + $userId, $prompt . PHP_EOL . self::SYSTEM_PROMPT . PHP_EOL . PHP_EOL, 1, $model, $maxTokens + ); + } + + if ($reportProgress !== null) { + $reportProgress($progress); + } + + if (count($completions) === 0) { + $this->logger->error('Empty translation response received for chunk'); + continue; + } + + $completion = array_pop($completions); + $decodedCompletion = json_decode($completion, true); + if ( + !isset($decodedCompletion['translation']) + || !is_string($decodedCompletion['translation']) + || empty($decodedCompletion['translation']) + ) { + $this->logger->error('Invalid translation response received for chunk', ['response' => $completion]); + continue; + } + $translation .= $decodedCompletion['translation']; + $cache->set($cacheKey, $decodedCompletion['translation']); + continue; + } + return $translation; + } +} diff --git a/lib/TaskProcessing/AudioToAudioTranslateProvider.php b/lib/TaskProcessing/AudioToAudioTranslateProvider.php new file mode 100644 index 00000000..c06b882a --- /dev/null +++ b/lib/TaskProcessing/AudioToAudioTranslateProvider.php @@ -0,0 +1,195 @@ +openAiAPIService->getServiceName(Application::SERVICE_TYPE_STT); + } + + public function getTaskTypeId(): string { + return AudioToAudioTranslateTaskType::ID; + } + + public function getExpectedRuntime(): int { + return 60; + } + + public function getInputShapeEnumValues(): array { + $coreL = $this->l10nFactory->getLanguages(); + $languages = array_merge($coreL['commonLanguages'], $coreL['otherLanguages']); + $languageEnumValues = array_map(static function (array $language) { + return new ShapeEnumValue($language['name'], $language['code']); + }, $languages); + $detectLanguageEnumValue = new ShapeEnumValue($this->l->t('Detect language'), 'detect_language'); + return [ + 'origin_language' => array_merge([$detectLanguageEnumValue], $languageEnumValues), + 'target_language' => $languageEnumValues, + ]; + } + + public function getInputShapeDefaults(): array { + return [ + 'origin_language' => 'detect_language', + ]; + } + + + public function getOptionalInputShape(): array { + return []; + } + + public function getOptionalInputShapeEnumValues(): array { + return []; + } + + public function getOptionalInputShapeDefaults(): array { + return []; + } + + public function getOutputShapeEnumValues(): array { + return []; + } + + public function getOptionalOutputShape(): array { + return []; + } + + public function getOptionalOutputShapeEnumValues(): array { + return []; + } + + public function process(?string $userId, array $input, callable $reportProgress, bool $includeWatermark = true): array { + if (!isset($input['input']) || !$input['input'] instanceof File || !$input['input']->isReadable()) { + throw new ProcessingException('Invalid input file'); + } + $inputFile = $input['input']; + + if (!isset($input['origin_language']) || !is_string($input['origin_language'])) { + throw new ProcessingException('Invalid origin_language input'); + } + if (!isset($input['target_language']) || !is_string($input['target_language'])) { + throw new ProcessingException('Invalid target_language input'); + } + + // STT + $sttModel = $this->appConfig->getValueString(Application::APP_ID, 'default_stt_model_id', Application::DEFAULT_MODEL_ID, lazy: true) ?: Application::DEFAULT_MODEL_ID; + try { + $transcription = $this->openAiAPIService->transcribeFile($userId, $inputFile, false, $sttModel, $input['origin_language']); + } catch (Exception $e) { + $this->logger->warning('Transcription failed with: ' . $e->getMessage(), ['exception' => $e]); + throw new ProcessingException( + 'Transcription failed with: ' . $e->getMessage(), + $e->getCode(), + $e, + ); + } + + $reportProgress(0.3); + + // translate + $completionModel = $this->openAiAPIService->isUsingOpenAi() + ? ($this->appConfig->getValueString(Application::APP_ID, 'default_completion_model_id', Application::DEFAULT_MODEL_ID, lazy: true) ?: Application::DEFAULT_MODEL_ID) + : $this->appConfig->getValueString(Application::APP_ID, 'default_completion_model_id', lazy: true); + $maxTokens = $this->openAiSettingsService->getMaxTokens(); + + try { + $translatedText = $this->translationService->translate( + $transcription, $input['origin_language'], $input['target_language'], $completionModel, $maxTokens, $userId, + ); + + if (empty($translatedText)) { + throw new ProcessingException("Empty translation result from {$input['origin_language']} to {$input['target_language']}"); + } + } catch (Exception $e) { + throw new ProcessingException( + "Failed to translate from {$input['origin_language']} to {$input['target_language']}: {$e->getMessage()}", + $e->getCode(), + $e, + ); + } + + $reportProgress(0.6); + + // TTS + $ttsPrompt = $translatedText; + if ($includeWatermark) { + if ($userId !== null) { + $user = $this->userManager->getExistingUser($userId); + $lang = $this->l10nFactory->getUserLanguage($user); + $l = $this->l10nFactory->get(Application::APP_ID, $lang); + $ttsPrompt .= "\n\n" . $l->t('This was generated using Artificial Intelligence.'); + } else { + $ttsPrompt .= "\n\n" . $this->l->t('This was generated using Artificial Intelligence.'); + } + } + $ttsModel = $this->appConfig->getValueString(Application::APP_ID, 'default_speech_model_id', Application::DEFAULT_SPEECH_MODEL_ID, lazy: true) ?: Application::DEFAULT_SPEECH_MODEL_ID; + $voice = $this->appConfig->getValueString(Application::APP_ID, 'default_speech_voice', Application::DEFAULT_SPEECH_VOICE, lazy: true) ?: Application::DEFAULT_SPEECH_VOICE; + $speed = 1; + try { + $apiResponse = $this->openAiAPIService->requestSpeechCreation($userId, $ttsPrompt, $ttsModel, $voice, $speed); + + if (!isset($apiResponse['body'])) { + $this->logger->warning('Text to speech generation failed: no speech returned'); + throw new ProcessingException('Text to speech generation failed: no speech returned'); + } + $translatedAudio = $includeWatermark ? $this->watermarkingService->markAudio($apiResponse['body']) : $apiResponse['body']; + } catch (\Exception $e) { + $this->logger->warning('Text to speech generation failed with: ' . $e->getMessage(), ['exception' => $e]); + throw new ProcessingException( + 'Text to speech generation failed with: ' . $e->getMessage(), + $e->getCode(), + $e, + ); + } + + $reportProgress(1.0); + + // Translation + return [ + 'audio_output' => $translatedAudio, + 'text_output' => $translatedText, + ]; + } +} diff --git a/lib/TaskProcessing/AudioToAudioTranslateTaskType.php b/lib/TaskProcessing/AudioToAudioTranslateTaskType.php new file mode 100644 index 00000000..fa7e4c19 --- /dev/null +++ b/lib/TaskProcessing/AudioToAudioTranslateTaskType.php @@ -0,0 +1,87 @@ +l->t('Translate audio'); + } + + /** + * @inheritDoc + */ + public function getDescription(): string { + return $this->l->t('Translate the input voice'); + } + + /** + * @return string + */ + public function getId(): string { + return self::ID; + } + + /** + * @return ShapeDescriptor[] + */ + public function getInputShape(): array { + return [ + 'input' => new ShapeDescriptor( + $this->l->t('Input audio'), + $this->l->t('The audio to translate'), + EShapeType::Audio, + ), + 'origin_language' => new ShapeDescriptor( + $this->l->t('Origin language'), + $this->l->t('The language of the origin audio'), + EShapeType::Enum, + ), + 'target_language' => new ShapeDescriptor( + $this->l->t('Target language'), + $this->l->t('The desired language to translate the origin audio in'), + EShapeType::Enum, + ), + ]; + } + + /** + * @return ShapeDescriptor[] + */ + public function getOutputShape(): array { + return [ + 'text_output' => new ShapeDescriptor( + $this->l->t('Text output'), + $this->l->t('The text translation'), + EShapeType::Text, + ), + 'audio_output' => new ShapeDescriptor( + $this->l->t('Audio output'), + $this->l->t('The audio translation'), + EShapeType::Audio, + ), + ]; + } +} diff --git a/lib/TaskProcessing/TranslateProvider.php b/lib/TaskProcessing/TranslateProvider.php index 2325d40e..b6e81de1 100644 --- a/lib/TaskProcessing/TranslateProvider.php +++ b/lib/TaskProcessing/TranslateProvider.php @@ -11,11 +11,10 @@ use Exception; use OCA\OpenAi\AppInfo\Application; -use OCA\OpenAi\Service\ChunkService; use OCA\OpenAi\Service\OpenAiAPIService; use OCA\OpenAi\Service\OpenAiSettingsService; +use OCA\OpenAi\Service\TranslationService; use OCP\IAppConfig; -use OCP\ICacheFactory; use OCP\IL10N; use OCP\L10N\IFactory; use OCP\TaskProcessing\EShapeType; @@ -25,42 +24,16 @@ use OCP\TaskProcessing\ShapeDescriptor; use OCP\TaskProcessing\ShapeEnumValue; use OCP\TaskProcessing\TaskTypes\TextToTextTranslate; -use Psr\Log\LoggerInterface; class TranslateProvider implements ISynchronousProvider { - public const SYSTEM_PROMPT = 'You are a translations expert that ONLY outputs a valid JSON with the translated text in the following format: { "translation": "" } .'; - public const JSON_RESPONSE_FORMAT = [ - 'response_format' => [ - 'type' => 'json_schema', - 'json_schema' => [ - 'name' => 'TranslationResponse', - 'description' => 'A JSON object containing the translated text', - 'strict' => true, - 'schema' => [ - 'type' => 'object', - 'properties' => [ - 'translation' => [ - 'type' => 'string', - 'description' => 'The translated text', - ], - ], - 'required' => [ 'translation' ], - 'additionalProperties' => false, - ], - ], - ], - ]; - public function __construct( private OpenAiAPIService $openAiAPIService, private IAppConfig $appConfig, private OpenAiSettingsService $openAiSettingsService, private IL10N $l, private IFactory $l10nFactory, - private ICacheFactory $cacheFactory, - private LoggerInterface $logger, - private ChunkService $chunkService, + private TranslationService $translationService, private ?string $userId, ) { } @@ -143,23 +116,7 @@ public function getOptionalOutputShapeEnumValues(): array { return []; } - private function getCoreLanguagesByCode(): array { - $coreL = $this->l10nFactory->getLanguages(); - $coreLanguages = array_reduce(array_merge($coreL['commonLanguages'], $coreL['otherLanguages']), function ($carry, $val) { - $carry[$val['code']] = $val['name']; - return $carry; - }); - return $coreLanguages; - } - public function process(?string $userId, array $input, callable $reportProgress): array { - /* - foreach (range(1, 20) as $i) { - $reportProgress($i / 100 * 5); - error_log('aa ' . ($i / 100 * 5)); - sleep(1); - } - */ $startTime = time(); if (isset($input['model']) && is_string($input['model'])) { $model = $input['model']; @@ -180,80 +137,22 @@ public function process(?string $userId, array $input, callable $reportProgress) $maxTokens = $input['max_tokens']; } - $chunks = $this->chunkService->chunkSplitPrompt($inputText, true, $maxTokens); - $result = ''; - $increase = 1.0 / (float)count($chunks); - $progress = 0.0; try { - $coreLanguages = $this->getCoreLanguagesByCode(); - - $fromLanguage = $input['origin_language']; - $toLanguage = $coreLanguages[$input['target_language']] ?? $input['target_language']; - - if ($input['origin_language'] !== 'detect_language') { - $fromLanguage = $coreLanguages[$input['origin_language']] ?? $input['origin_language']; - $promptStart = 'Translate the following text from ' . $fromLanguage . ' to ' . $toLanguage . ': '; - } else { - $promptStart = 'Translate the following text to ' . $toLanguage . ': '; - } - - foreach ($chunks as $chunk) { - $progress += $increase; - $cacheKey = ($input['origin_language'] ?? '') . '/' . $input['target_language'] . '/' . md5($chunk); - - $cache = $this->cacheFactory->createDistributed('integration_openai'); - if ($cached = $cache->get($cacheKey)) { - $this->logger->debug('Using cached translation', ['cached' => $cached, 'cacheKey' => $cacheKey]); - $result .= $cached; - $reportProgress($progress); - continue; - } - $prompt = $promptStart . PHP_EOL . PHP_EOL . $chunk; - - if ($this->openAiAPIService->isUsingOpenAi() || $this->openAiSettingsService->getChatEndpointEnabled()) { - $completionsObj = $this->openAiAPIService->createChatCompletion( - $userId, $model, $prompt, self::SYSTEM_PROMPT, null, 1, $maxTokens, self::JSON_RESPONSE_FORMAT - ); - $completions = $completionsObj['messages']; - } else { - $completions = $this->openAiAPIService->createCompletion( - $userId, $prompt . PHP_EOL . self::SYSTEM_PROMPT . PHP_EOL . PHP_EOL, 1, $model, $maxTokens - ); - } - - $reportProgress($progress); - - if (count($completions) === 0) { - $this->logger->error('Empty translation response received for chunk'); - continue; - } - - $completion = array_pop($completions); - $decodedCompletion = json_decode($completion, true); - if ( - !isset($decodedCompletion['translation']) - || !is_string($decodedCompletion['translation']) - || empty($decodedCompletion['translation']) - ) { - $this->logger->error('Invalid translation response received for chunk', ['response' => $completion]); - continue; - } - $result .= $decodedCompletion['translation']; - $cache->set($cacheKey, $decodedCompletion['translation']); - continue; - } + $translation = $this->translationService->translate( + $inputText, $input['origin_language'], $input['target_language'], $model, $maxTokens, $userId, $reportProgress, + ); $endTime = time(); $this->openAiAPIService->updateExpTextProcessingTime($endTime - $startTime); - if (empty(trim($result))) { - throw new ProcessingException("Empty translation result from {$fromLanguage} to {$toLanguage}"); + if (empty($translation)) { + throw new ProcessingException("Empty translation result from {$input['origin_language']} to {$input['target_language']}"); } - return ['output' => trim($result)]; + return ['output' => $translation]; } catch (Exception $e) { throw new ProcessingException( - "Failed to translate from {$fromLanguage} to {$toLanguage}: {$e->getMessage()}", + "Failed to translate from {$input['origin_language']} to {$input['target_language']}: {$e->getMessage()}", $e->getCode(), $e, ); diff --git a/tests/unit/Providers/OpenAiProviderTest.php b/tests/unit/Providers/OpenAiProviderTest.php index ddd9beee..be29967a 100644 --- a/tests/unit/Providers/OpenAiProviderTest.php +++ b/tests/unit/Providers/OpenAiProviderTest.php @@ -18,6 +18,7 @@ use OCA\OpenAi\Service\OpenAiAPIService; use OCA\OpenAi\Service\OpenAiSettingsService; use OCA\OpenAi\Service\QuotaRuleService; +use OCA\OpenAi\Service\TranslationService; use OCA\OpenAi\Service\WatermarkingService; use OCA\OpenAi\TaskProcessing\ChangeToneProvider; use OCA\OpenAi\TaskProcessing\EmojiProvider; @@ -32,6 +33,7 @@ use OCP\Http\Client\IClientService; use OCP\IAppConfig; use OCP\ICacheFactory; +use OCP\L10N\IFactory; use PHPUnit\Framework\MockObject\MockObject; use Psr\Log\LoggerInterface; use Test\TestCase; @@ -49,6 +51,7 @@ class OpenAiProviderTest extends TestCase { private OpenAiAPIService $openAiApiService; private OpenAiSettingsService $openAiSettingsService; private ChunkService $chunkService; + private TranslationService $translationService; /** * @var MockObject|IClient */ @@ -90,6 +93,16 @@ protected function setUp(): void { $clientService, ); + $this->translationService = \OCP\Server::get(TranslationService::class); + $this->translationService = new TranslationService( + $this->openAiSettingsService, + \OCP\Server::get(\Psr\Log\LoggerInterface::class), + $this->openAiApiService, + $this->chunkService, + \OCP\Server::get(ICacheFactory::class), + \OCP\Server::get(IFactory::class), + ); + $this->openAiSettingsService->setUserApiKey(self::TEST_USER1, 'This is a PHPUnit test API key'); } @@ -512,9 +525,7 @@ public function testTranslationProvider(): void { $this->openAiSettingsService, $this->createMock(\OCP\IL10N::class), \OCP\Server::get(\OCP\L10N\IFactory::class), - $this->createMock(\OCP\ICacheFactory::class), - $this->createMock(\Psr\Log\LoggerInterface::class), - $this->chunkService, + $this->translationService, self::TEST_USER1, ); @@ -554,13 +565,13 @@ public function testTranslationProvider(): void { $options['body'] = json_encode([ 'model' => Application::DEFAULT_COMPLETION_MODEL_ID, 'messages' => [ - ['role' => 'system', 'content' => $translationProvider::SYSTEM_PROMPT], + ['role' => 'system', 'content' => TranslationService::SYSTEM_PROMPT], ['role' => 'user', 'content' => $prompt], ], 'n' => $n, 'max_completion_tokens' => Application::DEFAULT_MAX_NUM_OF_TOKENS, 'user' => self::TEST_USER1, - ...$translationProvider::JSON_RESPONSE_FORMAT, + ...TranslationService::JSON_RESPONSE_FORMAT, ]); $iResponse = $this->createMock(\OCP\Http\Client\IResponse::class);