nextcloud · julien-nc · Jan 21, 2026 · kyteinsky · Feb 4, 2026 · kyteinsky
diff --git a/composer.lock b/composer.lock
diff --git a/lib/AppInfo/Application.php b/lib/AppInfo/Application.php
@@ -11,6 +11,8 @@
 use OCA\OpenAi\Notification\Notifier;
 use OCA\OpenAi\OldProcessing\Translation\TranslationProvider as OldTranslationProvider;
 use OCA\OpenAi\TaskProcessing\AudioToAudioChatProvider;
+use OCA\OpenAi\TaskProcessing\AudioToAudioTranslateProvider;
+use OCA\OpenAi\TaskProcessing\AudioToAudioTranslateTaskType;
 use OCA\OpenAi\TaskProcessing\AudioToTextProvider;
 use OCA\OpenAi\TaskProcessing\ChangeToneProvider;
 use OCA\OpenAi\TaskProcessing\ChangeToneTaskType;
@@ -104,6 +106,8 @@ public function register(IRegistrationContext $context): void {
 		// Task processing
 		if ($this->appConfig->getValueString(Application::APP_ID, 'translation_provider_enabled', '1') === '1') {
 			$context->registerTaskProcessingProvider(TranslateProvider::class);
+			$context->registerTaskProcessingTaskType(AudioToAudioTranslateTaskType::class);
+			$context->registerTaskProcessingProvider(AudioToAudioTranslateProvider::class);
 		}
 		if ($this->appConfig->getValueString(Application::APP_ID, 'stt_provider_enabled', '1') === '1') {
 			$context->registerTaskProcessingProvider(AudioToTextProvider::class);

diff --git a/lib/Service/TranslationService.php b/lib/Service/TranslationService.php
@@ -0,0 +1,127 @@
+<?php
+
+/**
+ * SPDX-FileCopyrightText: 2026 Nextcloud GmbH and Nextcloud contributors
+ * SPDX-License-Identifier: AGPL-3.0-or-later
+ */
+
+namespace OCA\OpenAi\Service;
+
+use OCP\ICacheFactory;
+use OCP\L10N\IFactory;
+use Psr\Log\LoggerInterface;
+
+class TranslationService {
+	/**
+	 * Instruction given to the model with every translation request. It asks for a JSON
+	 * object of the form { "translation": "..." }, which translate() decodes.
+	 */
+	public const SYSTEM_PROMPT = 'You are a translations expert that ONLY outputs a valid JSON with the translated text in the following format: { "translation": "<translated text>" } .';
+	/**
+	 * Extra parameters translate() passes to the chat completion call: a strict JSON schema
+	 * describing an object with a single required string property "translation" and no
+	 * additional properties.
+	 */
+	public const JSON_RESPONSE_FORMAT = [
+		'response_format' => [
+			'type' => 'json_schema',
+			'json_schema' => [
+				'name' => 'TranslationResponse',
+				'description' => 'A JSON object containing the translated text',
+				'strict' => true,
+				'schema' => [
+					'type' => 'object',
+					'properties' => [
+						'translation' => [
+							'type' => 'string',
+							'description' => 'The translated text',
+						],
+					],
+					'required' => [ 'translation' ],
+					'additionalProperties' => false,
+				],
+			],
+		],
+	];
+
+	/**
+	 * All collaborators are injected and kept as private promoted properties; the
+	 * constructor itself does no work.
+	 */
+	public function __construct(
+		private OpenAiSettingsService $openAiSettingsService,
+		private LoggerInterface $logger,
+		private OpenAiAPIService $openAiAPIService,
+		private ChunkService $chunkService,
+		private ICacheFactory $cacheFactory,
+		private IFactory $l10nFactory,
+	) {
+	}
+
+	/**
+	 * Build a lookup of the languages reported by the l10n factory (common and other
+	 * languages merged), indexed by language code.
+	 *
+	 * @return array<string, string> language code => language name
+	 */
+	private function getCoreLanguagesByCode(): array {
+		$coreL = $this->l10nFactory->getLanguages();
+		// array_column() always yields an array. array_reduce() without an initial value
+		// returns null for an empty list, which would violate the array return type.
+		return array_column(
+			array_merge($coreL['commonLanguages'], $coreL['otherLanguages']),
+			'name',
+			'code',
+		);
+	}
+
+	/**
+	 * Translate a text chunk by chunk.
+	 *
+	 * The text is split with ChunkService::chunkSplitPrompt(). Each chunk is first looked up
+	 * in the distributed cache and only sent to the completion API on a cache miss. Chunks
+	 * for which no usable translation comes back are logged and left out of the result.
+	 *
+	 * @param string $inputText text to translate
+	 * @param string $sourceLanguageCode language code of the input, or 'detect_language' to leave the source language out of the prompt
+	 * @param string $targetLanguageCode language code to translate to
+	 * @param string $model model ID forwarded to the completion API
+	 * @param int|null $maxTokens forwarded to the chunk splitter and to the completion API
+	 * @param string|null $userId forwarded to the completion API
+	 * @param callable|null $reportProgress called once per chunk with the fraction of chunks handled so far
+	 * @return string the concatenated chunk translations; empty when nothing could be translated
+	 */
+	public function translate(
+		string $inputText, string $sourceLanguageCode, string $targetLanguageCode, string $model, ?int $maxTokens,
+		?string $userId, ?callable $reportProgress = null,
+	): string {
+		$chunks = $this->chunkService->chunkSplitPrompt($inputText, true, $maxTokens);
+		$chunkCount = count($chunks);
+		if ($chunkCount === 0) {
+			// nothing to translate; dividing by a zero chunk count below would throw
+			return '';
+		}
+		$translation = '';
+		$increase = 1.0 / (float)$chunkCount;
+		$progress = 0.0;
+		$coreLanguages = $this->getCoreLanguagesByCode();
+
+		// fall back to the raw code when the language is not known to the l10n factory
+		$toLanguage = $coreLanguages[$targetLanguageCode] ?? $targetLanguageCode;
+
+		if ($sourceLanguageCode !== 'detect_language') {
+			$fromLanguage = $coreLanguages[$sourceLanguageCode] ?? $sourceLanguageCode;
+			$promptStart = 'Translate the following text from ' . $fromLanguage . ' to ' . $toLanguage . ': ';
+		} else {
+			$promptStart = 'Translate the following text to ' . $toLanguage . ': ';
+		}
+
+		// the cache handle does not depend on the chunk, so create it only once
+		$cache = $this->cacheFactory->createDistributed('integration_openai');
+
+		foreach ($chunks as $chunk) {
+			$progress += $increase;
+			$cacheKey = $sourceLanguageCode . '/' . $targetLanguageCode . '/' . md5($chunk);
+
+			$cached = $cache->get($cacheKey);
+			// explicit string check: a truthiness test would ignore a cached "0"
+			if (is_string($cached) && $cached !== '') {
+				$this->logger->debug('Using cached translation', ['cached' => $cached, 'cacheKey' => $cacheKey]);
+				$translation .= $cached;
+				if ($reportProgress !== null) {
+					$reportProgress($progress);
+				}
+				continue;
+			}
+			$prompt = $promptStart . PHP_EOL . PHP_EOL . $chunk;
+
+			if ($this->openAiAPIService->isUsingOpenAi() || $this->openAiSettingsService->getChatEndpointEnabled()) {
+				$completionsObj = $this->openAiAPIService->createChatCompletion(
+					$userId, $model, $prompt, self::SYSTEM_PROMPT, null, 1, $maxTokens, self::JSON_RESPONSE_FORMAT
+				);
+				$completions = $completionsObj['messages'];
+			} else {
+				// no chat endpoint: the JSON instruction is appended to the prompt itself
+				$completions = $this->openAiAPIService->createCompletion(
+					$userId, $prompt . PHP_EOL . self::SYSTEM_PROMPT . PHP_EOL . PHP_EOL, 1, $model, $maxTokens
+				);
+			}
+
+			if ($reportProgress !== null) {
+				$reportProgress($progress);
+			}
+
+			if (count($completions) === 0) {
+				$this->logger->error('Empty translation response received for chunk');
+				continue;
+			}
+
+			$completion = array_pop($completions);
+			$decodedCompletion = json_decode($completion, true);
+			$translatedChunk = is_array($decodedCompletion) ? ($decodedCompletion['translation'] ?? null) : null;
+			// compare against '' rather than using empty(), which would also reject a valid "0"
+			if (!is_string($translatedChunk) || $translatedChunk === '') {
+				$this->logger->error('Invalid translation response received for chunk', ['response' => $completion]);
+				continue;
+			}
+			$translation .= $translatedChunk;
+			$cache->set($cacheKey, $translatedChunk);
+		}
+		return $translation;
+	}
+}
diff --git a/lib/TaskProcessing/AudioToAudioTranslateProvider.php b/lib/TaskProcessing/AudioToAudioTranslateProvider.php
@@ -0,0 +1,195 @@
+<?php
+
+declare(strict_types=1);
+
+/**
+ * SPDX-FileCopyrightText: 2026 Nextcloud GmbH and Nextcloud contributors
+ * SPDX-License-Identifier: AGPL-3.0-or-later
+ */
+
+namespace OCA\OpenAi\TaskProcessing;
+
+use Exception;
+use OCA\OpenAi\AppInfo\Application;
+use OCA\OpenAi\Service\OpenAiAPIService;
+use OCA\OpenAi\Service\OpenAiSettingsService;
+use OCA\OpenAi\Service\TranslationService;
+use OCA\OpenAi\Service\WatermarkingService;
+use OCP\Files\File;
+use OCP\IAppConfig;
+use OCP\IL10N;
+use OCP\IUserManager;
+use OCP\L10N\IFactory;
+use OCP\TaskProcessing\Exception\ProcessingException;
+use OCP\TaskProcessing\ISynchronousWatermarkingProvider;
+use OCP\TaskProcessing\ShapeEnumValue;
+use Psr\Log\LoggerInterface;
+
+class AudioToAudioTranslateProvider implements ISynchronousWatermarkingProvider {
+
+	/**
+	 * All collaborators are injected and kept as private promoted properties; the
+	 * constructor itself does no work.
+	 */
+	public function __construct(
+		private OpenAiAPIService $openAiAPIService,
+		private TranslationService $translationService,
+		private OpenAiSettingsService $openAiSettingsService,
+		private WatermarkingService $watermarkingService,
+		private LoggerInterface $logger,
+		private IFactory $l10nFactory,
+		private IL10N $l,
+		private IAppConfig $appConfig,
+		private IUserManager $userManager,
+	) {
+	}
+
+	/**
+	 * Identifier of this provider: the app ID followed by '-audio2audio:translate'.
+	 */
+	public function getId(): string {
+		return Application::APP_ID . '-audio2audio:translate';
+	}
+
+	/**
+	 * Name of this provider: the service name OpenAiAPIService reports for the
+	 * speech-to-text service type.
+	 *
+	 * NOTE(review): process() also uses the completion and text-to-speech services, but only
+	 * the STT service name is reported here; confirm this is intended.
+	 */
+	public function getName(): string {
+		return $this->openAiAPIService->getServiceName(Application::SERVICE_TYPE_STT);
+	}
+
+	/**
+	 * ID of the task type this provider handles (AudioToAudioTranslateTaskType).
+	 */
+	public function getTaskTypeId(): string {
+		return AudioToAudioTranslateTaskType::ID;
+	}
+
+	/**
+	 * Fixed runtime estimate for one task.
+	 *
+	 * NOTE(review): presumably expressed in seconds; confirm against the provider interface.
+	 */
+	public function getExpectedRuntime(): int {
+		return 60;
+	}
+
+	/**
+	 * Enum values offered for the language inputs.
+	 *
+	 * 'target_language' lists every language reported by the l10n factory (common and other
+	 * languages merged); 'origin_language' lists the same values preceded by a
+	 * 'detect_language' entry.
+	 */
+	public function getInputShapeEnumValues(): array {
+		$available = $this->l10nFactory->getLanguages();
+		$targetValues = array_map(
+			static fn (array $entry) => new ShapeEnumValue($entry['name'], $entry['code']),
+			array_merge($available['commonLanguages'], $available['otherLanguages']),
+		);
+		$originValues = array_merge(
+			[new ShapeEnumValue($this->l->t('Detect language'), 'detect_language')],
+			$targetValues,
+		);
+		return [
+			'origin_language' => $originValues,
+			'target_language' => $targetValues,
+		];
+	}
+
+	/**
+	 * Default input values: the origin language defaults to 'detect_language'; no default
+	 * is given for the target language.
+	 */
+	public function getInputShapeDefaults(): array {
+		return [
+			'origin_language' => 'detect_language',
+		];
+	}
+
+
+	/**
+	 * Optional inputs accepted by this provider: none.
+	 *
+	 * process() always takes the speech model and voice from the app configuration and uses
+	 * a fixed speed, so no 'voice', 'model' or 'speed' input is declared here.
+	 */
+	public function getOptionalInputShape(): array {
+		return [];
+	}
+
+	/**
+	 * No enum values are declared for optional inputs.
+	 */
+	public function getOptionalInputShapeEnumValues(): array {
+		return [];
+	}
+
+	/**
+	 * No default values are declared for optional inputs.
+	 */
+	public function getOptionalInputShapeDefaults(): array {
+		return [];
+	}
+
+	/**
+	 * No enum values are declared for the outputs.
+	 */
+	public function getOutputShapeEnumValues(): array {
+		return [];
+	}
+
+	/**
+	 * No optional outputs are declared.
+	 */
+	public function getOptionalOutputShape(): array {
+		return [];
+	}
+
+	/**
+	 * No enum values are declared for optional outputs.
+	 */
+	public function getOptionalOutputShapeEnumValues(): array {
+		return [];
+	}
+
+	/**
+	 * Run the audio translation pipeline: transcribe the input audio, translate the
+	 * transcription, then synthesize speech from the translated text.
+	 *
+	 * @param string|null $userId user the API requests are made for, if any
+	 * @param array $input expects 'input' (a readable File) plus 'origin_language' and 'target_language' (strings)
+	 * @param callable $reportProgress called with 0.3, 0.6 and 1.0 as the three stages complete
+	 * @param bool $includeWatermark whether to append the spoken AI notice to the text and mark the generated audio
+	 * @return array 'audio_output' (the generated speech) and 'text_output' (the translated text)
+	 * @throws ProcessingException if the input is invalid or any of the three stages fails
+	 */
+	public function process(?string $userId, array $input, callable $reportProgress, bool $includeWatermark = true): array {
+		if (!isset($input['input']) || !$input['input'] instanceof File || !$input['input']->isReadable()) {
+			throw new ProcessingException('Invalid input file');
+		}
+		$inputFile = $input['input'];
+
+		if (!isset($input['origin_language']) || !is_string($input['origin_language'])) {
+			throw new ProcessingException('Invalid origin_language input');
+		}
+		if (!isset($input['target_language']) || !is_string($input['target_language'])) {
+			throw new ProcessingException('Invalid target_language input');
+		}
+
+		// STT
+		$sttModel = $this->appConfig->getValueString(Application::APP_ID, 'default_stt_model_id', Application::DEFAULT_MODEL_ID, lazy: true) ?: Application::DEFAULT_MODEL_ID;
+		try {
+			$transcription = $this->openAiAPIService->transcribeFile($userId, $inputFile, false, $sttModel, $input['origin_language']);
+		} catch (Exception $e) {
+			$this->logger->warning('Transcription failed with: ' . $e->getMessage(), ['exception' => $e]);
+			throw new ProcessingException(
+				'Transcription failed with: ' . $e->getMessage(),
+				// exception codes are not guaranteed to be integers
+				(int)$e->getCode(),
+				$e,
+			);
+		}
+
+		$reportProgress(0.3);
+
+		// translate
+		$completionModel = $this->openAiAPIService->isUsingOpenAi()
+			? ($this->appConfig->getValueString(Application::APP_ID, 'default_completion_model_id', Application::DEFAULT_MODEL_ID, lazy: true) ?: Application::DEFAULT_MODEL_ID)
+			: $this->appConfig->getValueString(Application::APP_ID, 'default_completion_model_id', lazy: true);
+		$maxTokens = $this->openAiSettingsService->getMaxTokens();
+
+		try {
+			$translatedText = $this->translationService->translate(
+				$transcription, $input['origin_language'], $input['target_language'], $completionModel, $maxTokens, $userId,
+			);
+		} catch (Exception $e) {
+			$this->logger->warning('Translation failed with: ' . $e->getMessage(), ['exception' => $e]);
+			throw new ProcessingException(
+				"Failed to translate from {$input['origin_language']} to {$input['target_language']}: {$e->getMessage()}",
+				(int)$e->getCode(),
+				$e,
+			);
+		}
+		// checked outside the try block so this error is not caught and wrapped a second time
+		if ($translatedText === '') {
+			throw new ProcessingException("Empty translation result from {$input['origin_language']} to {$input['target_language']}");
+		}
+
+		$reportProgress(0.6);
+
+		// TTS
+		$ttsPrompt = $translatedText;
+		if ($includeWatermark) {
+			if ($userId !== null) {
+				// word the spoken notice in the language of the user the task runs for
+				$user = $this->userManager->getExistingUser($userId);
+				$lang = $this->l10nFactory->getUserLanguage($user);
+				$l = $this->l10nFactory->get(Application::APP_ID, $lang);
+				$ttsPrompt .= "\n\n" . $l->t('This was generated using Artificial Intelligence.');
+			} else {
+				$ttsPrompt .= "\n\n" . $this->l->t('This was generated using Artificial Intelligence.');
+			}
+		}
+		$ttsModel = $this->appConfig->getValueString(Application::APP_ID, 'default_speech_model_id', Application::DEFAULT_SPEECH_MODEL_ID, lazy: true) ?: Application::DEFAULT_SPEECH_MODEL_ID;
+		$voice = $this->appConfig->getValueString(Application::APP_ID, 'default_speech_voice', Application::DEFAULT_SPEECH_VOICE, lazy: true) ?: Application::DEFAULT_SPEECH_VOICE;
+		$speed = 1;
+		try {
+			$apiResponse = $this->openAiAPIService->requestSpeechCreation($userId, $ttsPrompt, $ttsModel, $voice, $speed);
+
+			if (!isset($apiResponse['body'])) {
+				$this->logger->warning('Text to speech generation failed: no speech returned');
+				throw new ProcessingException('Text to speech generation failed: no speech returned');
+			}
+			$translatedAudio = $includeWatermark ? $this->watermarkingService->markAudio($apiResponse['body']) : $apiResponse['body'];
+		} catch (ProcessingException $e) {
+			// already logged and worded above; do not wrap or log it a second time
+			throw $e;
+		} catch (Exception $e) {
+			$this->logger->warning('Text to speech generation failed with: ' . $e->getMessage(), ['exception' => $e]);
+			throw new ProcessingException(
+				'Text to speech generation failed with: ' . $e->getMessage(),
+				(int)$e->getCode(),
+				$e,
+			);
+		}
+
+		$reportProgress(1.0);
+
+		// translated speech together with the text it was generated from
+		return [
+			'audio_output' => $translatedAudio,
+			'text_output' => $translatedText,
+		];
+	}
+}