# Patch summary: bill Gemini image generation per returned image.
#
# - packages/app/server/src/providers/GeminiProvider.ts
#     * GeminiCandidate parts: `text` becomes optional and an optional
#       `inlineData { mimeType, data }` field is added.
#     * For every candidate part whose inlineData.mimeType starts with "image/",
#       GeminiImageGenerationToolPricing.cost_per_image is added to a Decimal
#       `imageCost`, an info log line is emitted, and `imageCost` is added to the
#       token-based cost via `.plus(imageCost)`.
# - packages/app/server/src/services/AccountingService.ts
#     * Adds GeminiImageGenerationToolPricing to an existing import list.
# - packages/sdk/ts/src/index.ts, supported-models/types.ts, supported-models/tools/gemini.ts
#     * Adds the GeminiImageGenerationPricing interface (`cost_per_image: number`),
#       the GeminiImageGenerationToolPricing constant (cost_per_image: 0.039),
#       an optional `gemini_flash_image` field on ImageGenerationPricing, and the
#       matching exports.
# - packages/tests/provider-smoke/gemini-generate-image.test.ts (new file)
#     * Calls generateText for 'gemini-2.5-flash-image' and
#       'gemini-2.5-flash-image-preview' and expects a file in `result.files`
#       whose mediaType starts with "image/" and whose base64 is non-empty.
#
# NOTE(review): the image-detection loop reads `parsed`, which in the visible
#   hunks appears to belong to the non-streaming path; streamed responses that
#   contain images may therefore not be charged -- confirm against the full file.
# NOTE(review): `GeminiModels` is imported by the new test but not used in the
#   lines shown, and the AccountingService import has no visible use in this
#   patch -- confirm or drop.
# NOTE(review): `gemini_flash_image?` on ImageGenerationPricing has no reader in
#   this patch -- confirm it is needed.
# NOTE(review): the test's try/catch also catches failed expect() calls and
#   rethrows them through getApiErrorDetails -- confirm that is intended.
# NOTE(review): this patch text has had its line breaks collapsed and will not
#   apply as-is; regenerate it from the commit before applying.
diff --git a/packages/app/server/src/providers/GeminiProvider.ts b/packages/app/server/src/providers/GeminiProvider.ts index a97e5dc47..463c0e319 100644 --- a/packages/app/server/src/providers/GeminiProvider.ts +++ b/packages/app/server/src/providers/GeminiProvider.ts @@ -3,6 +3,8 @@ import { getCostPerToken } from '../services/AccountingService'; import { LlmTransactionMetadata, Transaction } from '../types'; import { BaseProvider } from './BaseProvider'; import { ProviderType } from './ProviderType'; +import { GeminiImageGenerationToolPricing } from '@merit-systems/echo-typescript-sdk'; +import { Decimal } from '@prisma/client/runtime/library'; import { env } from '../env'; interface GeminiUsage { @@ -14,7 +16,11 @@ interface GeminiUsage { interface GeminiCandidate { content: { parts: Array<{ - text: string; + text?: string; + inlineData?: { + mimeType: string; + data: string; // base64 encoded + }; }>; }; finishReason?: string; @@ -131,6 +137,7 @@ export class GeminiProvider extends BaseProvider { let candidatesTokens = 0; let totalTokens = 0; let providerId = 'gemini-response'; + let imageCost = new Decimal(0); if (this.getIsStream()) { const usage = parseSSEGeminiFormat(data); @@ -152,6 +159,23 @@ export class GeminiProvider extends BaseProvider { totalTokens = parsed.usageMetadata.totalTokenCount || 0; } + + if (parsed?.candidates) { + for (const candidate of parsed.candidates) { + for (const part of candidate.content?.parts || []) { + if (part.inlineData && part.inlineData.mimeType?.startsWith('image/')) { + + imageCost = imageCost.plus( + new Decimal(GeminiImageGenerationToolPricing.cost_per_image) + ); + logger.info( + `Gemini image generation detected: ${part.inlineData.mimeType}` + ); + } + } + } + } + // Try to get a unique identifier from the response // Gemini doesn't return an ID like OpenAI, so we'll generate one based on content if (parsed?.candidates && parsed.candidates.length > 0) { @@ -179,7 +203,7 @@ export class GeminiProvider extends 
BaseProvider { this.getModel(), promptTokens, candidatesTokens - ), + ).plus(imageCost), status: 'success', }; diff --git a/packages/app/server/src/services/AccountingService.ts b/packages/app/server/src/services/AccountingService.ts index 02e51e14e..69a198942 100644 --- a/packages/app/server/src/services/AccountingService.ts +++ b/packages/app/server/src/services/AccountingService.ts @@ -6,6 +6,7 @@ import { GroqModels, OpenAIImageModels, SupportedOpenAIResponseToolPricing, + GeminiImageGenerationToolPricing, SupportedModel, SupportedImageModel, SupportedVideoModel, diff --git a/packages/sdk/ts/src/index.ts b/packages/sdk/ts/src/index.ts index 521a30f93..913d93c17 100644 --- a/packages/sdk/ts/src/index.ts +++ b/packages/sdk/ts/src/index.ts @@ -25,6 +25,7 @@ export type { ImageGenerationQualityPricing, ImageGenerationModelPricing, ImageGenerationPricing, + GeminiImageGenerationPricing, CodeInterpreterPricing, FileSearchPricing, WebSearchModelPricing, @@ -37,6 +38,7 @@ export { SupportedOpenAIResponseTools, SupportedOpenAIResponseToolPricing, } from './supported-models/responses/openai'; +export { GeminiImageGenerationToolPricing } from './supported-models/tools/gemini'; export { OpenAIModels } from './supported-models/chat/openai'; export type { OpenAIModel } from './supported-models/chat/openai'; export { AnthropicModels } from './supported-models/chat/anthropic'; diff --git a/packages/sdk/ts/src/supported-models/tools/gemini.ts b/packages/sdk/ts/src/supported-models/tools/gemini.ts new file mode 100644 index 000000000..9d856fad7 --- /dev/null +++ b/packages/sdk/ts/src/supported-models/tools/gemini.ts @@ -0,0 +1,5 @@ +import { GeminiImageGenerationPricing } from '../types'; + +export const GeminiImageGenerationToolPricing: GeminiImageGenerationPricing = { + cost_per_image: 0.039, +}; diff --git a/packages/sdk/ts/src/supported-models/types.ts b/packages/sdk/ts/src/supported-models/types.ts index 715f37664..4ddab1a26 100644 --- 
a/packages/sdk/ts/src/supported-models/types.ts +++ b/packages/sdk/ts/src/supported-models/types.ts @@ -26,8 +26,13 @@ export interface ImageGenerationModelPricing { high: ImageGenerationQualityPricing; } +export interface GeminiImageGenerationPricing { + cost_per_image: number; +} + export interface ImageGenerationPricing { gpt_image_1: ImageGenerationModelPricing; + gemini_flash_image?: GeminiImageGenerationPricing; } export interface CodeInterpreterPricing { diff --git a/packages/tests/provider-smoke/gemini-generate-image.test.ts b/packages/tests/provider-smoke/gemini-generate-image.test.ts new file mode 100644 index 000000000..26da908ac --- /dev/null +++ b/packages/tests/provider-smoke/gemini-generate-image.test.ts @@ -0,0 +1,56 @@ +import { + createEchoGoogle, + GeminiModels, +} from '@merit-systems/echo-typescript-sdk'; +import { generateText } from 'ai'; +import { beforeAll, describe, expect, it } from 'vitest'; +import { + ECHO_APP_ID, + assertEnv, + baseRouterUrl, + getApiErrorDetails, + getToken, +} from './test-helpers'; + +beforeAll(assertEnv); + +const GEMINI_IMAGE_MODELS = [ + 'gemini-2.5-flash-image', + 'gemini-2.5-flash-image-preview', +] as const; + +describe.concurrent('Gemini generateText with image generation', () => { + const google = createEchoGoogle( + { appId: ECHO_APP_ID!, baseRouterUrl }, + getToken + ); + + for (const model_id of GEMINI_IMAGE_MODELS) { + it(`Gemini image ${model_id}`, async () => { + try { + const result = await generateText({ + model: google(model_id), + prompt: 'Generate a simple blue circle', + }); + + + expect(result).toBeDefined(); + + + const imageFile = result.files?.find(file => + file.mediaType?.startsWith('image/') + ); + + expect(imageFile).toBeDefined(); + expect(imageFile?.mediaType).toMatch(/^image\//); + expect(imageFile?.base64).toBeDefined(); + expect(imageFile?.base64?.length).toBeGreaterThan(0); + } catch (err) { + const details = getApiErrorDetails(err); + throw new Error( + `[generateText with image] 
Gemini ${model_id} failed: ${details}` + ); + } + }); + } +});