Skip to content

Commit 0e09884

Browse files
author
dcaswell
committed
feat: Add an interface for processing speech-to-text asynchronously
This adds the ability to send a request to a provider to create a transcript asynchronously: the provider gives you an ID right away and later sends you a webhook referencing that ID once the job is done. This commit only supplies the interface; a provider can implement it in the future.
1 parent bf3be25 commit 0e09884

File tree

6 files changed

+203
-5
lines changed

6 files changed

+203
-5
lines changed

src/Audio/PendingRequest.php

Lines changed: 40 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,11 +19,11 @@ class PendingRequest
1919
use ConfiguresProviders;
2020
use HasProviderOptions;
2121

22-
protected string|Audio $input;
22+
protected string|Audio|int $input;
2323

2424
protected string $voice;
2525

26-
public function withInput(string|Audio $input): self
26+
public function withInput(string|Audio|int $input): self
2727
{
2828
$this->input = $input;
2929

@@ -59,6 +59,28 @@ public function asText(): TextResponse
5959
}
6060
}
6161

62+
/**
 * Build a speech-to-text request from the configured input and ask the
 * provider for a ProviderIdResponse.
 *
 * A RequestException raised by the provider call is forwarded to the
 * provider's handleRequestException() together with the request's model.
 *
 * NOTE(review): nothing is returned after the catch block; presumably
 * handleRequestException() always throws — confirm against its signature.
 */
public function asTextProviderId(): ProviderIdResponse
{
    $speechToTextRequest = $this->toSpeechToTextRequest();

    try {
        $providerIdResponse = $this->provider->speechToTextProviderId($speechToTextRequest);

        return $providerIdResponse;
    } catch (RequestException $exception) {
        $this->provider->handleRequestException($speechToTextRequest->model(), $exception);
    }
}
72+
73+
/**
 * Build an async speech-to-text request from the configured input and ask
 * the provider for the resulting TextResponse.
 *
 * A RequestException raised by the provider call is forwarded to the
 * provider's handleRequestException() together with the request's model.
 *
 * NOTE(review): nothing is returned after the catch block; presumably
 * handleRequestException() always throws — confirm against its signature.
 *
 * @throws InvalidArgumentException when the configured input is neither a string nor an integer
 */
public function asTextAsync(): TextResponse
{
    $asyncRequest = $this->toSpeechToTextAsyncRequest();

    try {
        $textResponse = $this->provider->speechToTextAsync($asyncRequest);

        return $textResponse;
    } catch (RequestException $exception) {
        $this->provider->handleRequestException($asyncRequest->model(), $exception);
    }
}
83+
6284
protected function toTextToSpeechRequest(): TextToSpeechRequest
6385
{
6486
if (! is_string($this->input)) {
@@ -91,4 +113,20 @@ protected function toSpeechToTextRequest(): SpeechToTextRequest
91113
providerOptions: $this->providerOptions,
92114
);
93115
}
116+
117+
/**
 * Assemble the SpeechToTextAsyncRequest from this pending request's model,
 * provider key, input, client options, retry settings and provider options.
 *
 * @throws InvalidArgumentException when the input is neither a string nor an integer
 */
protected function toSpeechToTextAsyncRequest(): SpeechToTextAsyncRequest
{
    $providerId = $this->input;

    // Only a string or integer is accepted as input here.
    if (! (is_string($providerId) || is_int($providerId))) {
        throw new InvalidArgumentException('Async speech-to-text requires the input be the Provider ID as a string or integer');
    }

    return new SpeechToTextAsyncRequest(
        model: $this->model,
        providerKey: $this->providerKey(),
        input: $providerId,
        clientOptions: $this->clientOptions,
        clientRetry: $this->clientRetry,
        providerOptions: $this->providerOptions,
    );
}
94132
}

src/Audio/ProviderIdResponse.php

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
<?php
2+
declare(strict_types=1);
3+
4+
namespace Prism\Prism\Audio;
5+
6+
use Prism\Prism\ValueObjects\Usage;
7+
8+
/**
 * Read-only value object holding an identifier, optional usage data and
 * any additional content.
 */
readonly class ProviderIdResponse
{
    /**
     * @param  string|int  $id  the identifier carried by this response
     * @param  Usage|null  $usage  usage information, when available
     * @param  array<string,mixed>  $additionalContent  extra data keyed by name
     */
    public function __construct(
        public string|int $id,
        public ?Usage $usage = null,
        public array $additionalContent = [],
    ) {}
}
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
<?php
2+
declare(strict_types=1);
3+
4+
namespace Prism\Prism\Audio;
5+
6+
use Closure;
7+
use Prism\Prism\Concerns\ChecksSelf;
8+
use Prism\Prism\Concerns\HasProviderOptions;
9+
use Prism\Prism\Contracts\PrismRequest;
10+
11+
/**
 * Request object for the async speech-to-text call: it carries the model,
 * the provider key, a string or integer input, HTTP client options and
 * retry settings, plus provider-specific options.
 */
class SpeechToTextAsyncRequest implements PrismRequest
{
    use ChecksSelf;
    use HasProviderOptions;

    /**
     * @param  string  $model  model name, returned by model()
     * @param  string  $providerKey  provider key, returned by provider()
     * @param  string|int  $input  value returned by input()
     * @param  array<string, mixed>  $clientOptions  options returned by clientOptions()
     * @param  array{0: array<int, int>|int, 1?: Closure|int, 2?: ?callable, 3?: bool}  $clientRetry  retry settings returned by clientRetry()
     * @param  array<string, mixed>  $providerOptions  stored on the providerOptions property
     */
    public function __construct(
        protected string $model,
        protected string $providerKey,
        protected string|int $input,
        protected array $clientOptions,
        protected array $clientRetry,
        array $providerOptions = [],
    ) {
        // Not a promoted parameter: the property is assigned explicitly.
        $this->providerOptions = $providerOptions;
    }

    public function model(): string
    {
        return $this->model;
    }

    public function provider(): string
    {
        return $this->providerKey;
    }

    public function input(): string|int
    {
        return $this->input;
    }

    /**
     * @return array<string, mixed>
     */
    public function clientOptions(): array
    {
        return $this->clientOptions;
    }

    /**
     * @return array{0: array<int, int>|int, 1?: Closure|int, 2?: ?callable, 3?: bool}
     */
    public function clientRetry(): array
    {
        return $this->clientRetry;
    }
}

src/Providers/Provider.php

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77
use Generator;
88
use Illuminate\Http\Client\RequestException;
99
use Prism\Prism\Audio\AudioResponse as TextToSpeechResponse;
10+
use Prism\Prism\Audio\ProviderIdResponse;
11+
use Prism\Prism\Audio\SpeechToTextAsyncRequest;
1012
use Prism\Prism\Audio\SpeechToTextRequest;
1113
use Prism\Prism\Audio\TextResponse as SpeechToTextResponse;
1214
use Prism\Prism\Audio\TextToSpeechRequest;
@@ -56,6 +58,16 @@ public function speechToText(SpeechToTextRequest $request): SpeechToTextResponse
5658
throw PrismException::unsupportedProviderAction('speechToText', class_basename($this));
5759
}
5860

61+
/**
 * Base implementation: always throws the exception built by
 * PrismException::unsupportedProviderAction() for 'speechToTextProviderId'.
 * Subclasses override this method to provide the action.
 */
public function speechToTextProviderId(SpeechToTextRequest $request): ProviderIdResponse
{
    $providerName = class_basename($this);

    throw PrismException::unsupportedProviderAction('speechToTextProviderId', $providerName);
}
65+
66+
/**
 * Base implementation: always throws the exception built by
 * PrismException::unsupportedProviderAction() for 'speechToTextAsync'.
 * Subclasses override this method to provide the action.
 */
public function speechToTextAsync(SpeechToTextAsyncRequest $request): SpeechToTextResponse
{
    $providerName = class_basename($this);

    throw PrismException::unsupportedProviderAction('speechToTextAsync', $providerName);
}
70+
5971
/**
6072
* @return Generator<StreamEvent>
6173
*/

tests/Audio/PendingRequestTest.php

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
<?php
2+
3+
declare(strict_types=1);
4+
5+
use Prism\Prism\Audio\PendingRequest;
6+
use Prism\Prism\Audio\ProviderIdResponse;
7+
use Prism\Prism\Audio\SpeechToTextAsyncRequest;
8+
use Prism\Prism\Audio\SpeechToTextRequest;
9+
use Prism\Prism\Providers\Provider as ProviderContract;
10+
use Prism\Prism\ValueObjects\Media\Audio;
11+
use Tests\TestDoubles\TestProvider;
12+
13+
// Give every test its own fresh PendingRequest instance.
beforeEach(function (): void {
    $freshRequest = new PendingRequest();

    $this->pendingRequest = $freshRequest;
});
16+
17+
test('it generates a provider id response for speech to text', function (): void {
    // Register the test double under the 'test-provider' key.
    resolve('prism-manager')->extend(
        'test-provider',
        fn ($config): ProviderContract => new TestProvider,
    );

    $audio = Audio::fromUrl('https://example.com/audio.mp3', 'audio/mpeg');

    $response = $this->pendingRequest
        ->using('test-provider', 'test-model')
        ->withInput($audio)
        ->asTextProviderId();

    $testProvider = $this->pendingRequest->provider();

    // The response carries the test double's default id ...
    expect($response)->toBeInstanceOf(ProviderIdResponse::class);
    expect($response->id)->toBe('provider-id');

    // ... and the provider received a SpeechToTextRequest holding the same audio.
    expect($testProvider->request)->toBeInstanceOf(SpeechToTextRequest::class);
    expect($testProvider->request->input())->toBe($audio);
});
35+
36+
test('it generates a response for async speech to text', function (): void {
    // Register the test double under the 'test-provider' key.
    resolve('prism-manager')->extend(
        'test-provider',
        fn ($config): ProviderContract => new TestProvider,
    );

    $providerId = 'provider-id-123';

    $response = $this->pendingRequest
        ->using('test-provider', 'test-model')
        ->withInput($providerId)
        ->asTextAsync();

    $testProvider = $this->pendingRequest->provider();

    // The response carries the test double's default transcript ...
    expect($response->text)->toBe('Async transcript');

    // ... and the provider received an async request with the model and id.
    expect($testProvider->request)->toBeInstanceOf(SpeechToTextAsyncRequest::class);
    expect($testProvider->request->model())->toBe('test-model');
    expect($testProvider->request->input())->toBe($providerId);
});

tests/TestDoubles/TestProvider.php

Lines changed: 27 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,10 @@
55
namespace Tests\TestDoubles;
66

77
use Generator;
8+
use Prism\Prism\Audio\ProviderIdResponse;
9+
use Prism\Prism\Audio\SpeechToTextAsyncRequest;
10+
use Prism\Prism\Audio\SpeechToTextRequest;
11+
use Prism\Prism\Audio\TextResponse as AudioTextResponse;
812
use Prism\Prism\Embeddings\Request as EmbeddingRequest;
913
use Prism\Prism\Embeddings\Response as EmbeddingResponse;
1014
use Prism\Prism\Enums\FinishReason;
@@ -24,15 +28,15 @@
2428

2529
class TestProvider extends Provider
2630
{
27-
public StructuredRequest|TextRequest|EmbeddingRequest|ImageRequest $request;
31+
public StructuredRequest|TextRequest|EmbeddingRequest|ImageRequest|SpeechToTextRequest|SpeechToTextAsyncRequest $request;
2832

2933
/** @var array<string, mixed> */
3034
public array $clientOptions;
3135

3236
/** @var array<mixed> */
3337
public array $clientRetry;
3438

35-
/** @var array<int, StructuredResponse|TextResponse|EmbeddingResponse|ImageResponse> */
39+
/** @var array<int, StructuredResponse|TextResponse|EmbeddingResponse|ImageResponse|AudioTextResponse|ProviderIdResponse> */
3640
public array $responses = [];
3741

3842
public $callCount = 0;
@@ -115,7 +119,27 @@ public function stream(TextRequest $request): Generator
115119
throw PrismException::unsupportedProviderAction(__METHOD__, class_basename($this));
116120
}
117121

118-
public function withResponse(StructuredResponse|TextResponse $response): Provider
122+
/**
 * Record the request, bump the call counter, and return the response queued
 * for this call — or a default ProviderIdResponse('provider-id') when none
 * is queued at that position.
 *
 * NOTE(review): a queued response of another type would violate the return
 * type; callers are presumably expected to queue a ProviderIdResponse.
 */
#[\Override]
public function speechToTextProviderId(SpeechToTextRequest $request): ProviderIdResponse
{
    $this->request = $request;
    $this->callCount++;

    $queuedResponse = $this->responses[$this->callCount - 1] ?? null;

    return $queuedResponse ?? new ProviderIdResponse('provider-id');
}
131+
132+
/**
 * Record the request, bump the call counter, and return the response queued
 * for this call — or a default AudioTextResponse('Async transcript') when
 * none is queued at that position.
 *
 * NOTE(review): a queued response of another type would violate the return
 * type; callers are presumably expected to queue an AudioTextResponse.
 */
#[\Override]
public function speechToTextAsync(SpeechToTextAsyncRequest $request): AudioTextResponse
{
    $this->request = $request;
    $this->callCount++;

    $queuedResponse = $this->responses[$this->callCount - 1] ?? null;

    return $queuedResponse ?? new AudioTextResponse('Async transcript');
}
141+
142+
public function withResponse(StructuredResponse|TextResponse|EmbeddingResponse|ImageResponse|AudioTextResponse|ProviderIdResponse $response): Provider
119143
{
120144
$this->responses[] = $response;
121145

0 commit comments

Comments
 (0)