Skip to content

Commit 1487dfc

Browse files
committed
update stt descriptions
1 parent 90e5769 commit 1487dfc

File tree

1 file changed

+50
-0
lines changed

1 file changed

+50
-0
lines changed

jigsawstack/audio.py

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,13 +10,44 @@
1010

1111
class SpeechToTextParams(TypedDict):
1212
url: NotRequired[str]
13+
"""
14+
The URL of the audio file to transcribe. Optional if file_store_key is provided.
15+
"""
16+
1317
file_store_key: NotRequired[str]
18+
"""
19+
The file store key of the audio file to transcribe. Optional if url is provided.
20+
"""
21+
1422
language: NotRequired[Union[str, Literal["auto"]]]
23+
"""
24+
The language to transcribe or translate the file into. Use "auto" for automatic language detection, or specify a language code. If not specified, defaults to automatic detection. All supported language codes can be found in the API documentation.
25+
"""
26+
1527
translate: NotRequired[bool]
28+
"""
29+
When set to true, translates the content into English (or into the specified language if the language parameter is provided).
30+
"""
31+
1632
by_speaker: NotRequired[bool]
33+
"""
34+
Identifies and separates different speakers in the audio file. When enabled, the response will include a speakers array with speaker-segmented transcripts.
35+
"""
36+
1737
webhook_url: NotRequired[str]
38+
"""
39+
Webhook URL to send result to. When provided, the API will process asynchronously and send results to this URL when completed.
40+
"""
41+
1842
batch_size: NotRequired[int]
43+
"""
44+
The batch size to return. Maximum value is 40. This controls how the audio is chunked for processing.
45+
"""
46+
1947
chunk_duration: NotRequired[int]
48+
"""
49+
The duration of each chunk in seconds. Defaults to 30.
50+
"""
2051

2152

2253
class ChunkParams(TypedDict):
@@ -32,10 +63,29 @@ class BySpeakerParams(ChunkParams):
3263

3364
class SpeechToTextResponse(BaseResponse):
3465
text: str
66+
"""
67+
the text of the transcription
68+
"""
69+
3570
chunks: List[ChunkParams]
71+
"""
72+
the chunks of the transcription
73+
"""
74+
3675
speakers: Optional[List[BySpeakerParams]]
76+
"""
77+
The speakers of the transcription. Available if by_speaker is set to true.
78+
"""
79+
3780
language_detected: Optional[str]
81+
"""
82+
The language detected in the transcription. Available if language is set to auto.
83+
"""
84+
3885
confidence: Optional[float]
86+
"""
87+
The confidence of the language detection for the transcription. Available if language is set to auto.
88+
"""
3989

4090

4191
class SpeechToTextWebhookResponse(BaseResponse):

0 commit comments

Comments
 (0)