From 09459ae22cb76633932f2c1cabfb4d92ed395dc1 Mon Sep 17 00:00:00 2001 From: Abdurrehman Subhani Date: Mon, 2 Dec 2024 15:30:37 +0500 Subject: [PATCH 1/6] add initial realtimeSTT support --- .../react-agents/util/audio-perception.mjs | 68 +++++++++ .../react-agents/util/realtime-stt.mjs | 135 ++++++++++++++++++ 2 files changed, 203 insertions(+) create mode 100644 packages/usdk/packages/upstreet-agent/packages/react-agents/util/realtime-stt.mjs diff --git a/packages/usdk/packages/upstreet-agent/packages/react-agents/util/audio-perception.mjs b/packages/usdk/packages/upstreet-agent/packages/react-agents/util/audio-perception.mjs index be879f390..cbda0e743 100644 --- a/packages/usdk/packages/upstreet-agent/packages/react-agents/util/audio-perception.mjs +++ b/packages/usdk/packages/upstreet-agent/packages/react-agents/util/audio-perception.mjs @@ -388,4 +388,72 @@ export const transcribeRealtime = ({ console.warn('error creating transcription', error); throw error; } +}; + +export const transcribeRealtimeSTT = ({}) => { + try { + const stt = new RealtimeSTT(); + const transcription = new EventTarget(); + + stt.dataSocket.addEventListener('message', (e) => { + try { + const result = JSON.parse(e.data.toString()); + if (result.type === 'realtime') { + transcription.dispatchEvent(new MessageEvent('partial', { + data: { + transcript: result.text, + }, + })); + } else if (result.type === 'fullSentence') { + transcription.dispatchEvent(new MessageEvent('transcription', { + data: { + transcript: result.text, + }, + })); + } + } catch (err) { + console.warn('Error parsing STT message:', err); + } + }); + + stt.controlSocket.addEventListener('open', () => { + transcription.dispatchEvent(new MessageEvent('open', { + data: null, + })); + }); + + stt.controlSocket.addEventListener('error', (e) => { + console.warn('STT error:', e); + transcription.dispatchEvent(new MessageEvent('error', { + data: e, + })); + }); + + stt.controlSocket.addEventListener('close', () => { + transcription.dispatchEvent(new MessageEvent('close', { + data: null, + })); + }); + + // Start the STT service + stt.start().catch(err => { + console.error('Failed to start STT:', err); + throw err; + }); + + // Add write and close methods to transcription + transcription.write = async (f32) => { + const i16 = floatTo16Bit(f32); + stt.handleIncomingData(Buffer.from(i16.buffer)); + }; + + transcription.close = () => { + stt.stop(); + }; + + return transcription; + } catch (error) { + console.warn('error creating STT transcription:', error); + throw error; + } }; \ No newline at end of file diff --git a/packages/usdk/packages/upstreet-agent/packages/react-agents/util/realtime-stt.mjs b/packages/usdk/packages/upstreet-agent/packages/react-agents/util/realtime-stt.mjs new file mode 100644 index 000000000..82bd32788 --- /dev/null +++ b/packages/usdk/packages/upstreet-agent/packages/react-agents/util/realtime-stt.mjs @@ -0,0 +1,135 @@ +import WebSocket from 'ws'; +import { serverConfig } from './config'; + +const SAMPLE_RATE = 16000; +const serverConfig = { + host: 'xxxxx', + controlPort: 8011, + dataPort: 8012, +}; + +class RealtimeSTT { + constructor() { + this.controlSocket = null; + this.dataSocket = null; + this.recordingStream = null; + this.sampleRate = SAMPLE_RATE; + + // create the WebSocket URLs from config + this.controlUrl = `ws://${serverConfig.host}:${serverConfig.controlPort}`; + this.dataUrl = `ws://${serverConfig.host}:${serverConfig.dataPort}`; + } + + async start() { + console.log('Connecting to RealtimeSTT server...'); + + // Connect to control socket using config + this.controlSocket = new WebSocket(this.controlUrl); + this.controlSocket.on('open', () => { + console.log('Control socket connected to:', this.controlUrl); + + // Send all configuration parameters + const configs = [ + { + command: "set_parameter", + parameter: "language", + value: "en" + }, + ]; + + // Send each configuration command + configs.forEach(config => { + this.controlSocket.send(JSON.stringify(config)); + console.log(`Configured ${config.parameter}: ${config.value}`); + }); + }); + + this.controlSocket.on('message', (data) => { + console.log('Control message:', data.toString()); + }); + + // Connect to data socket using config + this.dataSocket = new WebSocket(this.dataUrl); + this.dataSocket.on('open', () => { + console.log('Data socket connected to:', this.dataUrl); + }); + + this.dataSocket.on('message', (data) => { + try { + const result = JSON.parse(data.toString()); + if (result.type === 'realtime') { + process.stdout.write(`\rPartial: ${result.text}`); + } else if (result.type === 'fullSentence') { + console.log(`\nFinal: ${result.text}\n`); + } + } catch (err) { + console.log('Raw message:', data.toString()); + } + }); + + // Wait for both connections + await Promise.all([ + new Promise(resolve => this.controlSocket.once('open', resolve)), + new Promise(resolve => this.dataSocket.once('open', resolve)) + ]); + + console.log('Ready to receive streaming data...'); + + // Function to handle incoming streaming data + this.handleIncomingData = (chunk) => { + if (this.dataSocket.readyState === WebSocket.OPEN) { + // Prepare metadata as in Python client + const metadata = { + sampleRate: this.sampleRate, + channels: 1, + encoding: 'PCM16' + }; + const metadataStr = JSON.stringify(metadata); + const metadataLength = Buffer.alloc(4); + metadataLength.writeUInt32LE(metadataStr.length); + + // Combine metadata and audio data + const message = Buffer.concat([ + metadataLength, + Buffer.from(metadataStr), + chunk + ]); + + this.dataSocket.send(message); + } + }; + + console.log('\nListening for incoming data... (Press Ctrl+C to stop)\n'); + } + + stop() { + if (this.recordingStream) { + this.recordingStream.destroy(); + } + if (this.controlSocket) { + this.controlSocket.close(); + } + if (this.dataSocket) { + this.dataSocket.close(); + } + console.log('\nStopped recording and closed connections'); + } +} + +export default RealtimeSTT; + +// // Start the test +// const testClient = new RealtimeSTT(); + +// // Handle graceful shutdown +// process.on('SIGINT', () => { +// console.log('\nShutting down...'); +// testClient.stop(); +// process.exit(0); +// }); + +// // Start the test +// testClient.start().catch(err => { +// console.error('Error:', err); +// process.exit(1); +// }); \ No newline at end of file From ce3dacffdb16dd2ac31b9f43b19b8db30d46a7a2 Mon Sep 17 00:00:00 2001 From: Abdurrehman Subhani Date: Mon, 2 Dec 2024 15:49:09 +0500 Subject: [PATCH 2/6] add in realtimestt to audio transcriber --- .../devices/audio-transcriber.mjs | 100 +++++++++++++++++- .../lib/discord/discord-client.js | 10 +- 2 files changed, 108 insertions(+), 2 deletions(-) diff --git a/packages/usdk/packages/upstreet-agent/packages/react-agents/devices/audio-transcriber.mjs b/packages/usdk/packages/upstreet-agent/packages/react-agents/devices/audio-transcriber.mjs index cdfc304f6..d8c90e984 100644 --- a/packages/usdk/packages/upstreet-agent/packages/react-agents/devices/audio-transcriber.mjs +++ b/packages/usdk/packages/upstreet-agent/packages/react-agents/devices/audio-transcriber.mjs @@ -1,4 +1,4 @@ -import { transcribeRealtime } from '../util/audio-perception.mjs'; +import { transcribeRealtime, transcribeRealtimeSTT } from '../util/audio-perception.mjs'; import { resample } from 'codecs/resample.mjs'; import { AudioChunker } from '../util/audio-chunker.mjs'; @@ -112,4 +112,102 @@ export class TranscribedVoiceInput extends EventTarget { // data: null, // })); } +} + +export class RealtimeTranscribedVoiceInput extends EventTarget { + static transcribeSampleRate = 16000; + abortController; + + constructor({ + audioInput, // EventEmitter + sampleRate, + codecs, + }) { + if (!audioInput) { + throw new Error('no audio input'); + } + if (!sampleRate) { + throw new Error('no sample rate'); + } + if (!codecs) { + throw new Error('no codecs'); + } + + super(); + + this.abortController = new AbortController(); + const { + signal, + } = this.abortController; + + (async () => { + const transcription = transcribeRealtimeSTT({ + sampleRate: RealtimeTranscribedVoiceInput.transcribeSampleRate, + }); + + // Forward all relevant events + transcription.addEventListener('partial', e => { + console.log('got partial transcription', e); + }); + + transcription.addEventListener('transcription', e => { + console.log('got full transcription', e); + this.dispatchEvent(new MessageEvent('transcription', { + data: { + transcript: e.data.transcript, + }, + })); + }); + + signal.addEventListener('abort', () => { + transcription.close(); + }); + + const openPromise = new Promise((accept, reject) => { + transcription.addEventListener('open', e => { + accept(null); + }); + transcription.addEventListener('error', e => { + reject(e); + }); + }); + + const audioChunker = new AudioChunker({ + sampleRate: RealtimeTranscribedVoiceInput.transcribeSampleRate, + chunkSize: 1536, + }); + + const ondata = async (f32) => { + await openPromise; + + // resample if needed + if (sampleRate !== RealtimeTranscribedVoiceInput.transcribeSampleRate) { + f32 = resample(f32, sampleRate, RealtimeTranscribedVoiceInput.transcribeSampleRate); + } + + const frames = audioChunker.write(f32); + for (const frame of frames) { + transcription.write(frame); + } + }; + audioInput.on('data', ondata); + + const onend = () => { + this.close(); + }; + audioInput.on('end', onend); + + const cleanup = () => { + audioInput.removeListener('data', ondata); + audioInput.removeListener('end', onend); + }; + signal.addEventListener('abort', () => { + cleanup(); + }); + })(); + } + + close() { + this.abortController.abort(); + } } \ No newline at end of file diff --git a/packages/usdk/packages/upstreet-agent/packages/react-agents/lib/discord/discord-client.js b/packages/usdk/packages/upstreet-agent/packages/react-agents/lib/discord/discord-client.js index bd467de2b..63841309d 100644 --- a/packages/usdk/packages/upstreet-agent/packages/react-agents/lib/discord/discord-client.js +++ b/packages/usdk/packages/upstreet-agent/packages/react-agents/lib/discord/discord-client.js @@ -20,6 +20,7 @@ import { discordBotEndpointUrl, } from '../../util/endpoints.mjs'; import { + RealtimeTranscribedVoiceInput, TranscribedVoiceInput, } from '../../devices/audio-transcriber.mjs'; @@ -243,7 +244,14 @@ export class DiscordOutput extends EventTarget { this.userStreams.delete(userId); }); - const transcribedVoiceInput = new TranscribedVoiceInput({ + // const transcribedVoiceInput = new TranscribedVoiceInput({ + // audioInput: userStream, + // sampleRate, + // codecs, + // jwt, + // }); + + const transcribedVoiceInput = new RealtimeTranscribedVoiceInput({ audioInput: userStream, sampleRate, codecs, From 4b14490572226694a05fefe78d5d46db2db842ff Mon Sep 17 00:00:00 2001 From: Abdurrehman Subhani Date: Mon, 2 Dec 2024 16:22:30 +0500 Subject: [PATCH 3/6] refactor transcribeRealtimeSTT --- .../react-agents/classes/discord-manager.ts | 4 + .../devices/audio-transcriber.mjs | 2 +- .../react-agents/util/audio-perception.mjs | 105 +++++++++++++----- 3 files changed, 80 insertions(+), 31 deletions(-) diff --git a/packages/usdk/packages/upstreet-agent/packages/react-agents/classes/discord-manager.ts b/packages/usdk/packages/upstreet-agent/packages/react-agents/classes/discord-manager.ts index 65b52c24b..d5f1d468b 100644 --- a/packages/usdk/packages/upstreet-agent/packages/react-agents/classes/discord-manager.ts +++ b/packages/usdk/packages/upstreet-agent/packages/react-agents/classes/discord-manager.ts @@ -169,6 +169,10 @@ export class DiscordBot extends EventTarget { const status = await discordBotClient.status(); if (signal.aborted) return; + if (status.error) { + throw new Error(`Discord connection error: ${status.error}`); + } + console.log('discord connect 2'); let connectableChannels = status.channels .filter((channel: any) => [0, 2].includes(channel.type)); diff --git a/packages/usdk/packages/upstreet-agent/packages/react-agents/devices/audio-transcriber.mjs b/packages/usdk/packages/upstreet-agent/packages/react-agents/devices/audio-transcriber.mjs index d8c90e984..bac0687aa 100644 --- a/packages/usdk/packages/upstreet-agent/packages/react-agents/devices/audio-transcriber.mjs +++ b/packages/usdk/packages/upstreet-agent/packages/react-agents/devices/audio-transcriber.mjs @@ -141,7 +141,7 @@ export class RealtimeTranscribedVoiceInput extends EventTarget { } = this.abortController; (async () => { - const transcription = transcribeRealtimeSTT({ + const transcription = await transcribeRealtimeSTT({ sampleRate: RealtimeTranscribedVoiceInput.transcribeSampleRate, }); diff --git a/packages/usdk/packages/upstreet-agent/packages/react-agents/util/audio-perception.mjs b/packages/usdk/packages/upstreet-agent/packages/react-agents/util/audio-perception.mjs index cbda0e743..094f69d66 100644 --- a/packages/usdk/packages/upstreet-agent/packages/react-agents/util/audio-perception.mjs +++ b/packages/usdk/packages/upstreet-agent/packages/react-agents/util/audio-perception.mjs @@ -390,14 +390,53 @@ export const transcribeRealtime = ({ } }; -export const transcribeRealtimeSTT = ({}) => { +export const transcribeRealtimeSTT = async ({ sampleRate }) => { + const serverConfig = { + host: 'xxxxx', + controlPort: 8011, + dataPort: 8012, + }; + + if (!sampleRate) { + throw new Error('no sample rate'); + } + try { - const stt = new RealtimeSTT(); + const controlUrl = `ws://${serverConfig.host}:${serverConfig.controlPort}`; + const dataUrl = `ws://${serverConfig.host}:${serverConfig.dataPort}`; + + const controlSocket = new WebSocket(controlUrl); + const dataSocket = new WebSocket(dataUrl); + const transcription = new EventTarget(); - stt.dataSocket.addEventListener('message', (e) => { + controlSocket.addEventListener('open', () => { + console.log('Control socket connected to:', controlUrl); + const configs = [ + { + command: "set_parameter", + parameter: "language", + value: "en" + }, + ]; + configs.forEach(config => { + controlSocket.send(JSON.stringify(config)); + console.log(`Configured ${config.parameter}: ${config.value}`); + }); + transcription.dispatchEvent(new MessageEvent('open', { data: null })); + }); + + controlSocket.addEventListener('message', (data) => { + console.log('Control message:', data); + }); + + dataSocket.addEventListener('open', () => { + console.log('Data socket connected to:', dataUrl); + }); + + dataSocket.addEventListener('message', (data) => { try { - const result = JSON.parse(e.data.toString()); + const result = JSON.parse(data.data); if (result.type === 'realtime') { transcription.dispatchEvent(new MessageEvent('partial', { data: { @@ -412,43 +451,49 @@ export const transcribeRealtimeSTT = ({}) => { })); } } catch (err) { - console.warn('Error parsing STT message:', err); + console.log('Raw message:', data.data); } }); - stt.controlSocket.addEventListener('open', () => { - transcription.dispatchEvent(new MessageEvent('open', { - data: null, - })); - }); - - stt.controlSocket.addEventListener('error', (e) => { - console.warn('STT error:', e); - transcription.dispatchEvent(new MessageEvent('error', { - data: e, - })); - }); - - stt.controlSocket.addEventListener('close', () => { - transcription.dispatchEvent(new MessageEvent('close', { - data: null, - })); + controlSocket.addEventListener('error', (e) => { + console.warn('Control socket error:', e); + transcription.dispatchEvent(new MessageEvent('error', { data: e })); }); - // Start the STT service - stt.start().catch(err => { - console.error('Failed to start STT:', err); - throw err; + dataSocket.addEventListener('error', (e) => { + console.warn('Data socket error:', e); + transcription.dispatchEvent(new MessageEvent('error', { data: e })); }); - // Add write and close methods to transcription - transcription.write = async (f32) => { + transcription.write = (f32) => { const i16 = floatTo16Bit(f32); - stt.handleIncomingData(Buffer.from(i16.buffer)); + const metadata = { + sampleRate: sampleRate, + channels: 1, + encoding: 'PCM16' + }; + const metadataStr = JSON.stringify(metadata); + const metadataLength = Buffer.alloc(4); + metadataLength.writeUInt32LE(metadataStr.length); + + const message = Buffer.concat([ + metadataLength, + Buffer.from(metadataStr), + Buffer.from(i16.buffer) + ]); + + if (dataSocket.readyState === WebSocket.OPEN) { + dataSocket.send(message); + } }; transcription.close = () => { - stt.stop(); + if (controlSocket.readyState === WebSocket.OPEN) { + controlSocket.close(); + } + if (dataSocket.readyState === WebSocket.OPEN) { + dataSocket.close(); + } }; return transcription; From 52b2da732ba3dec86d9a6bede8d8195cba758799 Mon Sep 17 00:00:00 2001 From: Abdurrehman Subhani Date: Mon, 2 Dec 2024 16:55:01 +0500 Subject: [PATCH 4/6] move stt port and endpoint to endpoints for consistency --- .../react-agents/util/audio-perception.mjs | 16 +++++++--------- .../packages/react-agents/util/endpoints.mjs | 6 +++++- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/packages/usdk/packages/upstreet-agent/packages/react-agents/util/audio-perception.mjs b/packages/usdk/packages/upstreet-agent/packages/react-agents/util/audio-perception.mjs index 094f69d66..f6875dc1a 100644 --- a/packages/usdk/packages/upstreet-agent/packages/react-agents/util/audio-perception.mjs +++ b/packages/usdk/packages/upstreet-agent/packages/react-agents/util/audio-perception.mjs @@ -7,6 +7,9 @@ import { AudioEncodeStream } from 'codecs/audio-encode.mjs'; import { QueueManager } from 'queue-manager'; import { aiHost, + realtimeSTTHost, + realtimeSTTControlPort, + realtimeSTTDataPort, } from './endpoints.mjs'; const defaultTranscriptionModel = 'whisper-1'; @@ -391,19 +394,14 @@ export const transcribeRealtime = ({ }; export const transcribeRealtimeSTT = async ({ sampleRate }) => { - const serverConfig = { - host: 'xxxxx', - controlPort: 8011, - dataPort: 8012, - }; if (!sampleRate) { throw new Error('no sample rate'); } try { - const controlUrl = `ws://${serverConfig.host}:${serverConfig.controlPort}`; - const dataUrl = `ws://${serverConfig.host}:${serverConfig.dataPort}`; + const controlUrl = `ws://${realtimeSTTHost}:${realtimeSTTControlPort}`; + const dataUrl = `ws://${realtimeSTTHost}:${realtimeSTTDataPort}`; const controlSocket = new WebSocket(controlUrl); const dataSocket = new WebSocket(dataUrl); @@ -411,7 +409,7 @@ export const transcribeRealtimeSTT = async ({ sampleRate }) => { const transcription = new EventTarget(); controlSocket.addEventListener('open', () => { - console.log('Control socket connected to:', controlUrl); + console.log('Control socket connected'); const configs = [ { command: "set_parameter", @@ -431,7 +429,7 @@ export const transcribeRealtimeSTT = async ({ sampleRate }) => { }); dataSocket.addEventListener('open', () => { - console.log('Data socket connected to:', dataUrl); + console.log('Data socket connected'); }); dataSocket.addEventListener('message', (data) => { diff --git a/packages/usdk/packages/upstreet-agent/packages/react-agents/util/endpoints.mjs b/packages/usdk/packages/upstreet-agent/packages/react-agents/util/endpoints.mjs index e602d0b91..f32cf9a26 100644 --- a/packages/usdk/packages/upstreet-agent/packages/react-agents/util/endpoints.mjs +++ b/packages/usdk/packages/upstreet-agent/packages/react-agents/util/endpoints.mjs @@ -8,4 +8,8 @@ export const chatEndpointUrl = `https://chat.upstreet.ai`; export const discordBotEndpointUrl = `https://discord-bot-upstreet.fly.dev`; export const telnyxEndpointUrl = 'https://telnyx.isekaichat.workers.dev'; export const workersHost = `isekaichat.workers.dev`; -export const usdkDiscordUrl = `https://upstreet.ai/usdk-discord`; \ No newline at end of file +export const usdkDiscordUrl = `https://upstreet.ai/usdk-discord`; + +export const realtimeSTTHost = `xxxxx`; +export const realtimeSTTControlPort = 8011; +export const realtimeSTTDataPort = 8012; From 0d82ba68a89e11af5156c24a7af81b08f061a236 Mon Sep 17 00:00:00 2001 From: Abdurrehman Subhani Date: Tue, 3 Dec 2024 17:39:06 +0500 Subject: [PATCH 5/6] remove unused realtime-stt class --- .../react-agents/util/realtime-stt.mjs | 135 ------------------ 1 file changed, 135 deletions(-) delete mode 100644 packages/usdk/packages/upstreet-agent/packages/react-agents/util/realtime-stt.mjs diff --git a/packages/usdk/packages/upstreet-agent/packages/react-agents/util/realtime-stt.mjs b/packages/usdk/packages/upstreet-agent/packages/react-agents/util/realtime-stt.mjs deleted file mode 100644 index 82bd32788..000000000 --- a/packages/usdk/packages/upstreet-agent/packages/react-agents/util/realtime-stt.mjs +++ /dev/null @@ -1,135 +0,0 @@ -import WebSocket from 'ws'; -import { serverConfig } from './config'; - -const SAMPLE_RATE = 16000; -const serverConfig = { - host: 'xxxxx', - controlPort: 8011, - dataPort: 8012, -}; - -class RealtimeSTT { - constructor() { - this.controlSocket = null; - this.dataSocket = null; - this.recordingStream = null; - this.sampleRate = SAMPLE_RATE; - - // create the WebSocket URLs from config - this.controlUrl = `ws://${serverConfig.host}:${serverConfig.controlPort}`; - this.dataUrl = `ws://${serverConfig.host}:${serverConfig.dataPort}`; - } - - async start() { - console.log('Connecting to RealtimeSTT server...'); - - // Connect to control socket using config - this.controlSocket = new WebSocket(this.controlUrl); - this.controlSocket.on('open', () => { - console.log('Control socket connected to:', this.controlUrl); - - // Send all configuration parameters - const configs = [ - { - command: "set_parameter", - parameter: "language", - value: "en" - }, - ]; - - // Send each configuration command - configs.forEach(config => { - this.controlSocket.send(JSON.stringify(config)); - console.log(`Configured ${config.parameter}: ${config.value}`); - }); - }); - - this.controlSocket.on('message', (data) => { - console.log('Control message:', data.toString()); - }); - - // Connect to data socket using config - this.dataSocket = new WebSocket(this.dataUrl); - this.dataSocket.on('open', () => { - console.log('Data socket connected to:', this.dataUrl); - }); - - this.dataSocket.on('message', (data) => { - try { - const result = JSON.parse(data.toString()); - if (result.type === 'realtime') { - process.stdout.write(`\rPartial: ${result.text}`); - } else if (result.type === 'fullSentence') { - console.log(`\nFinal: ${result.text}\n`); - } - } catch (err) { - console.log('Raw message:', data.toString()); - } - }); - - // Wait for both connections - await Promise.all([ - new Promise(resolve => this.controlSocket.once('open', resolve)), - new Promise(resolve => this.dataSocket.once('open', resolve)) - ]); - - console.log('Ready to receive streaming data...'); - - // Function to handle incoming streaming data - this.handleIncomingData = (chunk) => { - if (this.dataSocket.readyState === WebSocket.OPEN) { - // Prepare metadata as in Python client - const metadata = { - sampleRate: this.sampleRate, - channels: 1, - encoding: 'PCM16' - }; - const metadataStr = JSON.stringify(metadata); - const metadataLength = Buffer.alloc(4); - metadataLength.writeUInt32LE(metadataStr.length); - - // Combine metadata and audio data - const message = Buffer.concat([ - metadataLength, - Buffer.from(metadataStr), - chunk - ]); - - this.dataSocket.send(message); - } - }; - - console.log('\nListening for incoming data... (Press Ctrl+C to stop)\n'); - } - - stop() { - if (this.recordingStream) { - this.recordingStream.destroy(); - } - if (this.controlSocket) { - this.controlSocket.close(); - } - if (this.dataSocket) { - this.dataSocket.close(); - } - console.log('\nStopped recording and closed connections'); - } -} - -export default RealtimeSTT; - -// // Start the test -// const testClient = new RealtimeSTT(); - -// // Handle graceful shutdown -// process.on('SIGINT', () => { -// console.log('\nShutting down...'); -// testClient.stop(); -// process.exit(0); -// }); - -// // Start the test -// testClient.start().catch(err => { -// console.error('Error:', err); -// process.exit(1); -// }); \ No newline at end of file From 3eb7c414369a10f1cf57abcd6a2f861755ae04ce Mon Sep 17 00:00:00 2001 From: Abdurrehman Subhani Date: Wed, 4 Dec 2024 17:40:08 +0500 Subject: [PATCH 6/6] point to runpod rtstt server instance --- .../packages/react-agents/util/audio-perception.mjs | 9 ++++++--- .../packages/react-agents/util/endpoints.mjs | 2 +- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/packages/usdk/packages/upstreet-agent/packages/react-agents/util/audio-perception.mjs b/packages/usdk/packages/upstreet-agent/packages/react-agents/util/audio-perception.mjs index f6875dc1a..7e66d0c9d 100644 --- a/packages/usdk/packages/upstreet-agent/packages/react-agents/util/audio-perception.mjs +++ b/packages/usdk/packages/upstreet-agent/packages/react-agents/util/audio-perception.mjs @@ -394,14 +394,17 @@ export const transcribeRealtime = ({ }; export const transcribeRealtimeSTT = async ({ sampleRate }) => { - if (!sampleRate) { throw new Error('no sample rate'); } try { - const controlUrl = `ws://${realtimeSTTHost}:${realtimeSTTControlPort}`; - const dataUrl = `ws://${realtimeSTTHost}:${realtimeSTTDataPort}`; + // const controlUrl = `ws://${serverConfig.host}:${serverConfig.controlPort}`; + // const dataUrl = `ws://${serverConfig.host}:${serverConfig.dataPort}`; + + const controlUrl = `wss://${realtimeSTTHost}-${realtimeSTTControlPort}.proxy.runpod.net`; + const dataUrl = `wss://${realtimeSTTHost}-${realtimeSTTDataPort}.proxy.runpod.net`; + const controlSocket = new WebSocket(controlUrl); const dataSocket = new WebSocket(dataUrl); diff --git a/packages/usdk/packages/upstreet-agent/packages/react-agents/util/endpoints.mjs b/packages/usdk/packages/upstreet-agent/packages/react-agents/util/endpoints.mjs index f32cf9a26..7d2aa8c51 100644 --- a/packages/usdk/packages/upstreet-agent/packages/react-agents/util/endpoints.mjs +++ b/packages/usdk/packages/upstreet-agent/packages/react-agents/util/endpoints.mjs @@ -10,6 +10,6 @@ export const telnyxEndpointUrl = 'https://telnyx.isekaichat.workers.dev'; export const workersHost = `isekaichat.workers.dev`; export const usdkDiscordUrl = `https://upstreet.ai/usdk-discord`; -export const realtimeSTTHost = `xxxxx`; +export const realtimeSTTHost = `g8cgs1qznz533i`; export const realtimeSTTControlPort = 8011; export const realtimeSTTDataPort = 8012;