Skip to content
This repository was archived by the owner on Jan 7, 2025. It is now read-only.
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion cmd/transcribe.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,11 @@ speechly transcribe files.jsonl --model /path/to/model/bundle`,
inputPath := args[0]

if model != "" {
results, err := transcribeOnDevice(model, inputPath)
bs, err := cmd.Flags().GetInt("block-size")
if err != nil {
bs = 20
}
results, err := transcribeOnDevice(model, inputPath, bs)
printResults(results, inputPath, err == nil)
if err != nil {
log.Fatalf("Transcribing failed: %v", err)
Expand Down Expand Up @@ -79,6 +83,7 @@ func printResults(results []AudioCorpusItem, inputPath string, reportErrors bool
// init declares the command-line flags accepted by the transcribe command and
// then attaches the command to RootCmd.
func init() {
	flags := transcribeCmd.Flags()

	// Cloud transcription options.
	flags.StringP("app", "a", "", "Application ID to use for cloud transcription")

	// On-device transcription options.
	flags.StringP("model", "m", "", "Model bundle file. This feature is available on Enterprise plans (https://speechly.com/pricing)")
	flags.Int("block-size", 20, "Block size to be used with the on-device decoder. (Enterprise plans only.)")

	// API selection.
	flags.Bool("streaming", false, "Use the Streaming API instead of the Batch API.")

	RootCmd.AddCommand(transcribeCmd)
}
Expand Down
67 changes: 60 additions & 7 deletions cmd/transcribe_on_device.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ package cmd

/*
#cgo CFLAGS: -I${SRCDIR}/../decoder/include
#cgo darwin LDFLAGS: -L${SRCDIR}/../decoder/lib -Wl,-rpath,decoder/lib -lspeechly -lz -framework Foundation -lc++ -framework Security
#cgo darwin LDFLAGS: -L${SRCDIR}/../decoder/lib -Wl,-rpath,decoder/lib -lspeechlyDecoder -lz -framework Foundation -lc++ -framework Security
#cgo linux LDFLAGS: -L${SRCDIR}/../decoder/lib -Wl,-rpath,$ORIGIN/../decoder/lib -Wl,--start-group -lstdc++ -lpthread -ldl -lm -lspeechly -lz
#cgo tflite LDFLAGS: -ltensorflowlite_c
#cgo coreml LDFLAGS: -framework coreml
Expand All @@ -16,27 +16,42 @@ import "C"
import (
"fmt"
"os"
"io"
"path"
"strings"
"unsafe"

"github.com/go-audio/audio"
)

func transcribeOnDevice(model string, corpusPath string) ([]AudioCorpusItem, error) {
ac, err := readAudioCorpus(corpusPath)
// Chunk sizes used when streaming raw audio from standard input.
const (
	// sampleBufferSize is the number of audio samples handed to the decoder
	// per write.
	sampleBufferSize = 2048
	// inputBufferSize is the number of bytes read from the input per chunk;
	// each sample is decoded from two bytes, hence twice sampleBufferSize.
	inputBufferSize = 2 * sampleBufferSize
)

func transcribeOnDevice(model string, corpusPath string, blockSize int) ([]AudioCorpusItem, error) {
df, err := NewDecoderFactory(model)
if err != nil {
return nil, err
}

df, err := NewDecoderFactory(model)
if corpusPath == "STDIN" {
d, err := df.NewStream("", blockSize)
if err != nil {
return nil, err
}
return nil, decodeStdin(d)
}

ac, err := readAudioCorpus(corpusPath)
if err != nil {
return nil, err
}

bar := getBar("Transcribing", "utt", len(ac))
var results []AudioCorpusItem
for _, aci := range ac {
d, err := df.NewStream("")
d, err := df.NewStream("", blockSize)
if err != nil {
barClearOnError(bar)
return nil, err
Expand Down Expand Up @@ -64,6 +79,44 @@ func transcribeOnDevice(model string, corpusPath string) ([]AudioCorpusItem, err
return results, nil
}

// decodeStdin streams raw audio from standard input into the decoder d and
// prints every word the decoder produces, lower-cased and followed by a
// space, to standard output.
//
// Input is consumed in chunks of inputBufferSize bytes. Each pair of bytes is
// interpreted as one little-endian signed 16-bit sample and scaled to a
// float32 in [-1, 1) before being written to the decoder. A trailing partial
// chunk is discarded, because io.ReadFull reports it as an error.
// NOTE(review): sample rate and channel layout are not validated here;
// confirm what the decoder expects.
//
// The function returns an error if VAD cannot be enabled or if the decoder
// reports an error while waiting for results. It never returns nil: the
// result loop runs until the decoder fails.
// NOTE(review): when stdin ends the feeding goroutine stops, but nothing in
// this function tells the decoder that input is finished; whether
// Decoder_WaitResults then unblocks cannot be determined from here.
func decodeStdin(d *cDecoder) error {
	cErr := C.DecoderError{}
	C.Decoder_EnableVAD(d.decoder, 1, &cErr)
	if cErr.error_code != C.uint(0) {
		return fmt.Errorf("failed enabling VAD on decoder, error code %d", cErr.error_code)
	}

	fmt.Println("Speechly Decoder ready!")

	// Feed audio from a separate goroutine so that blocking reads on stdin
	// never stall the result loop below.
	go func() {
		// The feeder uses its own error struct so it never shares cErr with
		// the result loop running concurrently.
		writeErr := C.DecoderError{}
		buffer := make([]byte, inputBufferSize)
		sampleBuffer := make([]float32, sampleBufferSize)
		for {
			if _, err := io.ReadFull(os.Stdin, buffer); err != nil {
				// Diagnostics go to stderr: stdout carries the transcript.
				fmt.Fprintln(os.Stderr, "error:", err)
				return
			}
			for i := range sampleBuffer {
				s := int16(uint16(buffer[2*i]) | uint16(buffer[2*i+1])<<8)
				sampleBuffer[i] = float32(s) / 32768.0
			}
			// The fourth argument is always 0 here; presumably it is the
			// end-of-input flag (confirm against the decoder header).
			C.Decoder_WriteSamples(d.decoder, (*C.float)(unsafe.Pointer(&sampleBuffer[0])), C.size_t(sampleBufferSize), C.int(0), &writeErr)
			if writeErr.error_code != C.uint(0) {
				fmt.Fprintf(os.Stderr, "error: failed writing samples to decoder, error code %d\n", writeErr.error_code)
				return
			}
		}
	}()

	for {
		res := C.Decoder_WaitResults(d.decoder, &cErr)
		if cErr.error_code != C.uint(0) {
			return fmt.Errorf("failed reading transcript from decoder, error code %d", cErr.error_code)
		}
		word := C.GoString(res.word)
		fmt.Printf("%s ", strings.ToLower(word))
		C.CResultWord_Destroy(res)
	}
}

func decodeAudioCorpusItem(audioFilePath string, aci AudioCorpusItem, d *cDecoder) (string, error) {
cErr := C.DecoderError{}

Expand Down Expand Up @@ -141,15 +194,15 @@ type cDecoder struct {
index int
}

func (d *decoderFactory) NewStream(deviceID string) (*cDecoder, error) {
func (d *decoderFactory) NewStream(deviceID string, blockSize int) (*cDecoder, error) {
cDeviceID := C.CString(deviceID)
cErr := C.DecoderError{}
decoder := C.DecoderFactory_GetDecoder(d.factory, cDeviceID, &cErr)
if cErr.error_code != C.uint(0) {
return nil, fmt.Errorf("failed creating decoder instance, error code %d", cErr.error_code)
}
defer C.free(unsafe.Pointer(cDeviceID))
C.Decoder_SetParamI(decoder, C.SPEECHLY_DECODER_BLOCK_MULTIPLIER_I, 20, &cErr);
C.Decoder_SetParamI(decoder, C.SPEECHLY_DECODER_BLOCK_MULTIPLIER_I, C.int(blockSize), &cErr)
return &cDecoder{
decoder: decoder,
}, nil
Expand Down
2 changes: 1 addition & 1 deletion cmd/transcribe_on_device_not_available.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,6 @@ import (
"fmt"
)

func transcribeOnDevice(bundlePath string, corpusPath string) ([]AudioCorpusItem, error) {
// transcribeOnDevice is the placeholder used when on-device transcription is
// not supported. It ignores all of its arguments and always returns a nil
// result together with an error explaining that the feature is unavailable.
func transcribeOnDevice(bundlePath string, corpusPath string, blockSize int) ([]AudioCorpusItem, error) {
	err := fmt.Errorf("this version of the Speechly CLI tool does not support on-device transcription")
	return nil, err
}