speechly · aukkonen · Mar 7, 2023 · Mar 7, 2023 · Mar 8, 2023 · Mar 8, 2023
diff --git a/cmd/transcribe.go b/cmd/transcribe.go
@@ -27,7 +27,11 @@ speechly transcribe files.jsonl --model /path/to/model/bundle`,
 		inputPath := args[0]
 
 		if model != "" {
-			results, err := transcribeOnDevice(model, inputPath)
+			bs, err := cmd.Flags().GetInt("block-size")
+			if err != nil {
+				bs = 20
+			}
+			results, err := transcribeOnDevice(model, inputPath, bs)
 			printResults(results, inputPath, err == nil)
 			if err != nil {
 				log.Fatalf("Transcribing failed: %v", err)
@@ -79,6 +83,7 @@ func printResults(results []AudioCorpusItem, inputPath string, reportErrors bool
 func init() {
 	transcribeCmd.Flags().StringP("app", "a", "", "Application ID to use for cloud transcription")
 	transcribeCmd.Flags().StringP("model", "m", "", "Model bundle file. This feature is available on Enterprise plans (https://speechly.com/pricing)")
+	transcribeCmd.Flags().Int("block-size", 20, "Block size to be used with the on-device decoder. (Enterprise plans only.)") // default 20 equals the fallback the transcribe command applies when reading this flag fails
 	transcribeCmd.Flags().Bool("streaming", false, "Use the Streaming API instead of the Batch API.")
 	RootCmd.AddCommand(transcribeCmd)
 }

diff --git a/cmd/transcribe_on_device.go b/cmd/transcribe_on_device.go
@@ -5,7 +5,7 @@ package cmd
 
 /*
  #cgo CFLAGS: -I${SRCDIR}/../decoder/include
- #cgo darwin LDFLAGS: -L${SRCDIR}/../decoder/lib -Wl,-rpath,decoder/lib -lspeechly -lz -framework Foundation -lc++ -framework Security
+ #cgo darwin LDFLAGS: -L${SRCDIR}/../decoder/lib -Wl,-rpath,decoder/lib -lspeechlyDecoder -lz -framework Foundation -lc++ -framework Security
  #cgo linux LDFLAGS: -L${SRCDIR}/../decoder/lib -Wl,-rpath,$ORIGIN/../decoder/lib -Wl,--start-group -lstdc++ -lpthread -ldl -lm -lspeechly -lz
  #cgo tflite LDFLAGS: -ltensorflowlite_c
  #cgo coreml LDFLAGS: -framework coreml
@@ -16,27 +16,42 @@ import "C"
 import (
 	"fmt"
 	"os"
+	"io"
 	"path"
 	"strings"
 	"unsafe"
 
 	"github.com/go-audio/audio"
 )
 
-func transcribeOnDevice(model string, corpusPath string) ([]AudioCorpusItem, error) {
-	ac, err := readAudioCorpus(corpusPath)
+const (
+	sampleBufferSize = 2048                 // float32 samples passed to the decoder per write in decodeStdin
+	inputBufferSize  = 2 * sampleBufferSize // bytes read from stdin per chunk: two bytes per 16-bit sample
+)
+
+func transcribeOnDevice(model string, corpusPath string, blockSize int) ([]AudioCorpusItem, error) {
+	df, err := NewDecoderFactory(model)
 	if err != nil {
 		return nil, err
 	}
 
-	df, err := NewDecoderFactory(model)
+	if corpusPath == "STDIN" {
+		d, err := df.NewStream("", blockSize)
+		if err != nil {
+			return nil, err
+		}
+		return nil, decodeStdin(d)
+	}
+
+	ac, err := readAudioCorpus(corpusPath)
 	if err != nil {
 		return nil, err
 	}
+
 	bar := getBar("Transcribing", "utt", len(ac))
 	var results []AudioCorpusItem
 	for _, aci := range ac {
-		d, err := df.NewStream("")
+		d, err := df.NewStream("", blockSize)
 		if err != nil {
 			barClearOnError(bar)
 			return nil, err
@@ -64,6 +79,44 @@ func transcribeOnDevice(model string, corpusPath string) ([]AudioCorpusItem, err
 	return results, nil
 }
 
+// decodeStdin calls Decoder_EnableVAD on d, then feeds 16-bit little-endian PCM
+// from stdin to the decoder on a background goroutine while printing each word
+// it returns, lower-cased, to stdout. It returns only when a decoder call fails.
+func decodeStdin(d *cDecoder) error {
+	cErr := C.DecoderError{}
+	C.Decoder_EnableVAD(d.decoder, 1, &cErr)
+	if cErr.error_code != C.uint(0) {
+		return fmt.Errorf("failed enabling VAD, error code %d", cErr.error_code)
+	}
+	fmt.Println("Speechly Decoder ready!")
+	go func() {
+		cErr := C.DecoderError{} // feeder-local, so it is never shared with the result loop's cErr
+		buffer := make([]byte, inputBufferSize)
+		sampleBuffer := make([]float32, sampleBufferSize)
+		for cErr.error_code == C.uint(0) { // stop feeding after the first write that reports an error
+			if _, err := io.ReadFull(os.Stdin, buffer); err != nil {
+				fmt.Fprintln(os.Stderr, "error:", err) // stderr keeps diagnostics out of the transcript on stdout
+				return // NOTE(review): nothing tells the decoder that stdin ended; confirm the result loop can still finish
+			}
+			for i := range sampleBuffer {
+				s := int16(uint16(buffer[2*i]) | uint16(buffer[2*i+1])<<8)
+				sampleBuffer[i] = float32(s) / 32768.0 // scale to [-1, 1)
+			}
+			C.Decoder_WriteSamples(d.decoder, (*C.float)(unsafe.Pointer(&sampleBuffer[0])), C.size_t(sampleBufferSize), C.int(0), &cErr)
+		}
+		fmt.Fprintln(os.Stderr, "error: failed writing samples to decoder, error code", cErr.error_code)
+	}()
+	for {
+		res := C.Decoder_WaitResults(d.decoder, &cErr)
+		if cErr.error_code != C.uint(0) {
+			return fmt.Errorf("failed reading transcript from decoder, error code %d", cErr.error_code)
+		}
+		word := C.GoString(res.word)
+		fmt.Printf("%s ", strings.ToLower(word))
+		C.CResultWord_Destroy(res)
+	}
+}
+
 func decodeAudioCorpusItem(audioFilePath string, aci AudioCorpusItem, d *cDecoder) (string, error) {
 	cErr := C.DecoderError{}
 
@@ -141,15 +194,15 @@ type cDecoder struct {
 	index   int
 }
 
-func (d *decoderFactory) NewStream(deviceID string) (*cDecoder, error) {
+func (d *decoderFactory) NewStream(deviceID string, blockSize int) (*cDecoder, error) {
 	cDeviceID := C.CString(deviceID)
 	cErr := C.DecoderError{}
 	decoder := C.DecoderFactory_GetDecoder(d.factory, cDeviceID, &cErr)
 	if cErr.error_code != C.uint(0) {
 		return nil, fmt.Errorf("failed creating decoder instance, error code %d", cErr.error_code)
 	}
 	defer C.free(unsafe.Pointer(cDeviceID))
-	C.Decoder_SetParamI(decoder, C.SPEECHLY_DECODER_BLOCK_MULTIPLIER_I, 20, &cErr);
+	C.Decoder_SetParamI(decoder, C.SPEECHLY_DECODER_BLOCK_MULTIPLIER_I, C.int(blockSize), &cErr)
 	return &cDecoder{
 		decoder: decoder,
 	}, nil

diff --git a/cmd/transcribe_on_device_not_available.go b/cmd/transcribe_on_device_not_available.go
@@ -7,6 +7,6 @@ import (
 	"fmt"
 )
 
-func transcribeOnDevice(bundlePath string, corpusPath string) ([]AudioCorpusItem, error) {
+func transcribeOnDevice(bundlePath string, corpusPath string, blockSize int) ([]AudioCorpusItem, error) { // every argument is ignored; this variant always returns an error
 	return nil, fmt.Errorf("this version of the Speechly CLI tool does not support on-device transcription")
 }