diff --git a/.golangci.yaml b/.golangci.yaml index 9b6e6eb5..72989764 100644 --- a/.golangci.yaml +++ b/.golangci.yaml @@ -118,6 +118,12 @@ issues: - path: pkg/client/listen/v1/websocket/new_using_chan.go linters: - gocritic + - path: pkg/client/speak/v1/websocket/client_callback.go + linters: + - dupl + - path: pkg/client/speak/v1/websocket/client_channel.go + linters: + - dupl - path: pkg/client/listen/v1/websocket/client_callback.go linters: - dupl diff --git a/README.md b/README.md index 18c60831..79a4ed09 100644 --- a/README.md +++ b/README.md @@ -41,6 +41,11 @@ For documentation relating to Speech-to-Text (and Intelligence) from PreRecorded For documentation relating to Text-to-Speech: +- WebSocket: + - Speak WebSocket Client - [https://pkg.go.dev/github.com/deepgram/deepgram-go-sdk@main/pkg/client/speak/v1/websocket](https://pkg.go.dev/github.com/deepgram/deepgram-go-sdk@main/pkg/client/speak/v1/websocket) + - Speak WebSocket API - [https://pkg.go.dev/github.com/deepgram/deepgram-go-sdk@main/pkg/api/speak/v1/websocket](https://pkg.go.dev/github.com/deepgram/deepgram-go-sdk@main/pkg/api/speak/v1/websocket) + - Speak API - [https://pkg.go.dev/github.com/deepgram/deepgram-go-sdk@main/pkg/api/speak/v1/websocket/interfaces](https://pkg.go.dev/github.com/deepgram/deepgram-go-sdk@main/pkg/api/speak/v1/websocket/interfaces) + - REST: - Speak REST Client - [https://pkg.go.dev/github.com/deepgram/deepgram-go-sdk@main/pkg/client/speak/v1/rest](https://pkg.go.dev/github.com/deepgram/deepgram-go-sdk@main/pkg/client/speak/v1/rest) - Speak REST API - [https://pkg.go.dev/github.com/deepgram/deepgram-go-sdk@main/pkg/api/speak/v1/rest](https://pkg.go.dev/github.com/deepgram/deepgram-go-sdk@main/pkg/api/speak/v1/rest) @@ -207,6 +212,11 @@ Speech-to-Text - Live Audio: - From a Microphone - [examples/speech-to-text/websocket/microphone](https://github.com/deepgram/deepgram-go-sdk/blob/main/examples/speech-to-text/websocket/microphone/main.go) - From an HTTP Endpoint - 
[examples/speech-to-text/websocket/http](https://github.com/deepgram/deepgram-go-sdk/blob/main/examples/speech-to-text/websocket/http/main.go) +Text-to-Speech - WebSocket + +- WebSocket Simple Example - [examples/text-to-speech/websocket/simple_callback](https://github.com/deepgram/deepgram-go-sdk/blob/main/examples/text-to-speech/websocket/simple_callback/main.go) +- Interactive WebSocket - [examples/text-to-speech/websocket/interactive_callback](https://github.com/deepgram/deepgram-go-sdk/blob/main/examples/text-to-speech/websocket/interactive_callback/main.go) + Text-to-Speech - REST - Save audio to a Path - [examples/text-to-speech/rest/file](https://github.com/deepgram/deepgram-go-sdk/blob/main/examples/text-to-speech/rest/file/main.go) diff --git a/docs.go b/docs.go index d4262d78..e4c68664 100644 --- a/docs.go +++ b/docs.go @@ -30,5 +30,5 @@ import ( _ "github.com/deepgram/deepgram-go-sdk/pkg/api/listen/v1/websocket" _ "github.com/deepgram/deepgram-go-sdk/pkg/api/manage/v1" _ "github.com/deepgram/deepgram-go-sdk/pkg/api/speak/v1/rest" - // _ "github.com/deepgram/deepgram-go-sdk/pkg/api/speak/v1/websocket" + _ "github.com/deepgram/deepgram-go-sdk/pkg/api/speak/v1/websocket" ) diff --git a/examples/speech-to-text/websocket/microphone_channel/main.go b/examples/speech-to-text/websocket/microphone_channel/main.go index a42c7a79..719d2aae 100644 --- a/examples/speech-to-text/websocket/microphone_channel/main.go +++ b/examples/speech-to-text/websocket/microphone_channel/main.go @@ -248,7 +248,7 @@ func main() { callback = *NewMyHandler() // create a Deepgram client - dgClient, err := client.NewWSUsingChan(ctx, "", cOptions, tOptions, &callback) + dgClient, err := client.NewWSUsingChan(ctx, "", cOptions, tOptions, callback) if err != nil { fmt.Println("ERROR creating LiveTranscription connection:", err) return diff --git a/examples/speech-to-text/websocket/replay/main.go b/examples/speech-to-text/websocket/replay/main.go index 85a59f6f..e135e633 100644 --- 
a/examples/speech-to-text/websocket/replay/main.go
+++ b/examples/speech-to-text/websocket/replay/main.go
@@ -37,7 +37,7 @@ func main() {
 	}
 
 	// create a Deepgram client
-	dgClient, err := client.NewWebSocketForDemo(ctx, options)
+	dgClient, err := client.NewWSUsingChanForDemo(ctx, options)
 	if err != nil {
 		log.Println("ERROR creating LiveTranscription connection:", err)
 		return
diff --git a/examples/speech-to-text/websocket/test/main.go b/examples/speech-to-text/websocket/test/main.go
index d222c8a3..527e205b 100644
--- a/examples/speech-to-text/websocket/test/main.go
+++ b/examples/speech-to-text/websocket/test/main.go
@@ -55,7 +55,7 @@ func main() {
 	}
 
 	// create a Deepgram client
-	dgClient, err := client.NewWebSocket(ctx, "", cOptions, tOptions, nil)
+	dgClient, err := client.NewWSUsingChan(ctx, "", cOptions, tOptions, nil)
 	if err != nil {
 		fmt.Println("ERROR creating LiveTranscription connection:", err)
 		return
diff --git a/examples/text-to-speech/websocket/interactive_callback/main.go b/examples/text-to-speech/websocket/interactive_callback/main.go
new file mode 100644
index 00000000..cf2d9681
--- /dev/null
+++ b/examples/text-to-speech/websocket/interactive_callback/main.go
@@ -0,0 +1,203 @@
+// Copyright 2024 Deepgram SDK contributors. All Rights Reserved.
+// Use of this source code is governed by a MIT license that can be found in the LICENSE file.
+// SPDX-License-Identifier: MIT
+
+package main
+
+import (
+	"bufio"
+	"context"
+	"fmt"
+	"os"
+	"strings"
+	"time"
+
+	msginterfaces "github.com/deepgram/deepgram-go-sdk/pkg/api/speak/v1/websocket/interfaces"
+	interfaces "github.com/deepgram/deepgram-go-sdk/pkg/client/interfaces"
+	speak "github.com/deepgram/deepgram-go-sdk/pkg/client/speak"
+)
+
+const (
+	TTS_TEXT   = "Hello, this is a text to speech example using Deepgram."
+	AUDIO_FILE = "output.wav"
+)
+
+// MyCallback handles the events handed to it by the client created with speak.NewWSUsingCallback
+type MyCallback struct{}
+
+func (c MyCallback) Open(or *msginterfaces.OpenResponse) error {
+	fmt.Printf("\n[Open] Received\n")
+	return nil
+}
+
+func (c MyCallback) Metadata(md *msginterfaces.MetadataResponse) error {
+	fmt.Printf("\n[Metadata] Received\n")
+	fmt.Printf("Metadata.RequestID: %s\n", strings.TrimSpace(md.RequestID))
+	return nil
+}
+
+func (c MyCallback) Binary(byMsg []byte) error {
+	fmt.Printf("\n[Binary] Received\n")
+
+	file, err := os.OpenFile(AUDIO_FILE, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0o666)
+	if err != nil {
+		fmt.Printf("Error creating file %s: %v\n", AUDIO_FILE, err)
+		return err
+	}
+
+	_, err = file.Write(byMsg)
+	file.Close()
+
+	if err != nil {
+		fmt.Printf("Error writing audio data to file: %v\n", err)
+		return err
+	}
+
+	return nil
+}
+
+func (c MyCallback) Flush(fl *msginterfaces.FlushedResponse) error {
+	fmt.Printf("\n[Flushed] Received\n")
+	fmt.Printf("\n\nPress 'r' and ENTER to reset the buffer, 'f' and ENTER to flush, enter new text to send it, or just ENTER to exit...\n\n> ")
+	return nil
+}
+
+func (c MyCallback) Close(cr *msginterfaces.CloseResponse) error {
+	fmt.Printf("\n[Close] Received\n")
+	return nil
+}
+
+func (c MyCallback) Warning(wr *msginterfaces.WarningResponse) error {
+	fmt.Printf("\n[Warning] Received\n")
+	fmt.Printf("Warning.Code: %s\n", wr.WarnCode)
+	fmt.Printf("Warning.Message: %s\n\n", wr.WarnMsg)
+	return nil
+}
+
+func (c MyCallback) Error(er *msginterfaces.ErrorResponse) error {
+	fmt.Printf("\n[Error] Received\n")
+	fmt.Printf("Error.Code: %s\n", er.ErrCode)
+	fmt.Printf("Error.Message: %s\n\n", er.ErrMsg)
+	return nil
+}
+
+func (c MyCallback) UnhandledEvent(byData []byte) error {
+	// dump the raw payload of any event that has no dedicated handler
+	fmt.Printf("\n[UnhandledEvent] Received\n")
+	fmt.Printf("UnhandledEvent: %s\n\n", string(byData))
+	return nil
+}
+
+func main() {
+	// init library
+	speak.InitWithDefault()
+
+	// Go context
+	ctx := context.Background()
+
+	// print instructions
+	fmt.Print("\n\nPress ENTER to exit!\n\n")
+
+	// set the Client options
+	cOptions := &interfaces.ClientOptions{
+		// AutoFlushSpeakDelta: 1000,
+	}
+
+	// set the TTS options
+	ttsOptions := &interfaces.WSSpeakOptions{
+		Model:      "aura-asteria-en",
+		Encoding:   "linear16",
+		SampleRate: 48000,
+	}
+
+	// create the callback
+	callback := MyCallback{}
+
+	// create a new TTS WebSocket client wired to the callback above
+	dgClient, err := speak.NewWSUsingCallback(ctx, "", cOptions, ttsOptions, callback)
+	if err != nil {
+		fmt.Println("ERROR creating TTS connection:", err)
+		return
+	}
+
+	// connect the websocket to Deepgram
+	bConnected := dgClient.Connect()
+	if !bConnected {
+		fmt.Println("Client.Connect failed")
+		os.Exit(1)
+	}
+
+	// Pause briefly, then read commands from stdin: reset the buffer, flush, send new text, or exit
+	time.Sleep(2 * time.Second)
+	fmt.Printf("\n\nPress 'r' and ENTER to reset the buffer, 'f' and ENTER to flush, enter new text to send it, or just ENTER to exit...\n\n> ")
+	input := bufio.NewScanner(os.Stdin)
+	for input.Scan() {
+		switch input.Text() {
+		case "r":
+			err = dgClient.Reset()
+			if err != nil {
+				fmt.Printf("Error resetting buffer: %v\n", err)
+			} else {
+				fmt.Println("Buffer reset successfully.")
+			}
+		case "f":
+			// delete any previous recording; the error is ignored because a missing file is fine
+			_ = os.Remove(AUDIO_FILE)
+
+			file, err := os.OpenFile(AUDIO_FILE, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0o666)
+			if err != nil {
+				fmt.Printf("Failed to open file. Err: %v\n", err)
+				goto EXIT
+			}
+
+			// Add a wav audio container header to the file if you want to play the audio
+			// using a media player like VLC, Media Player, or Apple Music
+			header := []byte{
+				0x52, 0x49, 0x46, 0x46, // "RIFF"
+				0x00, 0x00, 0x00, 0x00, // Placeholder for file size
+				0x57, 0x41, 0x56, 0x45, // "WAVE"
+				0x66, 0x6d, 0x74, 0x20, // "fmt "
+				0x10, 0x00, 0x00, 0x00, // Chunk size (16)
+				0x01, 0x00, // Audio format (1 for PCM)
+				0x01, 0x00, // Number of channels (1)
+				0x80, 0xbb, 0x00, 0x00, // Sample rate (48000)
+				0x00, 0x77, 0x01, 0x00, // Byte rate (48000 * 2 = 96000)
+				0x02, 0x00, // Block align (2)
+				0x10, 0x00, // Bits per sample (16)
+				0x64, 0x61, 0x74, 0x61, // "data"
+				0x00, 0x00, 0x00, 0x00, // Placeholder for data size
+			}
+
+			_, err = file.Write(header)
+			file.Close()
+			if err != nil {
+				fmt.Printf("Failed to write header to file. Err: %v\n", err)
+				goto EXIT
+			}
+
+			err = dgClient.Flush()
+			if err != nil {
+				fmt.Printf("Error flushing buffer: %v\n", err)
+			} else {
+				fmt.Println("Buffer flushed successfully.")
+			}
+		case "":
+			goto EXIT
+		default:
+			err = dgClient.SpeakWithText(input.Text())
+			if err != nil {
+				fmt.Printf("Error sending text input: %v\n", err)
+			} else {
+				fmt.Println("Text sent successfully.")
+			}
+			fmt.Printf("\n\nPress 'r' and ENTER to reset the buffer, 'f' and ENTER to flush, enter new text to send it, or just ENTER to exit...\n\n> ")
+		}
+	}
+
+EXIT:
+
+	// close the connection
+	dgClient.Stop()
+
+	fmt.Printf("Program exiting...\n")
+}
diff --git a/examples/text-to-speech/websocket/interactive_channel/main.go b/examples/text-to-speech/websocket/interactive_channel/main.go
new file mode 100644
index 00000000..5a2bf6b9
--- /dev/null
+++ b/examples/text-to-speech/websocket/interactive_channel/main.go
@@ -0,0 +1,324 @@
+// Copyright 2024 Deepgram SDK contributors. All Rights Reserved.
+// Use of this source code is governed by a MIT license that can be found in the LICENSE file.
+// SPDX-License-Identifier: MIT
+
+package main
+
+import (
+	"bufio"
+	"context"
+	"fmt"
+	"os"
+	"strings"
+	"sync"
+	"time"
+
+	msginterfaces "github.com/deepgram/deepgram-go-sdk/pkg/api/speak/v1/websocket/interfaces"
+	interfaces "github.com/deepgram/deepgram-go-sdk/pkg/client/interfaces"
+	speak "github.com/deepgram/deepgram-go-sdk/pkg/client/speak"
+)
+
+const (
+	TTS_TEXT   = "Hello, this is a text to speech example using Deepgram."
+	AUDIO_FILE = "output.wav"
+)
+
+type MyHandler struct {
+	binaryChan    chan *[]byte
+	openChan      chan *msginterfaces.OpenResponse
+	metadataChan  chan *msginterfaces.MetadataResponse
+	flushChan     chan *msginterfaces.FlushedResponse
+	closeChan     chan *msginterfaces.CloseResponse
+	warningChan   chan *msginterfaces.WarningResponse
+	errorChan     chan *msginterfaces.ErrorResponse
+	unhandledChan chan *[]byte
+}
+
+func NewMyHandler() MyHandler {
+	handler := MyHandler{
+		binaryChan:    make(chan *[]byte),
+		openChan:      make(chan *msginterfaces.OpenResponse),
+		metadataChan:  make(chan *msginterfaces.MetadataResponse),
+		flushChan:     make(chan *msginterfaces.FlushedResponse),
+		closeChan:     make(chan *msginterfaces.CloseResponse),
+		warningChan:   make(chan *msginterfaces.WarningResponse),
+		errorChan:     make(chan *msginterfaces.ErrorResponse),
+		unhandledChan: make(chan *[]byte),
+	}
+
+	go func() {
+		handler.Run()
+	}()
+
+	return handler
+}
+
+// GetBinary returns the binary event channels
+func (dch MyHandler) GetBinary() []*chan *[]byte {
+	return []*chan *[]byte{&dch.binaryChan}
+}
+
+// GetOpen returns the open channels
+func (dch MyHandler) GetOpen() []*chan *msginterfaces.OpenResponse {
+	return []*chan *msginterfaces.OpenResponse{&dch.openChan}
+}
+
+// GetMetadata returns the metadata channels
+func (dch MyHandler) GetMetadata() []*chan *msginterfaces.MetadataResponse {
+	return []*chan *msginterfaces.MetadataResponse{&dch.metadataChan}
+}
+
+// GetFlush returns the flush channels
+func (dch MyHandler) GetFlush() []*chan *msginterfaces.FlushedResponse {
+	return []*chan *msginterfaces.FlushedResponse{&dch.flushChan}
+}
+
+// GetClose returns the close channels
+func (dch MyHandler) GetClose() []*chan *msginterfaces.CloseResponse {
+	return []*chan *msginterfaces.CloseResponse{&dch.closeChan}
+}
+
+// GetWarning returns the warning channels
+func (dch MyHandler) GetWarning() []*chan *msginterfaces.WarningResponse {
+	return []*chan *msginterfaces.WarningResponse{&dch.warningChan}
+}
+
+// GetError returns the error channels
+func (dch MyHandler) GetError() []*chan *msginterfaces.ErrorResponse {
+	return []*chan *msginterfaces.ErrorResponse{&dch.errorChan}
+}
+
+// GetUnhandled returns the unhandled event channels
+func (dch MyHandler) GetUnhandled() []*chan *[]byte {
+	return []*chan *[]byte{&dch.unhandledChan}
+}
+
+// Run starts one receiver goroutine per channel and blocks until every channel has been closed
+// golintci: funlen
+func (dch MyHandler) Run() error {
+	wgReceivers := sync.WaitGroup{}
+
+	// open channel
+	wgReceivers.Add(1)
+	go func() {
+		defer wgReceivers.Done()
+
+		for range dch.openChan {
+			fmt.Printf("\n\n[OpenResponse]\n\n")
+		}
+	}()
+
+	// binary channel: append every audio chunk to AUDIO_FILE
+	wgReceivers.Add(1)
+	go func() {
+		defer wgReceivers.Done()
+
+		for br := range dch.binaryChan {
+			fmt.Printf("\n\n[Binary Data]\n\n")
+			fmt.Printf("Size: %d\n\n", len(*br))
+
+			file, err := os.OpenFile(AUDIO_FILE, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0o666)
+			if err != nil {
+				fmt.Printf("Failed to open file. Err: %v\n", err)
+				continue
+			}
+
+			_, err = file.Write(*br)
+			file.Close()
+
+			if err != nil {
+				fmt.Printf("Failed to write to file. Err: %v\n", err)
+				continue
+			}
+		}
+	}()
+
+	// metadata channel
+	wgReceivers.Add(1)
+	go func() {
+		defer wgReceivers.Done()
+
+		for mr := range dch.metadataChan {
+			fmt.Printf("\n[MetadataResponse]\n")
+			fmt.Printf("RequestID: %s\n", strings.TrimSpace(mr.RequestID))
+		}
+	}()
+
+	// flushed channel
+	wgReceivers.Add(1)
+	go func() {
+		defer wgReceivers.Done()
+
+		for range dch.flushChan {
+			fmt.Printf("\n[FlushedResponse]\n")
+		}
+	}()
+
+	// close channel
+	wgReceivers.Add(1)
+	go func() {
+		defer wgReceivers.Done()
+
+		for range dch.closeChan {
+			fmt.Printf("\n\n[CloseResponse]\n\n")
+		}
+	}()
+
+	// warning channel
+	wgReceivers.Add(1)
+	go func() {
+		defer wgReceivers.Done()
+
+		for wr := range dch.warningChan {
+			fmt.Printf("\n[WarningResponse]\n")
+			fmt.Printf("\nWarning.Type: %s\n", wr.WarnCode)
+			fmt.Printf("Warning.Message: %s\n", wr.WarnMsg)
+			fmt.Printf("Warning.Description: %s\n\n", wr.Description)
+			fmt.Printf("Warning.Variant: %s\n\n", wr.Variant)
+		}
+	}()
+
+	// error channel
+	wgReceivers.Add(1)
+	go func() {
+		defer wgReceivers.Done()
+
+		for er := range dch.errorChan {
+			fmt.Printf("\n[ErrorResponse]\n")
+			fmt.Printf("\nError.Type: %s\n", er.ErrCode)
+			fmt.Printf("Error.Message: %s\n", er.ErrMsg)
+			fmt.Printf("Error.Description: %s\n\n", er.Description)
+			fmt.Printf("Error.Variant: %s\n\n", er.Variant)
+		}
+	}()
+
+	// unhandled event channel
+	wgReceivers.Add(1)
+	go func() {
+		defer wgReceivers.Done()
+
+		for byData := range dch.unhandledChan {
+			fmt.Printf("\n[UnhandledEvent]\n")
+			fmt.Printf("Dump:\n%s\n\n", string(*byData))
+		}
+	}()
+
+	// wait for all receivers to finish
+	wgReceivers.Wait()
+
+	return nil
+}
+
+func main() {
+	// init library
+	speak.InitWithDefault()
+
+	// Go context
+	ctx := context.Background()
+
+	// print instructions
+	fmt.Print("\n\nPress ENTER to exit!\n\n")
+
+	// set the Client options
+	cOptions := &interfaces.ClientOptions{
+		// AutoFlushSpeakDelta: 1000,
+	}
+
+	// set the TTS options
+	ttsOptions := &interfaces.WSSpeakOptions{
+		Model:      "aura-asteria-en",
+		Encoding:   "linear16",
+		SampleRate: 48000,
+	}
+
+	// create the channel handler (NewMyHandler also starts its receiver goroutines)
+	callback := NewMyHandler()
+
+	// create a new TTS WebSocket client wired to the handler's channels
+	dgClient, err := speak.NewWSUsingChan(ctx, "", cOptions, ttsOptions, callback)
+	if err != nil {
+		fmt.Println("ERROR creating TTS connection:", err)
+		return
+	}
+
+	// connect the websocket to Deepgram
+	bConnected := dgClient.Connect()
+	if !bConnected {
+		fmt.Println("Client.Connect failed")
+		os.Exit(1)
+	}
+
+	// Pause briefly, then read commands from stdin: reset the buffer, flush, send new text, or exit
+	time.Sleep(2 * time.Second)
+	fmt.Printf("\n\nPress 'r' and ENTER to reset the buffer, 'f' and ENTER to flush, enter new text to send it, or just ENTER to exit...\n\n> ")
+	input := bufio.NewScanner(os.Stdin)
+	for input.Scan() {
+		switch input.Text() {
+		case "r":
+			err = dgClient.Reset()
+			if err != nil {
+				fmt.Printf("Error resetting buffer: %v\n", err)
+			} else {
+				fmt.Println("Buffer reset successfully.")
+			}
+		case "f":
+			// delete any previous recording; the error is ignored because a missing file is fine
+			_ = os.Remove(AUDIO_FILE)
+
+			file, err := os.OpenFile(AUDIO_FILE, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0o666)
+			if err != nil {
+				fmt.Printf("Failed to open file. Err: %v\n", err)
+				goto EXIT
+			}
+
+			// Add a wav audio container header to the file if you want to play the audio
+			// using a media player like VLC, Media Player, or Apple Music
+			header := []byte{
+				0x52, 0x49, 0x46, 0x46, // "RIFF"
+				0x00, 0x00, 0x00, 0x00, // Placeholder for file size
+				0x57, 0x41, 0x56, 0x45, // "WAVE"
+				0x66, 0x6d, 0x74, 0x20, // "fmt "
+				0x10, 0x00, 0x00, 0x00, // Chunk size (16)
+				0x01, 0x00, // Audio format (1 for PCM)
+				0x01, 0x00, // Number of channels (1)
+				0x80, 0xbb, 0x00, 0x00, // Sample rate (48000)
+				0x00, 0x77, 0x01, 0x00, // Byte rate (48000 * 2 = 96000)
+				0x02, 0x00, // Block align (2)
+				0x10, 0x00, // Bits per sample (16)
+				0x64, 0x61, 0x74, 0x61, // "data"
+				0x00, 0x00, 0x00, 0x00, // Placeholder for data size
+			}
+
+			_, err = file.Write(header)
+			file.Close()
+			if err != nil {
+				fmt.Printf("Failed to write header to file. Err: %v\n", err)
+				goto EXIT
+			}
+
+			err = dgClient.Flush()
+			if err != nil {
+				fmt.Printf("Error flushing buffer: %v\n", err)
+			} else {
+				fmt.Println("Buffer flushed successfully.")
+			}
+		case "":
+			goto EXIT
+		default:
+			err = dgClient.SpeakWithText(input.Text())
+			if err != nil {
+				fmt.Printf("Error sending text input: %v\n", err)
+			} else {
+				fmt.Println("Text sent successfully.")
+			}
+			fmt.Printf("\n\nPress 'r' and ENTER to reset the buffer, 'f' and ENTER to flush, enter new text to send it, or just ENTER to exit...\n\n> ")
+		}
+	}
+
+EXIT:
+
+	// close the connection
+	dgClient.Stop()
+
+	fmt.Printf("Program exiting...\n")
+}
diff --git a/examples/text-to-speech/websocket/simple_callback/main.go b/examples/text-to-speech/websocket/simple_callback/main.go
new file mode 100644
index 00000000..349726e5
--- /dev/null
+++ b/examples/text-to-speech/websocket/simple_callback/main.go
@@ -0,0 +1,179 @@
+// Copyright 2024 Deepgram SDK contributors. All Rights Reserved.
+// Use of this source code is governed by a MIT license that can be found in the LICENSE file.
+// SPDX-License-Identifier: MIT
+
+package main
+
+import (
+	"context"
+	"fmt"
+	"os"
+	"strings"
+	"time"
+
+	msginterfaces "github.com/deepgram/deepgram-go-sdk/pkg/api/speak/v1/websocket/interfaces"
+	interfaces "github.com/deepgram/deepgram-go-sdk/pkg/client/interfaces/v1"
+	speak "github.com/deepgram/deepgram-go-sdk/pkg/client/speak"
+)
+
+const (
+	TTS_TEXT   = "Hello, this is a text to speech example using Deepgram."
+	AUDIO_FILE = "output.wav"
+)
+
+// MyCallback handles the events handed to it by the client created with speak.NewWSUsingCallback
+type MyCallback struct{}
+
+func (c MyCallback) Open(or *msginterfaces.OpenResponse) error {
+	fmt.Printf("\n[Open] Received\n")
+	return nil
+}
+
+func (c MyCallback) Metadata(md *msginterfaces.MetadataResponse) error {
+	fmt.Printf("\n[Metadata] Received\n")
+	fmt.Printf("Metadata.RequestID: %s\n", strings.TrimSpace(md.RequestID))
+	return nil
+}
+
+func (c MyCallback) Binary(byMsg []byte) error {
+	fmt.Printf("\n[Binary] Received\n")
+
+	file, err := os.OpenFile(AUDIO_FILE, os.O_CREATE|os.O_APPEND|os.O_WRONLY, 0o666)
+	if err != nil {
+		fmt.Printf("Error creating file %s: %v\n", AUDIO_FILE, err)
+		return err
+	}
+	defer file.Close()
+
+	_, err = file.Write(byMsg)
+	if err != nil {
+		fmt.Printf("Error writing audio data to file: %v\n", err)
+		return err
+	}
+
+	fmt.Printf("Audio data saved to %s\n", AUDIO_FILE)
+	return nil
+}
+
+func (c MyCallback) Flush(fl *msginterfaces.FlushedResponse) error {
+	fmt.Printf("\n[Flushed] Received\n")
+	return nil
+}
+
+func (c MyCallback) Close(cr *msginterfaces.CloseResponse) error {
+	fmt.Printf("\n[Close] Received\n")
+	return nil
+}
+
+func (c MyCallback) Warning(wr *msginterfaces.WarningResponse) error {
+	fmt.Printf("\n[Warning] Received\n")
+	fmt.Printf("Warning.Code: %s\n", wr.WarnCode)
+	fmt.Printf("Warning.Message: %s\n\n", wr.WarnMsg)
+	return nil
+}
+
+func (c MyCallback) Error(er *msginterfaces.ErrorResponse) error {
+	fmt.Printf("\n[Error] Received\n")
+	fmt.Printf("Error.Code: %s\n", er.ErrCode)
+	fmt.Printf("Error.Message: %s\n\n", er.ErrMsg)
+	return nil
+}
+
+func (c MyCallback) UnhandledEvent(byData []byte) error {
+	// dump the raw payload of any event that has no dedicated handler
+	fmt.Printf("\n[UnhandledEvent] Received\n")
+	fmt.Printf("UnhandledEvent: %s\n\n", string(byData))
+	return nil
+}
+
+func main() {
+	// init library
+	speak.Init(speak.InitLib{
+		LogLevel: speak.LogLevelDefault, // LogLevelDefault, LogLevelFull, LogLevelDebug, LogLevelTrace
+	})
+
+	// Go context
+	ctx := context.Background()
+
+	// set the Client options
+	cOptions := &interfaces.ClientOptions{
+		// AutoFlushSpeakDelta: 1000,
+	}
+
+	// set the TTS options
+	ttsOptions := &interfaces.WSSpeakOptions{
+		Model:      "aura-asteria-en",
+		Encoding:   "linear16",
+		SampleRate: 48000,
+	}
+
+	// create the callback
+	callback := MyCallback{}
+
+	// create a new TTS WebSocket client wired to the callback above
+	dgClient, err := speak.NewWSUsingCallback(ctx, "", cOptions, ttsOptions, callback)
+	if err != nil {
+		fmt.Println("ERROR creating TTS connection:", err)
+		return
+	}
+
+	// connect the websocket to Deepgram
+	bConnected := dgClient.Connect()
+	if !bConnected {
+		fmt.Println("Client.Connect failed")
+		os.Exit(1)
+	}
+
+	file, err := os.OpenFile(AUDIO_FILE, os.O_TRUNC|os.O_CREATE|os.O_WRONLY, 0o666)
+	if err != nil {
+		fmt.Printf("Failed to open file. Err: %v\n", err)
+		return
+	}
+
+	// Add a wav audio container header to the file if you want to play the audio
+	// using a media player like VLC, Media Player, or Apple Music
+	header := []byte{
+		0x52, 0x49, 0x46, 0x46, // "RIFF"
+		0x00, 0x00, 0x00, 0x00, // Placeholder for file size
+		0x57, 0x41, 0x56, 0x45, // "WAVE"
+		0x66, 0x6d, 0x74, 0x20, // "fmt "
+		0x10, 0x00, 0x00, 0x00, // Chunk size (16)
+		0x01, 0x00, // Audio format (1 for PCM)
+		0x01, 0x00, // Number of channels (1)
+		0x80, 0xbb, 0x00, 0x00, // Sample rate (48000)
+		0x00, 0x77, 0x01, 0x00, // Byte rate (48000 * 2 = 96000)
+		0x02, 0x00, // Block align (2)
+		0x10, 0x00, // Bits per sample (16)
+		0x64, 0x61, 0x74, 0x61, // "data"
+		0x00, 0x00, 0x00, 0x00, // Placeholder for data size
+	}
+
+	_, err = file.Write(header)
+	file.Close()
+	if err != nil {
+		fmt.Printf("Failed to write header to file. Err: %v\n", err)
+		return
+	}
+
+	// Send the text input
+	err = dgClient.SpeakWithText(TTS_TEXT)
+	if err != nil {
+		fmt.Printf("Error sending text input: %v\n", err)
+		return
+	}
+
+	// If AutoFlushSpeakDelta is not set, you must Flush the text input manually
+	err = dgClient.Flush()
+	if err != nil {
+		fmt.Printf("Error flushing buffer: %v\n", err)
+		return
+	}
+
+	// pause for 5 seconds before closing (presumably to let the audio stream back - confirm)
+	time.Sleep(5 * time.Second)
+
+	// close the connection
+	dgClient.Stop()
+
+	fmt.Printf("Program exiting...\n")
+}
diff --git a/examples/text-to-speech/websocket/simple_channel/main.go b/examples/text-to-speech/websocket/simple_channel/main.go
new file mode 100644
index 00000000..7cf814a1
--- /dev/null
+++ b/examples/text-to-speech/websocket/simple_channel/main.go
@@ -0,0 +1,299 @@
+// Copyright 2024 Deepgram SDK contributors. All Rights Reserved.
+// Use of this source code is governed by a MIT license that can be found in the LICENSE file.
+// SPDX-License-Identifier: MIT
+
+package main
+
+import (
+	"context"
+	"fmt"
+	"os"
+	"strings"
+	"sync"
+	"time"
+
+	msginterfaces "github.com/deepgram/deepgram-go-sdk/pkg/api/speak/v1/websocket/interfaces"
+	interfaces "github.com/deepgram/deepgram-go-sdk/pkg/client/interfaces/v1"
+	speak "github.com/deepgram/deepgram-go-sdk/pkg/client/speak"
+)
+
+const (
+	TTS_TEXT   = "Hello, this is a text to speech example using Deepgram."
+	AUDIO_FILE = "output.wav"
+)
+
+type MyHandler struct {
+	binaryChan    chan *[]byte
+	openChan      chan *msginterfaces.OpenResponse
+	metadataChan  chan *msginterfaces.MetadataResponse
+	flushChan     chan *msginterfaces.FlushedResponse
+	closeChan     chan *msginterfaces.CloseResponse
+	warningChan   chan *msginterfaces.WarningResponse
+	errorChan     chan *msginterfaces.ErrorResponse
+	unhandledChan chan *[]byte
+}
+
+func NewMyHandler() MyHandler {
+	handler := MyHandler{
+		binaryChan:    make(chan *[]byte),
+		openChan:      make(chan *msginterfaces.OpenResponse),
+		metadataChan:  make(chan *msginterfaces.MetadataResponse),
+		flushChan:     make(chan *msginterfaces.FlushedResponse),
+		closeChan:     make(chan *msginterfaces.CloseResponse),
+		warningChan:   make(chan *msginterfaces.WarningResponse),
+		errorChan:     make(chan *msginterfaces.ErrorResponse),
+		unhandledChan: make(chan *[]byte),
+	}
+
+	go func() {
+		handler.Run()
+	}()
+
+	return handler
+}
+
+// GetBinary returns the binary event channels
+func (dch MyHandler) GetBinary() []*chan *[]byte {
+	return []*chan *[]byte{&dch.binaryChan}
+}
+
+// GetOpen returns the open channels
+func (dch MyHandler) GetOpen() []*chan *msginterfaces.OpenResponse {
+	return []*chan *msginterfaces.OpenResponse{&dch.openChan}
+}
+
+// GetMetadata returns the metadata channels
+func (dch MyHandler) GetMetadata() []*chan *msginterfaces.MetadataResponse {
+	return []*chan *msginterfaces.MetadataResponse{&dch.metadataChan}
+}
+
+// GetFlush returns the flush channels
+func (dch MyHandler) GetFlush() []*chan *msginterfaces.FlushedResponse {
+	return []*chan *msginterfaces.FlushedResponse{&dch.flushChan}
+}
+
+// GetClose returns the close channels
+func (dch MyHandler) GetClose() []*chan *msginterfaces.CloseResponse {
+	return []*chan *msginterfaces.CloseResponse{&dch.closeChan}
+}
+
+// GetWarning returns the warning channels
+func (dch MyHandler) GetWarning() []*chan *msginterfaces.WarningResponse {
+	return []*chan *msginterfaces.WarningResponse{&dch.warningChan}
+}
+
+// GetError returns the error channels
+func (dch MyHandler) GetError() []*chan *msginterfaces.ErrorResponse {
+	return []*chan *msginterfaces.ErrorResponse{&dch.errorChan}
+}
+
+// GetUnhandled returns the unhandled event channels
+func (dch MyHandler) GetUnhandled() []*chan *[]byte {
+	return []*chan *[]byte{&dch.unhandledChan}
+}
+
+// Run starts one receiver goroutine per channel and blocks until every channel has been closed
+// golintci: funlen
+func (dch MyHandler) Run() error {
+	wgReceivers := sync.WaitGroup{}
+
+	// open channel
+	wgReceivers.Add(1)
+	go func() {
+		defer wgReceivers.Done()
+
+		for range dch.openChan {
+			fmt.Printf("\n\n[OpenResponse]\n\n")
+		}
+	}()
+
+	// binary channel: append every audio chunk to AUDIO_FILE
+	wgReceivers.Add(1)
+	go func() {
+		defer wgReceivers.Done()
+
+		for br := range dch.binaryChan {
+			fmt.Printf("\n\n[Binary Data]\n")
+
+			file, err := os.OpenFile(AUDIO_FILE, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0o666)
+			if err != nil {
+				fmt.Printf("Failed to open file. Err: %v\n", err)
+				continue
+			}
+
+			_, err = file.Write(*br)
+			file.Close()
+
+			if err != nil {
+				fmt.Printf("Failed to write to file. Err: %v\n", err)
+				continue
+			}
+		}
+	}()
+
+	// metadata channel
+	wgReceivers.Add(1)
+	go func() {
+		defer wgReceivers.Done()
+
+		for mr := range dch.metadataChan {
+			fmt.Printf("\n[MetadataResponse]\n")
+			fmt.Printf("RequestID: %s\n", strings.TrimSpace(mr.RequestID))
+		}
+	}()
+
+	// flushed channel
+	wgReceivers.Add(1)
+	go func() {
+		defer wgReceivers.Done()
+
+		for range dch.flushChan {
+			fmt.Printf("\n[FlushedResponse]\n")
+		}
+	}()
+
+	// close channel
+	wgReceivers.Add(1)
+	go func() {
+		defer wgReceivers.Done()
+
+		for range dch.closeChan {
+			fmt.Printf("\n\n[CloseResponse]\n\n")
+		}
+	}()
+
+	// warning channel
+	wgReceivers.Add(1)
+	go func() {
+		defer wgReceivers.Done()
+
+		for wr := range dch.warningChan {
+			fmt.Printf("\n[WarningResponse]\n")
+			fmt.Printf("\nWarning.Type: %s\n", wr.WarnCode)
+			fmt.Printf("Warning.Message: %s\n", wr.WarnMsg)
+			fmt.Printf("Warning.Description: %s\n\n", wr.Description)
+			fmt.Printf("Warning.Variant: %s\n\n", wr.Variant)
+		}
+	}()
+
+	// error channel
+	wgReceivers.Add(1)
+	go func() {
+		defer wgReceivers.Done()
+
+		for er := range dch.errorChan {
+			fmt.Printf("\n[ErrorResponse]\n")
+			fmt.Printf("\nError.Type: %s\n", er.ErrCode)
+			fmt.Printf("Error.Message: %s\n", er.ErrMsg)
+			fmt.Printf("Error.Description: %s\n\n", er.Description)
+			fmt.Printf("Error.Variant: %s\n\n", er.Variant)
+		}
+	}()
+
+	// unhandled event channel
+	wgReceivers.Add(1)
+	go func() {
+		defer wgReceivers.Done()
+
+		for byData := range dch.unhandledChan {
+			fmt.Printf("\n[UnhandledEvent]\n")
+			fmt.Printf("Dump:\n%s\n\n", string(*byData))
+		}
+	}()
+
+	// wait for all receivers to finish
+	wgReceivers.Wait()
+
+	return nil
+}
+
+func main() {
+	// init library
+	speak.Init(speak.InitLib{
+		LogLevel: speak.LogLevelDefault, // LogLevelDefault, LogLevelFull, LogLevelDebug, LogLevelTrace
+	})
+
+	// Go context
+	ctx := context.Background()
+
+	// set the Client options
+	cOptions := &interfaces.ClientOptions{
+		// AutoFlushSpeakDelta: 1000,
+	}
+
+	// set the TTS options
+	ttsOptions := &interfaces.WSSpeakOptions{
+		Model:      "aura-asteria-en",
+		Encoding:   "linear16",
+		SampleRate: 48000,
+	}
+
+	// create the channel handler (NewMyHandler also starts its receiver goroutines)
+	callback := NewMyHandler()
+
+	// create a new TTS WebSocket client wired to the handler's channels
+	dgClient, err := speak.NewWSUsingChan(ctx, "", cOptions, ttsOptions, callback)
+	if err != nil {
+		fmt.Println("ERROR creating TTS connection:", err)
+		return
+	}
+
+	// connect the websocket to Deepgram
+	bConnected := dgClient.Connect()
+	if !bConnected {
+		fmt.Println("Client.Connect failed")
+		os.Exit(1)
+	}
+
+	file, err := os.OpenFile(AUDIO_FILE, os.O_TRUNC|os.O_CREATE|os.O_WRONLY, 0o666)
+	if err != nil {
+		fmt.Printf("Failed to open file. Err: %v\n", err)
+		return
+	}
+	// Add a wav audio container header to the file if you want to play the audio
+	// using a media player like VLC, Media Player, or Apple Music
+	header := []byte{
+		0x52, 0x49, 0x46, 0x46, // "RIFF"
+		0x00, 0x00, 0x00, 0x00, // Placeholder for file size
+		0x57, 0x41, 0x56, 0x45, // "WAVE"
+		0x66, 0x6d, 0x74, 0x20, // "fmt "
+		0x10, 0x00, 0x00, 0x00, // Chunk size (16)
+		0x01, 0x00, // Audio format (1 for PCM)
+		0x01, 0x00, // Number of channels (1)
+		0x80, 0xbb, 0x00, 0x00, // Sample rate (48000)
+		0x00, 0x77, 0x01, 0x00, // Byte rate (48000 * 2 = 96000)
+		0x02, 0x00, // Block align (2)
+		0x10, 0x00, // Bits per sample (16)
+		0x64, 0x61, 0x74, 0x61, // "data"
+		0x00, 0x00, 0x00, 0x00, // Placeholder for data size
+	}
+
+	_, err = file.Write(header)
+	file.Close()
+	if err != nil {
+		fmt.Printf("Failed to write header to file. Err: %v\n", err)
+		return
+	}
+
+	// Send the text input
+	err = dgClient.SpeakWithText(TTS_TEXT)
+	if err != nil {
+		fmt.Printf("Error sending text input: %v\n", err)
+		return
+	}
+
+	// If AutoFlushSpeakDelta is not set, you must Flush the text input manually
+	err = dgClient.Flush()
+	if err != nil {
+		fmt.Printf("Error flushing buffer: %v\n", err)
+		return
+	}
+
+	// pause for 5 seconds before closing (presumably to let the audio stream back - confirm)
+	time.Sleep(5 * time.Second)
+
+	// close the connection
+	dgClient.Stop()
+
+	fmt.Printf("Program exiting...\n")
+}
diff --git a/hack/check/tools/go.mod b/hack/check/tools/go.mod
index 0915e832..042d235f 100644
--- a/hack/check/tools/go.mod
+++ b/hack/check/tools/go.mod
@@ -1,4 +1,4 @@
-module github.com/vmware-tanzu/community-edition/hack/tools
+module github.com/deepgram/deepgram-go-sdk/hack/tools
 
 go 1.19
 
@@ -171,3 +171,5 @@ require (
 	mvdan.cc/lint v0.0.0-20170908181259-adc824a0674b // indirect
 	mvdan.cc/unparam v0.0.0-20220706161116-678bad134442 // indirect
 )
+
+replace github.com/gorilla/websocket => github.com/dvonthenen/websocket v1.5.1-dyv.2
diff --git a/hack/check/tools/go.sum b/hack/check/tools/go.sum
index f47e3c57..f996a03e 100644
--- a/hack/check/tools/go.sum
+++ b/hack/check/tools/go.sum
@@ -141,6 +141,7 @@ github.com/denis-tingaikin/go-header v0.4.3/go.mod h1:0wOCWuN71D5qIgE2nz9KrKmuYB
 github.com/dgrijalva/jwt-go v3.2.0+incompatible/go.mod h1:E3ru+11k8xSBh+hMPgOLZmtrrCbhqsmaPHjLKYnJCaQ=
 github.com/dustin/go-humanize v0.0.0-20171111073723-bb3d318650d4/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk=
 github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk=
+github.com/dvonthenen/websocket v1.5.1-dyv.2/go.mod h1:q2GbopbpFJvBP4iqVvqwwahVmvu2HnCfdqCWDoQVKMM=
 github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
 github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1mIlRU8Am5FuJP05cCM98= @@ -311,8 +312,6 @@ github.com/gordonklaus/ineffassign v0.0.0-20210914165742-4cc7213b9bc8 h1:PVRE9d4 github.com/gordonklaus/ineffassign v0.0.0-20210914165742-4cc7213b9bc8/go.mod h1:Qcp2HIAYhR7mNUVSIxZww3Guk4it82ghYcEXIAk+QT0= github.com/gorhill/cronexpr v0.0.0-20180427100037-88b0669f7d75/go.mod h1:g2644b03hfBX9Ov0ZBDgXXens4rxSxmqFBbhvKv2yVA= github.com/gorilla/mux v1.8.0/go.mod h1:DVbg23sWSpFRCP0SfiEN6jmj59UnW/n46BH5rLB71So= -github.com/gorilla/websocket v0.0.0-20170926233335-4201258b820c/go.mod h1:E7qHFY5m1UJ88s3WnNqhKjPHQ0heANvMoAMk2YaljkQ= -github.com/gorilla/websocket v1.4.1/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE= github.com/gostaticanalysis/analysisutil v0.0.0-20190318220348-4088753ea4d3/go.mod h1:eEOZF4jCKGi+aprrirO9e7WKB3beBRtWgqGunKl6pKE= github.com/gostaticanalysis/analysisutil v0.0.3/go.mod h1:eEOZF4jCKGi+aprrirO9e7WKB3beBRtWgqGunKl6pKE= github.com/gostaticanalysis/analysisutil v0.1.0/go.mod h1:dMhHRU9KTiDcuLGdy87/2gTR8WruwYZrKdRq9m1O6uw= diff --git a/pkg/api/listen/v1/websocket/callback_router.go b/pkg/api/listen/v1/websocket/callback_router.go index 21f2aa8a..6f0384a3 100644 --- a/pkg/api/listen/v1/websocket/callback_router.go +++ b/pkg/api/listen/v1/websocket/callback_router.go @@ -167,7 +167,7 @@ func (r *CallbackRouter) Message(byMsg []byte) error { err = r.processSpeechStartedResponse(byMsg) case interfaces.TypeUtteranceEndResponse: err = r.processUtteranceEndResponse(byMsg) - case interfaces.TypeErrorResponse: + case interfaces.TypeResponse(interfaces.TypeErrorResponse): err = r.processErrorResponse(byMsg) default: err = r.UnhandledMessage(byMsg) @@ -182,6 +182,12 @@ func (r *CallbackRouter) Message(byMsg []byte) error { return err } +// Binary handles platform messages and routes them appropriately based on the MessageType +func (r *CallbackRouter) Binary(byMsg []byte) error { + // No implementation needed on STT + return nil +} + // 
UnhandledMessage logs and handles any unexpected message types func (r *CallbackRouter) UnhandledMessage(byMsg []byte) error { klog.V(6).Infof("router.UnhandledMessage ENTER\n") diff --git a/pkg/api/listen/v1/websocket/chan_router.go b/pkg/api/listen/v1/websocket/chan_router.go index 38e4f0f3..e52ce43a 100644 --- a/pkg/api/listen/v1/websocket/chan_router.go +++ b/pkg/api/listen/v1/websocket/chan_router.go @@ -17,29 +17,20 @@ import ( // NewWithDefault creates a ChanRouter with the default callback handler func NewChanWithDefault() *ChanRouter { - defChan := NewDefaultChanHandler() + chans := NewDefaultChanHandler() go func() { - err := defChan.Run() + err := chans.Run() if err != nil { - klog.V(1).Infof("defChan.Run failed. Err: %v\n", err) + klog.V(1).Infof("chans.Run failed. Err: %v\n", err) } }() - var debugStr string - if v := os.Getenv("DEEPGRAM_DEBUG"); v != "" { - klog.V(4).Infof("DEEPGRAM_DEBUG found") - debugStr = v - } - - return &ChanRouter{ - debugWebsocket: strings.EqualFold(strings.ToLower(debugStr), "true"), - defaultHandler: defChan, - } + return NewChanRouter(chans) } // New creates a ChanRouter with a user-defined channels // gocritic:ignore -func NewChanRouter(chans *interfaces.LiveMessageChan) *ChanRouter { +func NewChanRouter(chans interfaces.LiveMessageChan) *ChanRouter { var debugStr string if v := os.Getenv("DEEPGRAM_DEBUG"); v != "" { klog.V(4).Infof("DEEPGRAM_DEBUG found") @@ -59,14 +50,14 @@ func NewChanRouter(chans *interfaces.LiveMessageChan) *ChanRouter { } if chans != nil { - router.openChan = append(router.openChan, (*chans).GetOpen()...) - router.messageChan = append(router.messageChan, (*chans).GetMessage()...) - router.metadataChan = append(router.metadataChan, (*chans).GetMetadata()...) - router.speechStartedChan = append(router.speechStartedChan, (*chans).GetSpeechStarted()...) - router.utteranceEndChan = append(router.utteranceEndChan, (*chans).GetUtteranceEnd()...) 
- router.closeChan = append(router.closeChan, (*chans).GetClose()...) - router.errorChan = append(router.errorChan, (*chans).GetError()...) - router.unhandledChan = append(router.unhandledChan, (*chans).GetUnhandled()...) + router.openChan = append(router.openChan, chans.GetOpen()...) + router.messageChan = append(router.messageChan, chans.GetMessage()...) + router.metadataChan = append(router.metadataChan, chans.GetMetadata()...) + router.speechStartedChan = append(router.speechStartedChan, chans.GetSpeechStarted()...) + router.utteranceEndChan = append(router.utteranceEndChan, chans.GetUtteranceEnd()...) + router.closeChan = append(router.closeChan, chans.GetClose()...) + router.errorChan = append(router.errorChan, chans.GetError()...) + router.unhandledChan = append(router.unhandledChan, chans.GetUnhandled()...) } return router @@ -93,7 +84,7 @@ func (r *ChanRouter) Open(or *interfaces.OpenResponse) error { return nil } - return r.processGeneric(interfaces.TypeOpenResponse, byMsg, action) + return r.processGeneric(string(interfaces.TypeOpenResponse), byMsg, action) } // Close sends an CloseResponse message to the callback @@ -117,7 +108,7 @@ func (r *ChanRouter) Close(cr *interfaces.CloseResponse) error { return nil } - return r.processGeneric(interfaces.TypeCloseResponse, byMsg, action) + return r.processGeneric(string(interfaces.TypeCloseResponse), byMsg, action) } // Error sends an ErrorResponse message to the callback @@ -141,11 +132,11 @@ func (r *ChanRouter) Error(er *interfaces.ErrorResponse) error { return nil } - return r.processGeneric(interfaces.TypeErrorResponse, byMsg, action) + return r.processGeneric(string(interfaces.TypeErrorResponse), byMsg, action) } // processGeneric generalizes the handling of all message types -func (r *ChanRouter) processGeneric(msgType interfaces.TypeResponse, byMsg []byte, action func(data []byte) error) error { +func (r *ChanRouter) processGeneric(msgType string, byMsg []byte, action func(data []byte) error) error { 
klog.V(6).Infof("router.%s ENTER\n", msgType) r.printDebugMessages(5, msgType, byMsg) @@ -175,7 +166,7 @@ func (r *ChanRouter) processMessage(byMsg []byte) error { return nil } - return r.processGeneric(interfaces.TypeMessageResponse, byMsg, action) + return r.processGeneric(string(interfaces.TypeMessageResponse), byMsg, action) } func (r *ChanRouter) processMetadata(byMsg []byte) error { @@ -192,7 +183,7 @@ func (r *ChanRouter) processMetadata(byMsg []byte) error { return nil } - return r.processGeneric(interfaces.TypeMetadataResponse, byMsg, action) + return r.processGeneric(string(interfaces.TypeMetadataResponse), byMsg, action) } func (r *ChanRouter) processSpeechStartedResponse(byMsg []byte) error { @@ -209,7 +200,7 @@ func (r *ChanRouter) processSpeechStartedResponse(byMsg []byte) error { return nil } - return r.processGeneric(interfaces.TypeSpeechStartedResponse, byMsg, action) + return r.processGeneric(string(interfaces.TypeSpeechStartedResponse), byMsg, action) } func (r *ChanRouter) processUtteranceEndResponse(byMsg []byte) error { @@ -226,7 +217,7 @@ func (r *ChanRouter) processUtteranceEndResponse(byMsg []byte) error { return nil } - return r.processGeneric(interfaces.TypeUtteranceEndResponse, byMsg, action) + return r.processGeneric(string(interfaces.TypeUtteranceEndResponse), byMsg, action) } func (r *ChanRouter) processErrorResponse(byMsg []byte) error { @@ -243,7 +234,7 @@ func (r *ChanRouter) processErrorResponse(byMsg []byte) error { return nil } - return r.processGeneric(interfaces.TypeErrorResponse, byMsg, action) + return r.processGeneric(string(interfaces.TypeErrorResponse), byMsg, action) } // Message handles platform messages and routes them appropriately based on the MessageType @@ -271,7 +262,7 @@ func (r *ChanRouter) Message(byMsg []byte) error { err = r.processSpeechStartedResponse(byMsg) case interfaces.TypeUtteranceEndResponse: err = r.processUtteranceEndResponse(byMsg) - case interfaces.TypeErrorResponse: + case 
interfaces.TypeResponse(interfaces.TypeErrorResponse): err = r.processErrorResponse(byMsg) default: err = r.UnhandledMessage(byMsg) @@ -286,6 +277,12 @@ func (r *ChanRouter) Message(byMsg []byte) error { return err } +// Binary handles platform messages and routes them appropriately based on the MessageType +func (r *ChanRouter) Binary(byMsg []byte) error { + // No implementation needed on STT + return nil +} + // UnhandledMessage logs and handles any unexpected message types func (r *ChanRouter) UnhandledMessage(byMsg []byte) error { klog.V(6).Infof("router.UnhandledMessage ENTER\n") @@ -301,7 +298,7 @@ func (r *ChanRouter) UnhandledMessage(byMsg []byte) error { } // printDebugMessages formats and logs debugging messages -func (r *ChanRouter) printDebugMessages(level klog.Level, function interfaces.TypeResponse, byMsg []byte) { +func (r *ChanRouter) printDebugMessages(level klog.Level, function string, byMsg []byte) { prettyJSON, err := prettyjson.Format(byMsg) if err != nil { klog.V(1).Infof("prettyjson.Format failed. 
Err: %v\n", err) diff --git a/pkg/api/listen/v1/websocket/interfaces/constants.go b/pkg/api/listen/v1/websocket/interfaces/constants.go index 43f50138..9560550c 100644 --- a/pkg/api/listen/v1/websocket/interfaces/constants.go +++ b/pkg/api/listen/v1/websocket/interfaces/constants.go @@ -4,20 +4,22 @@ package interfacesv1 +import ( + commoninterfaces "github.com/deepgram/deepgram-go-sdk/pkg/client/common/v1/interfaces" +) + // These are the message types that can be received from the live API -type TypeResponse string +type TypeResponse commoninterfaces.TypeResponse const ( // message types - TypeOpenResponse TypeResponse = "Open" + TypeOpenResponse = commoninterfaces.TypeOpenResponse TypeMessageResponse TypeResponse = "Results" TypeMetadataResponse TypeResponse = "Metadata" TypeUtteranceEndResponse TypeResponse = "UtteranceEnd" TypeSpeechStartedResponse TypeResponse = "SpeechStarted" TypeFinalizeResponse TypeResponse = "Finalize" + TypeCloseResponse = commoninterfaces.TypeCloseResponse TypeCloseStreamResponse TypeResponse = "CloseStream" - TypeCloseResponse TypeResponse = "Close" - - // Error type - TypeErrorResponse TypeResponse = "Error" + TypeErrorResponse = commoninterfaces.TypeErrorResponse ) diff --git a/pkg/api/listen/v1/websocket/interfaces/interfaces.go b/pkg/api/listen/v1/websocket/interfaces/interfaces.go index aca989f1..3dbdcc29 100644 --- a/pkg/api/listen/v1/websocket/interfaces/interfaces.go +++ b/pkg/api/listen/v1/websocket/interfaces/interfaces.go @@ -5,16 +5,6 @@ // This package defines interfaces for the live API package interfacesv1 -/* -Router definition -*/ -type Router interface { - Open(or *OpenResponse) error - Message(byMsg []byte) error - Close(or *CloseResponse) error - Error(er *ErrorResponse) error -} - /* Chan Interfaces */ diff --git a/pkg/api/listen/v1/websocket/interfaces/types.go b/pkg/api/listen/v1/websocket/interfaces/types.go index 270d1159..97977858 100644 --- a/pkg/api/listen/v1/websocket/interfaces/types.go +++ 
b/pkg/api/listen/v1/websocket/interfaces/types.go @@ -5,6 +5,7 @@ package interfacesv1 import ( + commoninterfaces "github.com/deepgram/deepgram-go-sdk/pkg/client/common/v1/interfaces" interfaces "github.com/deepgram/deepgram-go-sdk/pkg/client/interfaces" ) @@ -79,11 +80,8 @@ type LiveTranscriptionOptions interfaces.LiveTranscriptionOptions /***********************************/ // Results from Live Transcription /***********************************/ - -// OpenResponse is the response from the connection starting -type OpenResponse struct { - Type string `json:"type,omitempty"` -} +// OpenResponse is the response from opening the connection +type OpenResponse = commoninterfaces.OpenResponse // MessageResponse is the response from a live transcription type MessageResponse struct { @@ -125,10 +123,8 @@ type SpeechStartedResponse struct { Timestamp float64 `json:"timestamp,omitempty"` } -// CloseResponse is the response from the connection closing -type CloseResponse struct { - Type string `json:"type,omitempty"` -} +// CloseResponse is the response from closing the connection +type CloseResponse = commoninterfaces.CloseResponse // ErrorResponse is the Deepgram specific response error type ErrorResponse = interfaces.DeepgramError diff --git a/pkg/api/listen/v1/websocket/types.go b/pkg/api/listen/v1/websocket/types.go index 0864f670..4e881336 100644 --- a/pkg/api/listen/v1/websocket/types.go +++ b/pkg/api/listen/v1/websocket/types.go @@ -30,7 +30,6 @@ type DefaultChanHandler struct { // ChanRouter routes events type ChanRouter struct { debugWebsocket bool - defaultHandler *DefaultChanHandler // call out to channels openChan []*chan *interfaces.OpenResponse diff --git a/pkg/api/speak/v1/interfaces/types.go b/pkg/api/speak/v1/interfaces/types.go deleted file mode 100644 index 1fe3a3a7..00000000 --- a/pkg/api/speak/v1/interfaces/types.go +++ /dev/null @@ -1,31 +0,0 @@ -// Copyright 2024 Deepgram SDK contributors. All Rights Reserved. 
-// Use of this source code is governed by a MIT license that can be found in the LICENSE file. -// SPDX-License-Identifier: MIT - -package interfacesv1 - -import ( - interfaces "github.com/deepgram/deepgram-go-sdk/pkg/client/interfaces" -) - -/***********************************/ -// Request/Input structs -/***********************************/ -type SpeakOptions interfaces.SpeakOptions - -/***********************************/ -// response/result structs -/***********************************/ -type SpeakResponse struct { - ContextType string `json:"content_type,omitempty"` - RequestID string `json:"request_id,omitempty"` - ModelUUID string `json:"model_uuid,omitempty"` - Characters int `json:"characters,omitempty"` - ModelName string `json:"model_name,omitempty"` - TransferEncoding string `json:"transfer_encoding,omitempty"` - Date string `json:"date,omitempty"` - Filename string `json:"filename,omitempty"` -} - -// ErrorResponse is the Deepgram specific response error -type ErrorResponse interfaces.DeepgramError diff --git a/pkg/api/speak/v1/websocket/callback_default.go b/pkg/api/speak/v1/websocket/callback_default.go new file mode 100644 index 00000000..1abe0198 --- /dev/null +++ b/pkg/api/speak/v1/websocket/callback_default.go @@ -0,0 +1,217 @@ +// Copyright 2024 Deepgram SDK contributors. All Rights Reserved. +// Use of this source code is governed by a MIT license that can be found in the LICENSE file. 
+// SPDX-License-Identifier: MIT + +package websocketv1 + +import ( + "encoding/json" + "fmt" + "os" + "strings" + + prettyjson "github.com/hokaccha/go-prettyjson" + klog "k8s.io/klog/v2" + + interfaces "github.com/deepgram/deepgram-go-sdk/pkg/api/speak/v1/websocket/interfaces" +) + +// NewDefaultCallbackHandler creates a new DefaultCallbackHandler +func NewDefaultCallbackHandler() *DefaultCallbackHandler { + var debugStr string + if v := os.Getenv("DEEPGRAM_DEBUG"); v != "" { + klog.V(4).Infof("DEEPGRAM_DEBUG found") + debugStr = v + } + var debugExtStr string + if v := os.Getenv("DEEPGRAM_DEBUG_VERBOSE"); v != "" { + klog.V(4).Infof("DEEPGRAM_DEBUG_VERBOSE found") + debugExtStr = v + } + return &DefaultCallbackHandler{ + debugWebsocket: strings.EqualFold(debugStr, "true"), + debugWebsocketVerbose: strings.EqualFold(debugExtStr, "true"), + } +} + +// Open is the callback for when the connection opens +func (dch DefaultCallbackHandler) Open(or *interfaces.OpenResponse) error { + if dch.debugWebsocket { + data, err := json.Marshal(or) + if err != nil { + klog.V(1).Infof("Open json.Marshal failed. Err: %v\n", err) + return err + } + + prettyJSON, err := prettyjson.Format(data) + if err != nil { + klog.V(1).Infof("prettyjson.Marshal failed. Err: %v\n", err) + return err + } + klog.V(2).Infof("\n\nOpen Object:\n%s\n\n", prettyJSON) + + return nil + } + + // handle the message + fmt.Printf("\n\n[OpenResponse]\n\n") + + return nil +} + +// Metadata is the callback for information about the connection +func (dch DefaultCallbackHandler) Metadata(md *interfaces.MetadataResponse) error { + if dch.debugWebsocket { + data, err := json.Marshal(md) + if err != nil { + klog.V(1).Infof("Metadata json.Marshal failed. Err: %v\n", err) + return err + } + + prettyJSON, err := prettyjson.Format(data) + if err != nil { + klog.V(1).Infof("prettyjson.Marshal failed. 
Err: %v\n", err) + return err + } + klog.V(2).Infof("\n\nMetadata Object:\n%s\n\n", prettyJSON) + + return nil + } + + // handle the message + fmt.Printf("\n\nMetadata.RequestID: %s\n", strings.TrimSpace(md.RequestID)) + + return nil +} + +// Flushed is the callback for when the connection flushes +func (dch DefaultCallbackHandler) Flush(fr *interfaces.FlushedResponse) error { + if dch.debugWebsocket { + data, err := json.Marshal(fr) + if err != nil { + klog.V(1).Infof("Flush json.Marshal failed. Err: %v\n", err) + return err + } + + prettyJSON, err := prettyjson.Format(data) + if err != nil { + klog.V(1).Infof("prettyjson.Marshal failed. Err: %v\n", err) + return err + } + klog.V(2).Infof("\n\nFlush Object:\n%s\n\n", prettyJSON) + + return nil + } + + // handle the message + fmt.Printf("\n\nFlushed.SequenceID: %d\n", fr.SequenceID) + + return nil +} + +// Binary is the callback for when the connection receives binary data +func (dch DefaultCallbackHandler) Binary(br []byte) error { + klog.V(3).Infof("Received binary data: %d bytes", len(br)) + return nil +} + +// Close is the callback for when the connection closes +func (dch DefaultCallbackHandler) Close(or *interfaces.CloseResponse) error { + if dch.debugWebsocket { + data, err := json.Marshal(or) + if err != nil { + klog.V(1).Infof("Close json.Marshal failed. Err: %v\n", err) + return err + } + + prettyJSON, err := prettyjson.Format(data) + if err != nil { + klog.V(1).Infof("prettyjson.Marshal failed. Err: %v\n", err) + return err + } + klog.V(2).Infof("\n\nClose Object:\n%s\n\n", prettyJSON) + + return nil + } + + // handle the message + fmt.Printf("\n\n[CloseResponse]\n\n") + + return nil +} + +// Warning is the callback for error messages +func (dch DefaultCallbackHandler) Warning(wr *interfaces.WarningResponse) error { + if dch.debugWebsocket { + data, err := json.Marshal(wr) + if err != nil { + klog.V(1).Infof("Error json.Marshal failed. 
Err: %v\n", err) + return err + } + + prettyJSON, err := prettyjson.Format(data) + if err != nil { + klog.V(1).Infof("prettyjson.Marshal failed. Err: %v\n", err) + return err + } + klog.V(2).Infof("\n\nWarning Object:\n%s\n\n", prettyJSON) + + return nil + } + + // handle the message + fmt.Printf("\n[WarningResponse]\n") + fmt.Printf("\nError.Code: %s\n", wr.WarnCode) + fmt.Printf("Error.Message: %s\n", wr.WarnMsg) + + return nil +} + +// Error is the callback for error messages +func (dch DefaultCallbackHandler) Error(er *interfaces.ErrorResponse) error { + if dch.debugWebsocket { + data, err := json.Marshal(er) + if err != nil { + klog.V(1).Infof("Error json.Marshal failed. Err: %v\n", err) + return err + } + + prettyJSON, err := prettyjson.Format(data) + if err != nil { + klog.V(1).Infof("prettyjson.Marshal failed. Err: %v\n", err) + return err + } + klog.V(2).Infof("\n\nError Object:\n%s\n\n", prettyJSON) + + return nil + } + + // handle the message + fmt.Printf("\n[ErrorResponse]\n") + fmt.Printf("\nError.Type: %s\n", er.ErrCode) + fmt.Printf("Error.Message: %s\n", er.ErrMsg) + fmt.Printf("Error.Description: %s\n\n", er.Description) + fmt.Printf("Error.Variant: %s\n\n", er.Variant) + + return nil +} + +// UnhandledEvent is the callback for unknown messages +func (dch DefaultCallbackHandler) UnhandledEvent(byData []byte) error { + if dch.debugWebsocket { + prettyJSON, err := prettyjson.Format(byData) + if err != nil { + klog.V(2).Infof("\n\nRaw Data:\n%s\n\n", string(byData)) + } else { + klog.V(2).Infof("\n\nError Object:\n%s\n\n", prettyJSON) + } + + return nil + } + + // handle the message + fmt.Printf("\n[UnhandledEvent]") + fmt.Printf("Dump:\n%s\n\n", string(byData)) + + return nil +} diff --git a/pkg/api/speak/v1/websocket/callback_router.go b/pkg/api/speak/v1/websocket/callback_router.go new file mode 100644 index 00000000..55593094 --- /dev/null +++ b/pkg/api/speak/v1/websocket/callback_router.go @@ -0,0 +1,201 @@ +// Copyright 2024 Deepgram SDK 
contributors. All Rights Reserved. +// Use of this source code is governed by a MIT license that can be found in the LICENSE file. +// SPDX-License-Identifier: MIT + +package websocketv1 + +import ( + "encoding/json" + "os" + "strings" + + prettyjson "github.com/hokaccha/go-prettyjson" + klog "k8s.io/klog/v2" + + interfaces "github.com/deepgram/deepgram-go-sdk/pkg/api/speak/v1/websocket/interfaces" +) + +// NewWithDefault creates a CallbackRouter with the default callback handler +func NewCallbackWithDefault() *CallbackRouter { + var callback interfaces.SpeakMessageCallback + handler := NewDefaultCallbackHandler() + callback = handler + return NewCallbackRouter(callback) +} + +// New creates a CallbackRouter with a user-defined callback +func NewCallbackRouter(callback interfaces.SpeakMessageCallback) *CallbackRouter { + var debugStr string + if v := os.Getenv("DEEPGRAM_DEBUG"); v != "" { + klog.V(4).Infof("DEEPGRAM_DEBUG found") + debugStr = v + } + return &CallbackRouter{ + callback: callback, + debugWebsocket: strings.EqualFold(strings.ToLower(debugStr), "true"), + } +} + +// OpenHelper handles the OpenResponse message +func (r *CallbackRouter) Open(or *interfaces.OpenResponse) error { + return r.callback.Open(or) +} + +// CloseHelper handles the OpenResponse message +func (r *CallbackRouter) Close(or *interfaces.CloseResponse) error { + return r.callback.Close(or) +} + +// ErrorHelper handles the ErrorResponse message +func (r *CallbackRouter) Error(er *interfaces.ErrorResponse) error { + return r.callback.Error(er) +} + +// processMessage generalizes the handling of all message types +func (r *CallbackRouter) processGeneric(msgType string, byMsg []byte, action func(data *interface{}) error, data interface{}) error { + klog.V(6).Infof("router.%s ENTER\n", msgType) + + r.printDebugMessages(5, msgType, byMsg) + + var err error + if err = action(&data); err != nil { + klog.V(1).Infof("callback.%s failed. 
Err: %v\n", msgType, err) + } else { + klog.V(5).Infof("callback.%s succeeded\n", msgType) + } + klog.V(6).Infof("router.%s LEAVE\n", msgType) + + return err +} + +func (r *CallbackRouter) processFlushed(byMsg []byte) error { + var msg interfaces.FlushedResponse + if err := json.Unmarshal(byMsg, &msg); err != nil { + return err + } + + action := func(data *interface{}) error { + return r.callback.Flush(&msg) + } + + return r.processGeneric(string(interfaces.TypeFlushedResponse), byMsg, action, msg) +} + +func (r *CallbackRouter) processMetadata(byMsg []byte) error { + var msg interfaces.MetadataResponse + if err := json.Unmarshal(byMsg, &msg); err != nil { + return err + } + + action := func(data *interface{}) error { + return r.callback.Metadata(&msg) + } + + return r.processGeneric(string(interfaces.TypeMetadataResponse), byMsg, action, msg) +} + +func (r *CallbackRouter) processWarningResponse(byMsg []byte) error { + var msg interfaces.WarningResponse + if err := json.Unmarshal(byMsg, &msg); err != nil { + return err + } + + action := func(data *interface{}) error { + return r.callback.Warning(&msg) + } + + return r.processGeneric(string(interfaces.TypeWarningResponse), byMsg, action, msg) +} + +func (r *CallbackRouter) processErrorResponse(byMsg []byte) error { + var msg interfaces.ErrorResponse + if err := json.Unmarshal(byMsg, &msg); err != nil { + return err + } + + action := func(data *interface{}) error { + return r.callback.Error(&msg) + } + + return r.processGeneric(string(interfaces.TypeErrorResponse), byMsg, action, msg) +} + +// Message handles platform messages and routes them appropriately based on the MessageType +func (r *CallbackRouter) Message(byMsg []byte) error { + klog.V(6).Infof("router.Message ENTER\n") + + if r.debugWebsocket { + klog.V(5).Infof("Raw Message:\n%s\n", string(byMsg)) + } + + var mt interfaces.MessageType + if err := json.Unmarshal(byMsg, &mt); err != nil { + klog.V(1).Infof("json.Unmarshal(MessageType) failed. 
Err: %v\n", err) + klog.V(6).Infof("router.Message LEAVE\n") + return err + } + + var err error + switch interfaces.TypeResponse(mt.Type) { + case interfaces.TypeFlushedResponse: + err = r.processFlushed(byMsg) + case interfaces.TypeMetadataResponse: + err = r.processMetadata(byMsg) + case interfaces.TypeWarningResponse: + err = r.processWarningResponse(byMsg) + case interfaces.TypeResponse(interfaces.TypeErrorResponse): + err = r.processErrorResponse(byMsg) + default: + err = r.UnhandledMessage(byMsg) + klog.V(1).Infof("Message type %s is unhandled\n", mt.Type) + } + + if err == nil { + klog.V(6).Infof("MessageType(%s) after - Result: succeeded\n", mt.Type) + } else { + klog.V(5).Infof("MessageType(%s) after - Result: %v\n", mt.Type, err) + } + klog.V(6).Infof("router.Message LEAVE\n") + return err +} + +// Binary handles binary messages +func (r *CallbackRouter) Binary(byMsg []byte) error { + klog.V(6).Infof("router.Binary ENTER\n") + + err := r.callback.Binary(byMsg) + if err != nil { + klog.V(1).Infof("callback.Binary failed. Err: %v\n", err) + } else { + klog.V(5).Infof("callback.Binary succeeded\n") + } + + klog.V(6).Infof("router.Binary LEAVE\n") + return err +} + +// UnhandledMessage logs and handles any unexpected message types +func (r *CallbackRouter) UnhandledMessage(byMsg []byte) error { + action := func(data *interface{}) error { + return r.callback.UnhandledEvent(byMsg) + } + + err := r.processGeneric(string(interfaces.TypeUnhandledResponse), byMsg, action, byMsg) + if err != nil { + klog.V(1).Infof("callback.UnhandledEvent failed. Err: %v\n", err) + } + + return ErrInvalidMessageType +} + +// printDebugMessages formats and logs debugging messages +func (r *CallbackRouter) printDebugMessages(level klog.Level, function string, byMsg []byte) { + prettyJSON, err := prettyjson.Format(byMsg) + if err != nil { + klog.V(1).Infof("prettyjson.Format failed. 
Err: %v\n", err) + return + } + klog.V(level).Infof("\n\n-----------------------------------------------\n") + klog.V(level).Infof("%s RAW:\n%s\n", function, prettyJSON) + klog.V(level).Infof("-----------------------------------------------\n\n\n") +} diff --git a/pkg/api/speak/v1/websocket/chan_default.go b/pkg/api/speak/v1/websocket/chan_default.go new file mode 100644 index 00000000..1719e399 --- /dev/null +++ b/pkg/api/speak/v1/websocket/chan_default.go @@ -0,0 +1,315 @@ +// Copyright 2023-2024 Deepgram SDK contributors. All Rights Reserved. +// Use of this source code is governed by a MIT license that can be found in the LICENSE file. +// SPDX-License-Identifier: MIT + +package websocketv1 + +import ( + "encoding/json" + "fmt" + "os" + "strings" + "sync" + + prettyjson "github.com/hokaccha/go-prettyjson" + klog "k8s.io/klog/v2" + + interfaces "github.com/deepgram/deepgram-go-sdk/pkg/api/speak/v1/websocket/interfaces" +) + +// NewDefaultChanHandler creates a new DefaultChanHandler +func NewDefaultChanHandler() DefaultChanHandler { + var debugStr string + if v := os.Getenv("DEEPGRAM_DEBUG"); v != "" { + klog.V(4).Infof("DEEPGRAM_DEBUG found") + debugStr = v + } + var debugExtStr string + if v := os.Getenv("DEEPGRAM_DEBUG_VERBOSE"); v != "" { + klog.V(4).Infof("DEEPGRAM_DEBUG_VERBOSE found") + debugExtStr = v + } + + handler := DefaultChanHandler{ + debugWebsocket: strings.EqualFold(debugStr, "true"), + debugWebsocketVerbose: strings.EqualFold(debugExtStr, "true"), + binaryChan: make(chan *[]byte), + openChan: make(chan *interfaces.OpenResponse), + metadataChan: make(chan *interfaces.MetadataResponse), + flushedChan: make(chan *interfaces.FlushedResponse), + closeChan: make(chan *interfaces.CloseResponse), + warningChan: make(chan *interfaces.WarningResponse), + errorChan: make(chan *interfaces.ErrorResponse), + unhandledChan: make(chan *[]byte), + } + + go func() { + err := handler.Run() + if err != nil { + klog.V(1).Infof("handler.Run failed. 
Err: %v\n", err) + } + }() + + return handler +} + +// GetBinary returns the binary event channels +func (dch DefaultChanHandler) GetBinary() []*chan *[]byte { + return []*chan *[]byte{&dch.binaryChan} +} + +// GetOpen returns the open channels +func (dch DefaultChanHandler) GetOpen() []*chan *interfaces.OpenResponse { + return []*chan *interfaces.OpenResponse{&dch.openChan} +} + +// GetMetadata returns the metadata channels +func (dch DefaultChanHandler) GetMetadata() []*chan *interfaces.MetadataResponse { + return []*chan *interfaces.MetadataResponse{&dch.metadataChan} +} + +// GetfFlush returns the flush channels +func (dch DefaultChanHandler) GetFlush() []*chan *interfaces.FlushedResponse { + return []*chan *interfaces.FlushedResponse{&dch.flushedChan} +} + +// GetClose returns the close channels +func (dch DefaultChanHandler) GetClose() []*chan *interfaces.CloseResponse { + return []*chan *interfaces.CloseResponse{&dch.closeChan} +} + +// GetWarning returns the warning channels +func (dch DefaultChanHandler) GetWarning() []*chan *interfaces.WarningResponse { + return []*chan *interfaces.WarningResponse{&dch.warningChan} +} + +// GetError returns the error channels +func (dch DefaultChanHandler) GetError() []*chan *interfaces.ErrorResponse { + return []*chan *interfaces.ErrorResponse{&dch.errorChan} +} + +// GetUnhandled returns the unhandled event channels +func (dch DefaultChanHandler) GetUnhandled() []*chan *[]byte { + return []*chan *[]byte{&dch.unhandledChan} +} + +// Open is the callback for when the connection opens +// +//nolint:funlen,gocyclo // this is a complex function. 
keep as is +func (dch DefaultChanHandler) Run() error { + wgReceivers := sync.WaitGroup{} + + // binary channel + wgReceivers.Add(1) + go func() { + defer wgReceivers.Done() + + for br := range dch.binaryChan { + fmt.Printf("\n\n[Binary Data]\n\n") + fmt.Printf("Size: %d\n\n", len(*br)) + + if dch.debugWebsocket { + fmt.Printf("Hex Dump: %x...\n\n", (*br)[:20]) + } + if dch.debugWebsocketVerbose { + fmt.Printf("Dumping to verbose.wav\n") + file, err := os.OpenFile("verbose.wav", os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0o644) + if err != nil { + fmt.Printf("Failed to open file. Err: %v\n", err) + continue + } + + _, err = file.Write(*br) + file.Close() + + if err != nil { + fmt.Printf("Failed to write to file. Err: %v\n", err) + continue + } + } + } + }() + + // open channel + wgReceivers.Add(1) + go func() { + defer wgReceivers.Done() + + for or := range dch.openChan { + if dch.debugWebsocket { + data, err := json.Marshal(or) + if err != nil { + klog.V(1).Infof("Open json.Marshal failed. Err: %v\n", err) + continue + } + + prettyJSON, err := prettyjson.Format(data) + if err != nil { + klog.V(1).Infof("prettyjson.Marshal failed. Err: %v\n", err) + continue + } + klog.V(2).Infof("\n\nOpen Object:\n%s\n\n", prettyJSON) + } + + fmt.Printf("\n\n[OpenResponse]\n\n") + } + }() + + // metadata channel + wgReceivers.Add(1) + go func() { + defer wgReceivers.Done() + + for mr := range dch.metadataChan { + if dch.debugWebsocket { + data, err := json.Marshal(mr) + if err != nil { + klog.V(1).Infof("Close json.Marshal failed. Err: %v\n", err) + continue + } + + prettyJSON, err := prettyjson.Format(data) + if err != nil { + klog.V(1).Infof("prettyjson.Marshal failed. 
Err: %v\n", err) + continue + } + klog.V(2).Infof("\n\nMetadata Object:\n%s\n\n", prettyJSON) + } + + fmt.Printf("\n\nMetadata.RequestID: %s\n", strings.TrimSpace(mr.RequestID)) + } + }() + + // speech started channel + wgReceivers.Add(1) + go func() { + defer wgReceivers.Done() + + for ssr := range dch.flushedChan { + if dch.debugWebsocket { + data, err := json.Marshal(ssr) + if err != nil { + klog.V(1).Infof("Close json.Marshal failed. Err: %v\n", err) + continue + } + + prettyJSON, err := prettyjson.Format(data) + if err != nil { + klog.V(1).Infof("prettyjson.Marshal failed. Err: %v\n", err) + continue + } + klog.V(2).Infof("\n\nFlushed Object:\n%s\n\n", prettyJSON) + } + + fmt.Printf("\n[Flushed]\n") + } + }() + + // close channel + wgReceivers.Add(1) + go func() { + defer wgReceivers.Done() + + for cr := range dch.closeChan { + if dch.debugWebsocket { + data, err := json.Marshal(cr) + if err != nil { + klog.V(1).Infof("Close json.Marshal failed. Err: %v\n", err) + continue + } + + prettyJSON, err := prettyjson.Format(data) + if err != nil { + klog.V(1).Infof("prettyjson.Marshal failed. Err: %v\n", err) + continue + } + klog.V(2).Infof("\n\nClose Object:\n%s\n\n", prettyJSON) + } + + fmt.Printf("\n\n[CloseResponse]\n\n") + } + }() + + // warning channel + wgReceivers.Add(1) + go func() { + defer wgReceivers.Done() + + for wr := range dch.warningChan { + if dch.debugWebsocket { + data, err := json.Marshal(wr) + if err != nil { + klog.V(1).Infof("Close json.Marshal failed. Err: %v\n", err) + continue + } + + prettyJSON, err := prettyjson.Format(data) + if err != nil { + klog.V(1).Infof("prettyjson.Marshal failed. 
Err: %v\n", err) + continue + } + klog.V(2).Infof("\n\nWarning Object:\n%s\n\n", prettyJSON) + } + + fmt.Printf("\n[Warning]\n") + fmt.Printf("\nWarning.Type: %s\n", wr.WarnCode) + fmt.Printf("Warning.Message: %s\n", wr.WarnMsg) + fmt.Printf("Warning.Description: %s\n\n", wr.Description) + fmt.Printf("Warning.Variant: %s\n\n", wr.Variant) + } + }() + + // error channel + wgReceivers.Add(1) + go func() { + defer wgReceivers.Done() + + for er := range dch.errorChan { + if dch.debugWebsocket { + data, err := json.Marshal(er) + if err != nil { + klog.V(1).Infof("Close json.Marshal failed. Err: %v\n", err) + continue + } + + prettyJSON, err := prettyjson.Format(data) + if err != nil { + klog.V(1).Infof("prettyjson.Marshal failed. Err: %v\n", err) + continue + } + klog.V(2).Infof("\n\nError Object:\n%s\n\n", prettyJSON) + } + + fmt.Printf("\n[ErrorResponse]\n") + fmt.Printf("\nError.Type: %s\n", er.ErrCode) + fmt.Printf("Error.Message: %s\n", er.ErrMsg) + fmt.Printf("Error.Description: %s\n\n", er.Description) + fmt.Printf("Error.Variant: %s\n\n", er.Variant) + } + }() + + // unhandled event channel + wgReceivers.Add(1) + go func() { + defer wgReceivers.Done() + + for byData := range dch.unhandledChan { + if dch.debugWebsocket { + prettyJSON, err := prettyjson.Format(*byData) + if err != nil { + klog.V(2).Infof("\n\nRaw Data:\n%s\n\n", string(*byData)) + } else { + klog.V(2).Infof("\n\nError Object:\n%s\n\n", prettyJSON) + } + } + + fmt.Printf("\n[UnhandledEvent]") + fmt.Printf("Dump:\n%s\n\n", string(*byData)) + } + }() + + // wait for all receivers to finish + wgReceivers.Wait() + + return nil +} diff --git a/pkg/api/speak/v1/websocket/chan_router.go b/pkg/api/speak/v1/websocket/chan_router.go new file mode 100644 index 00000000..c196d30d --- /dev/null +++ b/pkg/api/speak/v1/websocket/chan_router.go @@ -0,0 +1,300 @@ +// Copyright 2024 Deepgram SDK contributors. All Rights Reserved. 
+// Use of this source code is governed by a MIT license that can be found in the LICENSE file. +// SPDX-License-Identifier: MIT + +package websocketv1 + +import ( + "encoding/hex" + "encoding/json" + "os" + "strings" + + prettyjson "github.com/hokaccha/go-prettyjson" + klog "k8s.io/klog/v2" + + interfaces "github.com/deepgram/deepgram-go-sdk/pkg/api/speak/v1/websocket/interfaces" +) + +// NewWithDefault creates a ChanRouter with the default callback handler +func NewChanRouterWithDefault() *ChanRouter { + chans := NewDefaultChanHandler() + go func() { + err := chans.Run() + if err != nil { + klog.V(1).Infof("chans.Run failed. Err: %v\n", err) + } + }() + + return NewChanRouter(chans) +} + +// New creates a ChanRouter with a user-defined channels +// gocritic:ignore +func NewChanRouter(chans interfaces.SpeakMessageChan) *ChanRouter { + var debugStr string + if v := os.Getenv("DEEPGRAM_DEBUG"); v != "" { + klog.V(4).Infof("DEEPGRAM_DEBUG found") + debugStr = v + } + + router := &ChanRouter{ + debugWebsocket: strings.EqualFold(strings.ToLower(debugStr), "true"), + binaryChan: make([]*chan *[]byte, 0), + openChan: make([]*chan *interfaces.OpenResponse, 0), + metadataChan: make([]*chan *interfaces.MetadataResponse, 0), + flushedChan: make([]*chan *interfaces.FlushedResponse, 0), + closeChan: make([]*chan *interfaces.CloseResponse, 0), + warningChan: make([]*chan *interfaces.WarningResponse, 0), + errorChan: make([]*chan *interfaces.ErrorResponse, 0), + unhandledChan: make([]*chan *[]byte, 0), + } + + if chans != nil { + router.binaryChan = append(router.binaryChan, chans.GetBinary()...) + router.openChan = append(router.openChan, chans.GetOpen()...) + router.metadataChan = append(router.metadataChan, chans.GetMetadata()...) + router.flushedChan = append(router.flushedChan, chans.GetFlush()...) + router.closeChan = append(router.closeChan, chans.GetClose()...) + router.warningChan = append(router.warningChan, chans.GetWarning()...) 
+ router.errorChan = append(router.errorChan, chans.GetError()...) + router.unhandledChan = append(router.unhandledChan, chans.GetUnhandled()...) + } + + return router +} + +// Open sends an OpenResponse message to the callback +func (r *ChanRouter) Open(or *interfaces.OpenResponse) error { + byMsg, err := json.Marshal(or) + if err != nil { + klog.V(1).Infof("json.Marshal(or) failed. Err: %v\n", err) + return err + } + + action := func(data []byte) error { + var msg interfaces.OpenResponse + if err := json.Unmarshal(data, &msg); err != nil { + klog.V(1).Infof("json.Unmarshal(OpenResponse) failed. Err: %v\n", err) + return err + } + + for _, ch := range r.openChan { + *ch <- &msg + } + return nil + } + + return r.processGeneric(string(interfaces.TypeOpenResponse), byMsg, action) +} + +// Close sends an CloseResponse message to the callback +func (r *ChanRouter) Close(cr *interfaces.CloseResponse) error { + byMsg, err := json.Marshal(cr) + if err != nil { + klog.V(1).Infof("json.Marshal(or) failed. Err: %v\n", err) + return err + } + + action := func(data []byte) error { + var msg interfaces.CloseResponse + if err := json.Unmarshal(data, &msg); err != nil { + klog.V(1).Infof("json.Unmarshal(CloseResponse) failed. Err: %v\n", err) + return err + } + + for _, ch := range r.closeChan { + *ch <- &msg + } + return nil + } + + return r.processGeneric(string(interfaces.TypeCloseResponse), byMsg, action) +} + +// Error sends an ErrorResponse message to the callback +func (r *ChanRouter) Error(er *interfaces.ErrorResponse) error { + byMsg, err := json.Marshal(er) + if err != nil { + klog.V(1).Infof("json.Marshal(er) failed. Err: %v\n", err) + return err + } + + action := func(data []byte) error { + var msg interfaces.ErrorResponse + if err := json.Unmarshal(data, &msg); err != nil { + klog.V(1).Infof("json.Unmarshal(ErrorResponse) failed. 
Err: %v\n", err) + return err + } + + for _, ch := range r.errorChan { + *ch <- &msg + } + return nil + } + + return r.processGeneric(string(interfaces.TypeErrorResponse), byMsg, action) +} + +// processGeneric generalizes the handling of all message types +func (r *ChanRouter) processGeneric(msgType string, byMsg []byte, action func(data []byte) error) error { + klog.V(6).Infof("router.%s ENTER\n", msgType) + + r.printDebugMessages(5, msgType, byMsg) + + var err error + if err = action(byMsg); err != nil { + klog.V(1).Infof("callback.%s failed. Err: %v\n", msgType, err) + } else { + klog.V(5).Infof("callback.%s succeeded\n", msgType) + } + klog.V(6).Infof("router.%s LEAVE\n", msgType) + + return err +} + +func (r *ChanRouter) processMetadata(byMsg []byte) error { + action := func(data []byte) error { + var msg interfaces.MetadataResponse + if err := json.Unmarshal(byMsg, &msg); err != nil { + klog.V(1).Infof("json.Unmarshal(MetadataResponse) failed. Err: %v\n", err) + return err + } + + for _, ch := range r.metadataChan { + *ch <- &msg + } + return nil + } + + return r.processGeneric(string(interfaces.TypeMetadataResponse), byMsg, action) +} + +func (r *ChanRouter) processFlushed(byMsg []byte) error { + action := func(data []byte) error { + var msg interfaces.FlushedResponse + if err := json.Unmarshal(byMsg, &msg); err != nil { + klog.V(1).Infof("json.Unmarshal(FlushedResponse) failed. Err: %v\n", err) + return err + } + + for _, ch := range r.flushedChan { + *ch <- &msg + } + return nil + } + + return r.processGeneric(string(interfaces.TypeFlushedResponse), byMsg, action) +} + +func (r *ChanRouter) processWarningResponse(byMsg []byte) error { + action := func(data []byte) error { + var msg interfaces.WarningResponse + if err := json.Unmarshal(data, &msg); err != nil { + klog.V(1).Infof("json.Unmarshal(WarningResponse) failed. 
Err: %v\n", err) + return err + } + + for _, ch := range r.warningChan { + *ch <- &msg + } + return nil + } + + return r.processGeneric(string(interfaces.TypeWarningResponse), byMsg, action) +} + +func (r *ChanRouter) processErrorResponse(byMsg []byte) error { + action := func(data []byte) error { + var msg interfaces.ErrorResponse + if err := json.Unmarshal(byMsg, &msg); err != nil { + klog.V(1).Infof("json.Unmarshal(MessageResponse) failed. Err: %v\n", err) + return err + } + + for _, ch := range r.errorChan { + *ch <- &msg + } + return nil + } + + return r.processGeneric(string(interfaces.TypeErrorResponse), byMsg, action) +} + +// Message handles platform messages and routes them appropriately based on the MessageType +func (r *ChanRouter) Message(byMsg []byte) error { + klog.V(6).Infof("router.Message ENTER\n") + + if r.debugWebsocket { + klog.V(5).Infof("Raw Message:\n%s\n", string(byMsg)) + } + + var mt interfaces.MessageType + if err := json.Unmarshal(byMsg, &mt); err != nil { + klog.V(1).Infof("json.Unmarshal(MessageType) failed. 
Err: %v\n", err) + klog.V(6).Infof("router.Message LEAVE\n") + return err + } + + var err error + switch interfaces.TypeResponse(mt.Type) { + case interfaces.TypeMetadataResponse: + err = r.processMetadata(byMsg) + case interfaces.TypeFlushedResponse: + err = r.processFlushed(byMsg) + case interfaces.TypeWarningResponse: + err = r.processWarningResponse(byMsg) + case interfaces.TypeResponse(interfaces.TypeErrorResponse): + err = r.processErrorResponse(byMsg) + default: + err = r.UnhandledMessage(byMsg) + } + + if err == nil { + klog.V(6).Infof("MessageType(%s) after - Result: succeeded\n", mt.Type) + } else { + klog.V(5).Infof("MessageType(%s) after - Result: %v\n", mt.Type, err) + } + klog.V(6).Infof("router.Message LEAVE\n") + return err +} + +// Binary handles platform messages and routes them appropriately based on the MessageType +func (r *ChanRouter) Binary(byMsg []byte) error { + klog.V(6).Infof("router.Binary ENTER\n") + + klog.V(5).Infof("Binary Message:\n%s...\n", hex.EncodeToString(byMsg[:20])) + for _, ch := range r.binaryChan { + *ch <- &byMsg + } + + klog.V(6).Infof("router.Binary LEAVE\n") + return nil +} + +// UnhandledMessage logs and handles any unexpected message types +func (r *ChanRouter) UnhandledMessage(byMsg []byte) error { + klog.V(6).Infof("router.UnhandledMessage ENTER\n") + r.printDebugMessages(3, "UnhandledMessage", byMsg) + + for _, ch := range r.unhandledChan { + *ch <- &byMsg + } + + klog.V(1).Infof("Unknown Event was received\n") + klog.V(6).Infof("router.UnhandledMessage LEAVE\n") + + return ErrInvalidMessageType +} + +// printDebugMessages formats and logs debugging messages +func (r *ChanRouter) printDebugMessages(level klog.Level, function string, byMsg []byte) { + prettyJSON, err := prettyjson.Format(byMsg) + if err != nil { + klog.V(1).Infof("prettyjson.Format failed. 
Err: %v\n", err) + return + } + klog.V(level).Infof("\n\n-----------------------------------------------\n") + klog.V(level).Infof("%s RAW:\n%s\n", function, prettyJSON) + klog.V(level).Infof("-----------------------------------------------\n\n\n") +} diff --git a/pkg/api/speak/v1/websocket/constants.go b/pkg/api/speak/v1/websocket/constants.go new file mode 100644 index 00000000..3b175d19 --- /dev/null +++ b/pkg/api/speak/v1/websocket/constants.go @@ -0,0 +1,25 @@ +// Copyright 2024 Deepgram SDK contributors. All Rights Reserved. +// Use of this source code is governed by a MIT license that can be found in the LICENSE file. +// SPDX-License-Identifier: MIT + +package websocketv1 + +import ( + "errors" +) + +const ( + PackageVersion string = "v1.0" +) + +// errors +var ( + // ErrInvalidInput required input was not found + ErrInvalidInput = errors.New("required input was not found") + + // ErrInvalidMessageType invalid message type + ErrInvalidMessageType = errors.New("invalid message type") + + // ErrUserCallbackNotDefined user callback object not defined + ErrUserCallbackNotDefined = errors.New("user callback object not defined") +) diff --git a/pkg/api/speak/v1/websocket/interfaces/constants.go b/pkg/api/speak/v1/websocket/interfaces/constants.go new file mode 100644 index 00000000..454c7cda --- /dev/null +++ b/pkg/api/speak/v1/websocket/interfaces/constants.go @@ -0,0 +1,26 @@ +// Copyright 2024 Deepgram SDK contributors. All Rights Reserved. +// Use of this source code is governed by a MIT license that can be found in the LICENSE file. 
+// SPDX-License-Identifier: MIT + +package interfacesv1 + +import ( + commoninterfaces "github.com/deepgram/deepgram-go-sdk/pkg/client/common/v1/interfaces" +) + +// These are the message types that can be received from the live API +type TypeResponse commoninterfaces.TypeResponse + +// These are the message types that can be received from the text-to-speech streaming API +const ( + // message types + TypeOpenResponse = commoninterfaces.TypeOpenResponse + TypeMetadataResponse TypeResponse = "Metadata" + TypeFlushedResponse TypeResponse = "Flushed" + TypeCloseResponse = commoninterfaces.TypeCloseResponse + + // "Error" type + TypeWarningResponse TypeResponse = "Warning" + TypeErrorResponse = commoninterfaces.TypeErrorResponse + TypeUnhandledResponse TypeResponse = "Unhandled" +) diff --git a/pkg/api/speak/v1/websocket/interfaces/interfaces.go b/pkg/api/speak/v1/websocket/interfaces/interfaces.go new file mode 100644 index 00000000..d991793a --- /dev/null +++ b/pkg/api/speak/v1/websocket/interfaces/interfaces.go @@ -0,0 +1,42 @@ +// Copyright 2024 Deepgram SDK contributors. All Rights Reserved. +// Use of this source code is governed by a MIT license that can be found in the LICENSE file. +// SPDX-License-Identifier: MIT + +// This package defines interfaces for the live API +package interfacesv1 + +/* +Chan Interfaces +*/ +// SpeakMessageChan is a channel used to receive notifcations for platforms messages +type SpeakMessageChan interface { + // These are WS TextMessage that are used for flow control. 
+ GetBinary() []*chan *[]byte + GetOpen() []*chan *OpenResponse + GetMetadata() []*chan *MetadataResponse + GetFlush() []*chan *FlushedResponse + GetClose() []*chan *CloseResponse + + GetWarning() []*chan *WarningResponse + GetError() []*chan *ErrorResponse + GetUnhandled() []*chan *[]byte +} + +/* +Callback Interfaces +*/ +// SpeakMessageCallback is a callback used to receive notifications for platforms messages +type SpeakMessageCallback interface { + // These are WS TextMessage that are used for flow control. + Open(or *OpenResponse) error + Metadata(md *MetadataResponse) error + Flush(fl *FlushedResponse) error + Close(cr *CloseResponse) error + + Warning(er *WarningResponse) error + Error(er *ErrorResponse) error + UnhandledEvent(byMsg []byte) error + + // These are WS BinaryMessage that are used to send audio data to the client + Binary(byMsg []byte) error +} diff --git a/pkg/api/speak/v1/websocket/interfaces/types.go b/pkg/api/speak/v1/websocket/interfaces/types.go new file mode 100644 index 00000000..c5e28bd7 --- /dev/null +++ b/pkg/api/speak/v1/websocket/interfaces/types.go @@ -0,0 +1,55 @@ +// Copyright 2024 Deepgram SDK contributors. All Rights Reserved. +// Use of this source code is governed by a MIT license that can be found in the LICENSE file. +// SPDX-License-Identifier: MIT + +package interfacesv1 + +import ( + commoninterfaces "github.com/deepgram/deepgram-go-sdk/pkg/client/common/v1/interfaces" + interfaces "github.com/deepgram/deepgram-go-sdk/pkg/client/interfaces" +) + +/***********************************/ +// Request/Input structs +/***********************************/ +type SpeakOptions interfaces.SpeakOptions + +/***********************************/ +// MessageType is the header to bootstrap you way unmarshalling other messages +/***********************************/ +/* + Example: + { + "type": "message", + "message": { + ... 
+ } + } +*/ +type MessageType struct { + Type string `json:"type"` +} + +// OpenResponse is the response from opening the connection +type OpenResponse = commoninterfaces.OpenResponse + +// MetadataResponse is the response from the text-to-speech request which contains metadata about the request +type MetadataResponse struct { + Type string `json:"type,omitempty"` + RequestID string `json:"request_id,omitempty"` +} + +// FlushedResponse is the response which indicates that the server has flushed the buffer and is ready to return audio +type FlushedResponse struct { + Type string `json:"type,omitempty"` + SequenceID int `json:"sequence_id,omitempty"` +} + +// CloseResponse is the response from closing the connection +type CloseResponse = commoninterfaces.CloseResponse + +// WarningResponse is the Deepgram specific response warning +type WarningResponse = interfaces.DeepgramWarning + +// ErrorResponse is the Deepgram specific response error +type ErrorResponse = interfaces.DeepgramError diff --git a/pkg/api/speak/v1/websocket/types.go b/pkg/api/speak/v1/websocket/types.go new file mode 100644 index 00000000..ecd8e323 --- /dev/null +++ b/pkg/api/speak/v1/websocket/types.go @@ -0,0 +1,63 @@ +// Copyright 2024 Deepgram SDK contributors. All Rights Reserved. +// Use of this source code is governed by a MIT license that can be found in the LICENSE file. 
+// SPDX-License-Identifier: MIT + +package websocketv1 + +import ( + interfaces "github.com/deepgram/deepgram-go-sdk/pkg/api/speak/v1/websocket/interfaces" +) + +/* +Using Channels +*/ +// DefaultCallbackHandler is a default callback handler for live transcription +// Simply prints the transcript to stdout +type DefaultChanHandler struct { + debugWebsocket bool + debugWebsocketVerbose bool + + binaryChan chan *[]byte + openChan chan *interfaces.OpenResponse + metadataChan chan *interfaces.MetadataResponse + flushedChan chan *interfaces.FlushedResponse + closeChan chan *interfaces.CloseResponse + warningChan chan *interfaces.WarningResponse + errorChan chan *interfaces.ErrorResponse + unhandledChan chan *[]byte +} + +// ChanRouter routes events +type ChanRouter struct { + debugWebsocket bool + + // call out to channels + binaryChan []*chan *[]byte + openChan []*chan *interfaces.OpenResponse + metadataChan []*chan *interfaces.MetadataResponse + flushedChan []*chan *interfaces.FlushedResponse + closeChan []*chan *interfaces.CloseResponse + warningChan []*chan *interfaces.WarningResponse + errorChan []*chan *interfaces.ErrorResponse + unhandledChan []*chan *[]byte +} + +/* +Using Callbacks +*/ +// DefaultCallbackHandler is a default callback handler for live transcription +// Simply prints the transcript to stdout +type DefaultCallbackHandler struct { + debugWebsocket bool + debugWebsocketVerbose bool +} + +// CallbackRouter routes events +type CallbackRouter struct { + debugWebsocket bool + callback interfaces.SpeakMessageCallback +} + +// MessageRouter is the interface for routing messages +// Deprecated: Use CallbackRouter instead +type MessageRouter = CallbackRouter diff --git a/pkg/api/version/speakstream-version.go b/pkg/api/version/speakstream-version.go index 287fc220..aa37da83 100644 --- a/pkg/api/version/speakstream-version.go +++ b/pkg/api/version/speakstream-version.go @@ -29,6 +29,6 @@ mechanism for: The return value is the complete URL endpoint to be 
used for the text-to-speech */ -func GetSpeakStreamAPI(ctx context.Context, host, version, path string, options *interfaces.SpeakOptions, args ...interface{}) (string, error) { +func GetSpeakStreamAPI(ctx context.Context, host, version, path string, options *interfaces.WSSpeakOptions, args ...interface{}) (string, error) { return getAPIURL(ctx, "speak-stream", host, version, path, options, args...) } diff --git a/pkg/client/common/v1/interfaces/constants.go b/pkg/client/common/v1/interfaces/constants.go new file mode 100644 index 00000000..01fc237a --- /dev/null +++ b/pkg/client/common/v1/interfaces/constants.go @@ -0,0 +1,15 @@ +// Copyright 2023-2024 Deepgram SDK contributors. All Rights Reserved. +// Use of this source code is governed by a MIT license that can be found in the LICENSE file. +// SPDX-License-Identifier: MIT + +package interfacesv1 + +// These are the message types that can be received from the live API +type TypeResponse string + +const ( + // message types + TypeOpenResponse TypeResponse = "Open" + TypeCloseResponse TypeResponse = "Close" + TypeErrorResponse TypeResponse = "Error" +) diff --git a/pkg/client/common/v1/interfaces/interfaces.go b/pkg/client/common/v1/interfaces/interfaces.go index 4492e930..9d16076e 100644 --- a/pkg/client/common/v1/interfaces/interfaces.go +++ b/pkg/client/common/v1/interfaces/interfaces.go @@ -5,6 +5,38 @@ // This package defines interfaces for the live API package interfacesv1 +import "github.com/deepgram/deepgram-go-sdk/pkg/client/interfaces" + +/* +Shared Structs +*/ +// OpenResponse is the response from the connection opening +type OpenResponse struct { + Type string `json:"type,omitempty"` +} + +// CloseResponse is the response from the connection closing +type CloseResponse struct { + Type string `json:"type,omitempty"` +} + +// ErrorResponse is the Deepgram specific response error +type ErrorResponse = interfaces.DeepgramError + +/* +Router definition +*/ +type Router interface { + Open(or *OpenResponse) 
error + Message(byMsg []byte) error + Binary(byMsg []byte) error + Close(or *CloseResponse) error + Error(er *ErrorResponse) error +} + +/* +WebSocketHandler this defines the things you need to implement for your specific WS protocol +*/ type WebSocketHandler interface { // GetURL returns the URL for the websocket connection. This has already been processed through pkg/api/version GetURL(host string) (string, error) diff --git a/pkg/client/common/v1/types.go b/pkg/client/common/v1/types.go index 55d0660d..639bace1 100644 --- a/pkg/client/common/v1/types.go +++ b/pkg/client/common/v1/types.go @@ -36,6 +36,7 @@ type WSClient struct { retryCnt int64 processMessages *commonv1interfaces.WebSocketHandler + router *commonv1interfaces.Router } // *************************** diff --git a/pkg/client/common/v1/websocket.go b/pkg/client/common/v1/websocket.go index 6102b07e..3998ceb4 100644 --- a/pkg/client/common/v1/websocket.go +++ b/pkg/client/common/v1/websocket.go @@ -22,7 +22,7 @@ import ( ) // gocritic:ignore -func NewWS(ctx context.Context, ctxCancel context.CancelFunc, apiKey string, options *clientinterfaces.ClientOptions, processMessages *commonv1interfaces.WebSocketHandler) *WSClient { +func NewWS(ctx context.Context, ctxCancel context.CancelFunc, apiKey string, options *clientinterfaces.ClientOptions, processMessages *commonv1interfaces.WebSocketHandler, router *commonv1interfaces.Router) *WSClient { if apiKey != "" { options.APIKey = apiKey } @@ -39,6 +39,7 @@ func NewWS(ctx context.Context, ctxCancel context.CancelFunc, apiKey string, opt ctxCancel: ctxCancel, retry: true, processMessages: processMessages, + router: router, } return &c @@ -196,6 +197,14 @@ func (c *WSClient) internalConnectWithCancel(ctx context.Context, ctxCancel cont // start WS specific items (*c.processMessages).Start() + // fire off close connection + err = (*c.router).Open(&commonv1interfaces.OpenResponse{ + Type: string(commonv1interfaces.TypeOpenResponse), + }) + if err != nil { + 
klog.V(1).Infof("router.Open failed. Err: %v\n", err) + } + klog.V(3).Infof("WebSocket Connection Successful!") klog.V(7).Infof("live.internalConnectWithCancel() LEAVE\n") @@ -533,6 +542,14 @@ func (c *WSClient) closeWs(fatal bool) { if fatal || c.wsconn != nil { // process WS specific items (*c.processMessages).Finish() + + // fire off close connection + err := (*c.router).Close(&commonv1interfaces.CloseResponse{ + Type: string(commonv1interfaces.TypeCloseResponse), + }) + if err != nil { + klog.V(1).Infof("router.CloseHelper failed. Err: %v\n", err) + } } // close the connection diff --git a/pkg/client/interfaces/interfaces.go b/pkg/client/interfaces/interfaces.go index 9b7e4724..8fb3050b 100644 --- a/pkg/client/interfaces/interfaces.go +++ b/pkg/client/interfaces/interfaces.go @@ -17,3 +17,4 @@ type PreRecordedTranscriptionOptions = interfacesv1.PreRecordedTranscriptionOpti type LiveTranscriptionOptions = interfacesv1.LiveTranscriptionOptions type AnalyzeOptions = interfacesv1.AnalyzeOptions type SpeakOptions = interfacesv1.SpeakOptions +type WSSpeakOptions = interfacesv1.WSSpeakOptions diff --git a/pkg/client/interfaces/v1/options.go b/pkg/client/interfaces/v1/options.go index efaee632..a89433b0 100644 --- a/pkg/client/interfaces/v1/options.go +++ b/pkg/client/interfaces/v1/options.go @@ -52,7 +52,7 @@ func (o *ClientOptions) Parse() error { // prerecorded // currently nothing - // websocket + // speech-to-text websocket if v := os.Getenv("DEEPGRAM_WEBSOCKET_REDIRECT"); v != "" { klog.V(3).Infof("DEEPGRAM_WEBSOCKET_REDIRECT found") o.RedirectService = strings.EqualFold(strings.ToLower(v), "true") @@ -63,22 +63,39 @@ func (o *ClientOptions) Parse() error { } // these require inspecting messages, therefore you must update the InspectMessage() method - if v := os.Getenv("DEEPGRAM_WEBSOCKET_AUTO_FLUSH"); v != "" { - klog.V(3).Infof("DEEPGRAM_WEBSOCKET_AUTO_FLUSH found") + if v := os.Getenv("DEEPGRAM_WEBSOCKET_REPLY_AUTO_FLUSH"); v != "" { + 
klog.V(3).Infof("DEEPGRAM_WEBSOCKET_REPLY_AUTO_FLUSH found") i, err := strconv.ParseInt(v, 10, 64) if err == nil { - klog.V(3).Infof("DEEPGRAM_WEBSOCKET_AUTO_FLUSH set to %d", i) + klog.V(3).Infof("DEEPGRAM_WEBSOCKET_REPLY_AUTO_FLUSH set to %d", i) o.AutoFlushReplyDelta = i } } + // text-to-speech websocket + // these require inspecting messages, therefore you must update the InspectMessage() method + if v := os.Getenv("DEEPGRAM_WEBSOCKET_SPEAK_AUTO_FLUSH"); v != "" { + klog.V(3).Infof("DEEPGRAM_WEBSOCKET_SPEAK_AUTO_FLUSH found") + i, err := strconv.ParseInt(v, 10, 64) + if err == nil { + klog.V(3).Infof("DEEPGRAM_WEBSOCKET_SPEAK_AUTO_FLUSH set to %d", i) + o.AutoFlushSpeakDelta = i + } + } + return nil } -func (o *ClientOptions) InspectMessage() bool { +// InspectListenMessage returns true if the Listen message should be inspected +func (o *ClientOptions) InspectListenMessage() bool { return o.AutoFlushReplyDelta != 0 } +// InspectSpeakMessage returns true if the Speak message should be inspected +func (o *ClientOptions) InspectSpeakMessage() bool { + return o.AutoFlushSpeakDelta != 0 +} + func (o *PreRecordedTranscriptionOptions) Check() error { // checks // currently no op @@ -106,3 +123,10 @@ func (o *SpeakOptions) Check() error { return nil } + +func (o *WSSpeakOptions) Check() error { + // checks + // currently no op + + return nil +} diff --git a/pkg/client/interfaces/v1/types-client.go b/pkg/client/interfaces/v1/types-client.go index 678ea79a..33029fe5 100644 --- a/pkg/client/interfaces/v1/types-client.go +++ b/pkg/client/interfaces/v1/types-client.go @@ -17,10 +17,11 @@ type ClientOptions struct { // prerecorded client options - // live client options - RedirectService bool // allows HTTP redirects to be followed - EnableKeepAlive bool // enables the keep alive feature + // speech-to-text client options + RedirectService bool // allows HTTP redirects to be followed + EnableKeepAlive bool // enables the keep alive feature + AutoFlushReplyDelta int64 // 
enables the auto flush feature based on the delta in milliseconds - // these require inspecting messages, therefore you must update the InspectMessage() method - AutoFlushReplyDelta int64 // enables the auto flush feature + // text-to-speech client options + AutoFlushSpeakDelta int64 // enables the auto flush feature based on the delta in milliseconds } diff --git a/pkg/client/interfaces/v1/types-speak.go b/pkg/client/interfaces/v1/types-speak.go index 3f6e28d6..39720b3b 100644 --- a/pkg/client/interfaces/v1/types-speak.go +++ b/pkg/client/interfaces/v1/types-speak.go @@ -20,3 +20,17 @@ type SpeakOptions struct { Callback string `json:"callback,omitempty" schema:"callback,omitempty"` CallbackMethod string `json:"callback_method,omitempty" schema:"callback_method,omitempty"` } + +/* +WSSpeakOptions contain all of the knobs and dials to transform text into speech +using the Deepgram API + +Please see the text-to-speech documentation for more details: +TODO +*/ +type WSSpeakOptions struct { + Model string `json:"model,omitempty" schema:"model,omitempty"` + Encoding string `json:"encoding,omitempty" schema:"encoding,omitempty"` + SampleRate int `json:"sample_rate,omitempty" schema:"sample_rate,omitempty"` + BitRate int `json:"bit_rate,omitempty" schema:"bit_rate,omitempty"` +} diff --git a/pkg/client/listen/client.go b/pkg/client/listen/client.go index 6de00ca9..7446ab35 100644 --- a/pkg/client/listen/client.go +++ b/pkg/client/listen/client.go @@ -186,13 +186,13 @@ Notes: - The Deepgram API KEY is read from the environment variable DEEPGRAM_API_KEY - The chans handler is set to the default handler which just prints all messages to the console */ -func NewWSUsingChanWithDefaults(ctx context.Context, options *interfaces.LiveTranscriptionOptions, chans *msginterfaces.LiveMessageChan) (*listenv1ws.WSChannel, error) { +func NewWSUsingChanWithDefaults(ctx context.Context, options *interfaces.LiveTranscriptionOptions, chans msginterfaces.LiveMessageChan) 
(*listenv1ws.WSChannel, error) { return listenv1ws.NewUsingChanWithDefaults(ctx, options, chans) } // NewWebSocketUsingChanWithDefaults is an alias for NewWSUsingChanWithDefaults // TODO: Deprecate this function later -func NewWebSocketUsingChanWithDefaults(ctx context.Context, options *interfaces.LiveTranscriptionOptions, chans *msginterfaces.LiveMessageChan) (*listenv1ws.WSChannel, error) { +func NewWebSocketUsingChanWithDefaults(ctx context.Context, options *interfaces.LiveTranscriptionOptions, chans msginterfaces.LiveMessageChan) (*listenv1ws.WSChannel, error) { return NewWSUsingChanWithDefaults(ctx, options, chans) } @@ -206,14 +206,14 @@ Input parameters: - tOptions: LiveTranscriptionOptions which allows overriding things like language, model, etc. - chans: LiveMessageCallback which is a chans that allows you to perform actions based on the transcription */ -func NewWSUsingChan(ctx context.Context, apiKey string, cOptions *interfaces.ClientOptions, tOptions *interfaces.LiveTranscriptionOptions, chans *msginterfaces.LiveMessageChan) (*listenv1ws.WSChannel, error) { +func NewWSUsingChan(ctx context.Context, apiKey string, cOptions *interfaces.ClientOptions, tOptions *interfaces.LiveTranscriptionOptions, chans msginterfaces.LiveMessageChan) (*listenv1ws.WSChannel, error) { ctx, ctxCancel := context.WithCancel(ctx) return listenv1ws.NewUsingChanWithCancel(ctx, ctxCancel, apiKey, cOptions, tOptions, chans) } // NewWebSocketUsingChan is an alias for NewWSUsingChan // TODO: Deprecate this function later -func NewWebSocketUsingChan(ctx context.Context, apiKey string, cOptions *interfaces.ClientOptions, tOptions *interfaces.LiveTranscriptionOptions, chans *msginterfaces.LiveMessageChan) (*listenv1ws.WSChannel, error) { +func NewWebSocketUsingChan(ctx context.Context, apiKey string, cOptions *interfaces.ClientOptions, tOptions *interfaces.LiveTranscriptionOptions, chans msginterfaces.LiveMessageChan) (*listenv1ws.WSChannel, error) { return NewWSUsingChan(ctx, apiKey, 
cOptions, tOptions, chans) } @@ -228,13 +228,13 @@ Input parameters: - tOptions: LiveTranscriptionOptions which allows overriding things like language, model, etc. - chans: LiveMessageCallback which is a chans that allows you to perform actions based on the transcription */ -func NewWSUsingChanWithCancel(ctx context.Context, ctxCancel context.CancelFunc, apiKey string, cOptions *interfaces.ClientOptions, tOptions *interfaces.LiveTranscriptionOptions, chans *msginterfaces.LiveMessageChan) (*listenv1ws.WSChannel, error) { +func NewWSUsingChanWithCancel(ctx context.Context, ctxCancel context.CancelFunc, apiKey string, cOptions *interfaces.ClientOptions, tOptions *interfaces.LiveTranscriptionOptions, chans msginterfaces.LiveMessageChan) (*listenv1ws.WSChannel, error) { return listenv1ws.NewUsingChanWithCancel(ctx, ctxCancel, apiKey, cOptions, tOptions, chans) } // NewWebSocketUsingChanWithCancel is an alias for NewWSUsingChanWithCancel // TODO: Deprecate this function later -func NewWebSocketUsingChanWithCancel(ctx context.Context, ctxCancel context.CancelFunc, apiKey string, cOptions *interfaces.ClientOptions, tOptions *interfaces.LiveTranscriptionOptions, chans *msginterfaces.LiveMessageChan) (*listenv1ws.WSChannel, error) { +func NewWebSocketUsingChanWithCancel(ctx context.Context, ctxCancel context.CancelFunc, apiKey string, cOptions *interfaces.ClientOptions, tOptions *interfaces.LiveTranscriptionOptions, chans msginterfaces.LiveMessageChan) (*listenv1ws.WSChannel, error) { return NewWSUsingChanWithCancel(ctx, ctxCancel, apiKey, cOptions, tOptions, chans) } diff --git a/pkg/client/listen/v1/websocket/client_callback.go b/pkg/client/listen/v1/websocket/client_callback.go index 42e49bcd..f0dcc9b3 100644 --- a/pkg/client/listen/v1/websocket/client_callback.go +++ b/pkg/client/listen/v1/websocket/client_callback.go @@ -75,7 +75,7 @@ func (c *WSCallback) ProcessMessage(wsType int, byMsg []byte) error { klog.V(6).Infof("ProcessMessage() ENTER\n") // inspect the message 
- if c.cOptions.InspectMessage() { + if c.cOptions.InspectListenMessage() { err := c.inspect(byMsg) if err != nil { klog.V(1).Infof("ProcessMessage: inspect failed. Err: %v\n", err) @@ -123,6 +123,10 @@ func (c *WSCallback) Stream(r io.Reader) error { klog.V(3).Infof("Graceful websocket close\n") klog.V(6).Infof("live.Stream() LEAVE\n") return nil + case strings.Contains(errStr, common.UseOfClosedSocket): + klog.V(3).Infof("Graceful websocket close\n") + klog.V(6).Infof("live.Stream() LEAVE\n") + return nil case strings.Contains(errStr, common.FatalReadSocketErr): klog.V(1).Infof("Fatal socket error: %v\n", err) klog.V(6).Infof("live.Stream() LEAVE\n") diff --git a/pkg/client/listen/v1/websocket/client_channel.go b/pkg/client/listen/v1/websocket/client_channel.go index 58cec4d2..4e9d3c81 100644 --- a/pkg/client/listen/v1/websocket/client_channel.go +++ b/pkg/client/listen/v1/websocket/client_channel.go @@ -75,7 +75,7 @@ func (c *WSChannel) ProcessMessage(wsType int, byMsg []byte) error { klog.V(6).Infof("ProcessMessage() ENTER\n") // inspect the message - if c.cOptions.InspectMessage() { + if c.cOptions.InspectListenMessage() { err := c.inspect(byMsg) if err != nil { klog.V(1).Infof("ProcessMessage: inspect failed. 
Err: %v\n", err) @@ -123,6 +123,10 @@ func (c *WSChannel) Stream(r io.Reader) error { klog.V(3).Infof("Graceful websocket close\n") klog.V(6).Infof("live.Stream() LEAVE\n") return nil + case strings.Contains(errStr, common.UseOfClosedSocket): + klog.V(3).Infof("Graceful websocket close\n") + klog.V(6).Infof("live.Stream() LEAVE\n") + return nil case strings.Contains(errStr, common.FatalReadSocketErr): klog.V(1).Infof("Fatal socket error: %v\n", err) klog.V(6).Infof("live.Stream() LEAVE\n") diff --git a/pkg/client/listen/v1/websocket/constants.go b/pkg/client/listen/v1/websocket/constants.go index 2f45cabe..e5443951 100644 --- a/pkg/client/listen/v1/websocket/constants.go +++ b/pkg/client/listen/v1/websocket/constants.go @@ -20,16 +20,6 @@ const ( TerminationSleep = 100 * time.Millisecond ) -// // socket errors -// FatalReadSocketErr string = "read: can't assign requested address" -// FatalWriteSocketErr string = "write: broken pipe" -// UseOfClosedSocket string = "use of closed network connection" -// UnknownDeepgramErr string = "unknown deepgram error" - -// // socket successful close error -// SuccessfulSocketErr string = "close 1000" -// ) - const ( // MessageTypeKeepAlive keep the connection alive MessageTypeKeepAlive string = "KeepAlive" @@ -38,18 +28,6 @@ const ( MessageTypeFinalize string = "Finalize" ) -// // errors -// var ( -// // ErrInvalidInput required input was not found -// ErrInvalidInput = errors.New("required input was not found") - -// // ErrInvalidConnection connection is not valid -// ErrInvalidConnection = errors.New("connection is not valid") - -// // ErrFatalPanicRecovered fatal panic recovered -// ErrFatalPanicRecovered = errors.New("fatal panic - attempt to recover") -// ) - // internal constants for retry, waits, back-off, etc. 
const ( flushPeriod = 500 * time.Millisecond diff --git a/pkg/client/listen/v1/websocket/new_using_callbacks.go b/pkg/client/listen/v1/websocket/new_using_callbacks.go index d1990f1c..8c5b1580 100644 --- a/pkg/client/listen/v1/websocket/new_using_callbacks.go +++ b/pkg/client/listen/v1/websocket/new_using_callbacks.go @@ -19,18 +19,6 @@ import ( /* NewForDemo creates a new websocket connection with all default options -Notes: - - The Deepgram API KEY is read from the environment variable DEEPGRAM_API_KEY - -Deprecated: Use NewUsingCallbackForDemo instead -*/ -func NewForDemo(ctx context.Context, options *clientinterfaces.LiveTranscriptionOptions) (*WSCallback, error) { - return NewUsingCallbackForDemo(ctx, options) -} - -/* -NewForDemo creates a new websocket connection with all default options - Notes: - The Deepgram API KEY is read from the environment variable DEEPGRAM_API_KEY */ @@ -41,19 +29,6 @@ func NewUsingCallbackForDemo(ctx context.Context, options *clientinterfaces.Live /* NewWithDefaults creates a new websocket connection with all default options -Notes: - - The Deepgram API KEY is read from the environment variable DEEPGRAM_API_KEY - - The callback handler is set to the default handler which just prints all messages to the console - -Deprecated: Use NewUsingCallbackWithDefaults instead -*/ -func NewWithDefaults(ctx context.Context, options *clientinterfaces.LiveTranscriptionOptions, callback msginterfaces.LiveMessageCallback) (*WSCallback, error) { - return NewUsingCallback(ctx, "", &clientinterfaces.ClientOptions{}, options, callback) -} - -/* -NewWithDefaults creates a new websocket connection with all default options - Notes: - The Deepgram API KEY is read from the environment variable DEEPGRAM_API_KEY - The callback handler is set to the default handler which just prints all messages to the console @@ -65,22 +40,6 @@ func NewUsingCallbackWithDefaults(ctx context.Context, options *clientinterfaces /* New creates a new websocket connection with the 
specified options -Input parameters: -- ctx: context.Context object -- apiKey: string containing the Deepgram API key -- cOptions: ClientOptions which allows overriding things like hostname, version of the API, etc. -- tOptions: LiveTranscriptionOptions which allows overriding things like language, model, etc. -- callback: LiveMessageCallback which is a callback that allows you to perform actions based on the transcription - -Deprecated: Use NewUsingCallback instead -*/ -func New(ctx context.Context, apiKey string, cOptions *clientinterfaces.ClientOptions, tOptions *clientinterfaces.LiveTranscriptionOptions, callback msginterfaces.LiveMessageCallback) (*WSCallback, error) { - return NewUsingCallback(ctx, apiKey, cOptions, tOptions, callback) -} - -/* -New creates a new websocket connection with the specified options - Input parameters: - ctx: context.Context object - apiKey: string containing the Deepgram API key @@ -127,7 +86,7 @@ func NewUsingCallbackWithCancel(ctx context.Context, ctxCancel context.CancelFun } // init - var router msginterfaces.Router + var router commoninterfaces.Router router = websocketv1api.NewCallbackRouter(callback) conn := WSCallback{ @@ -141,10 +100,54 @@ func NewUsingCallbackWithCancel(ctx context.Context, ctxCancel context.CancelFun var handler commoninterfaces.WebSocketHandler handler = &conn - conn.WSClient = common.NewWS(ctx, ctxCancel, apiKey, cOptions, &handler) + conn.WSClient = common.NewWS(ctx, ctxCancel, apiKey, cOptions, &handler, &router) klog.V(3).Infof("NewDeepGramWSClient Succeeded\n") klog.V(6).Infof("New() LEAVE\n") return &conn, nil } + +/***********************************/ +// Deprecated functions +/***********************************/ +/* +NewForDemo creates a new websocket connection with all default options + +Notes: + - The Deepgram API KEY is read from the environment variable DEEPGRAM_API_KEY + +Deprecated: Use NewUsingCallbackForDemo instead +*/ +func NewForDemo(ctx context.Context, options 
*clientinterfaces.LiveTranscriptionOptions) (*WSCallback, error) { + return NewUsingCallbackForDemo(ctx, options) +} + +/* +NewWithDefaults creates a new websocket connection with all default options + +Notes: + - The Deepgram API KEY is read from the environment variable DEEPGRAM_API_KEY + - The callback handler is set to the default handler which just prints all messages to the console + +Deprecated: Use NewUsingCallbackWithDefaults instead +*/ +func NewWithDefaults(ctx context.Context, options *clientinterfaces.LiveTranscriptionOptions, callback msginterfaces.LiveMessageCallback) (*WSCallback, error) { + return NewUsingCallback(ctx, "", &clientinterfaces.ClientOptions{}, options, callback) +} + +/* +New creates a new websocket connection with the specified options + +Input parameters: +- ctx: context.Context object +- apiKey: string containing the Deepgram API key +- cOptions: ClientOptions which allows overriding things like hostname, version of the API, etc. +- tOptions: LiveTranscriptionOptions which allows overriding things like language, model, etc. 
+- callback: LiveMessageCallback which is a callback that allows you to perform actions based on the transcription + +Deprecated: Use NewUsingCallback instead +*/ +func New(ctx context.Context, apiKey string, cOptions *clientinterfaces.ClientOptions, tOptions *clientinterfaces.LiveTranscriptionOptions, callback msginterfaces.LiveMessageCallback) (*WSCallback, error) { + return NewUsingCallback(ctx, apiKey, cOptions, tOptions, callback) +} diff --git a/pkg/client/listen/v1/websocket/new_using_chan.go b/pkg/client/listen/v1/websocket/new_using_chan.go index 6d52f635..9ec73c23 100644 --- a/pkg/client/listen/v1/websocket/new_using_chan.go +++ b/pkg/client/listen/v1/websocket/new_using_chan.go @@ -33,7 +33,7 @@ Notes: - The Deepgram API KEY is read from the environment variable DEEPGRAM_API_KEY - The chans handler is set to the default handler which just prints all messages to the console */ -func NewUsingChanWithDefaults(ctx context.Context, options *clientinterfaces.LiveTranscriptionOptions, chans *msginterfaces.LiveMessageChan) (*WSChannel, error) { // gocritic:ignore +func NewUsingChanWithDefaults(ctx context.Context, options *clientinterfaces.LiveTranscriptionOptions, chans msginterfaces.LiveMessageChan) (*WSChannel, error) { // gocritic:ignore return NewUsingChan(ctx, "", &clientinterfaces.ClientOptions{}, options, chans) } @@ -47,7 +47,7 @@ Input parameters: - tOptions: LiveTranscriptionOptions which allows overriding things like language, model, etc. 
- chans: LiveMessageCallback which is a chans that allows you to perform actions based on the transcription */ -func NewUsingChan(ctx context.Context, apiKey string, cOptions *clientinterfaces.ClientOptions, tOptions *clientinterfaces.LiveTranscriptionOptions, chans *msginterfaces.LiveMessageChan) (*WSChannel, error) { +func NewUsingChan(ctx context.Context, apiKey string, cOptions *clientinterfaces.ClientOptions, tOptions *clientinterfaces.LiveTranscriptionOptions, chans msginterfaces.LiveMessageChan) (*WSChannel, error) { ctx, ctxCancel := context.WithCancel(ctx) return NewUsingChanWithCancel(ctx, ctxCancel, apiKey, cOptions, tOptions, chans) } @@ -63,7 +63,7 @@ Input parameters: - tOptions: LiveTranscriptionOptions which allows overriding things like language, model, etc. - chans: LiveMessageCallback which is a chans that allows you to perform actions based on the transcription */ -func NewUsingChanWithCancel(ctx context.Context, ctxCancel context.CancelFunc, apiKey string, cOptions *clientinterfaces.ClientOptions, tOptions *clientinterfaces.LiveTranscriptionOptions, chans *msginterfaces.LiveMessageChan) (*WSChannel, error) { +func NewUsingChanWithCancel(ctx context.Context, ctxCancel context.CancelFunc, apiKey string, cOptions *clientinterfaces.ClientOptions, tOptions *clientinterfaces.LiveTranscriptionOptions, chans msginterfaces.LiveMessageChan) (*WSChannel, error) { klog.V(6).Infof("live.New() ENTER\n") if apiKey != "" { @@ -82,14 +82,11 @@ func NewUsingChanWithCancel(ctx context.Context, ctxCancel context.CancelFunc, a if chans == nil { klog.V(2).Infof("Using DefaultCallbackHandler.\n") - defaultHandler := websocketv1api.NewDefaultChanHandler() - var handler msginterfaces.LiveMessageChan - handler = defaultHandler - chans = &handler + chans = websocketv1api.NewDefaultChanHandler() } // init - var router msginterfaces.Router + var router commoninterfaces.Router router = websocketv1api.NewChanRouter(chans) conn := WSChannel{ @@ -103,7 +100,7 @@ func 
NewUsingChanWithCancel(ctx context.Context, ctxCancel context.CancelFunc, a var handler commoninterfaces.WebSocketHandler handler = &conn - conn.WSClient = common.NewWS(ctx, ctxCancel, apiKey, cOptions, &handler) + conn.WSClient = common.NewWS(ctx, ctxCancel, apiKey, cOptions, &handler, &router) klog.V(3).Infof("NewDeepGramWSClient Succeeded\n") klog.V(6).Infof("live.New() LEAVE\n") diff --git a/pkg/client/listen/v1/websocket/types.go b/pkg/client/listen/v1/websocket/types.go index bd599433..79395f01 100644 --- a/pkg/client/listen/v1/websocket/types.go +++ b/pkg/client/listen/v1/websocket/types.go @@ -11,6 +11,7 @@ import ( msginterface "github.com/deepgram/deepgram-go-sdk/pkg/api/listen/v1/websocket/interfaces" common "github.com/deepgram/deepgram-go-sdk/pkg/client/common/v1" + commoninterfaces "github.com/deepgram/deepgram-go-sdk/pkg/client/common/v1/interfaces" interfaces "github.com/deepgram/deepgram-go-sdk/pkg/client/interfaces" ) @@ -33,7 +34,7 @@ type WSCallback struct { tOptions *interfaces.LiveTranscriptionOptions callback msginterface.LiveMessageCallback - router *msginterface.Router + router *commoninterfaces.Router // internal constants for retry, waits, back-off, etc. lastDatagram *time.Time @@ -50,7 +51,7 @@ type WSChannel struct { tOptions *interfaces.LiveTranscriptionOptions chans []*msginterface.LiveMessageChan - router *msginterface.Router + router *commoninterfaces.Router // internal constants for retry, waits, back-off, etc. 
lastDatagram *time.Time diff --git a/pkg/client/speak/client.go b/pkg/client/speak/client.go index b87d5308..8bd05627 100644 --- a/pkg/client/speak/client.go +++ b/pkg/client/speak/client.go @@ -8,10 +8,12 @@ This package provides the speak client implementation for the Deepgram API package speak import ( - // msginterfaces "github.com/deepgram/deepgram-go-sdk/pkg/api/speak/v1/websocket/interfaces" + "context" + + msginterfaces "github.com/deepgram/deepgram-go-sdk/pkg/api/speak/v1/websocket/interfaces" interfaces "github.com/deepgram/deepgram-go-sdk/pkg/client/interfaces/v1" speakv1rest "github.com/deepgram/deepgram-go-sdk/pkg/client/speak/v1/rest" - // speakv1ws "github.com/deepgram/deepgram-go-sdk/pkg/client/speak/v1/websocket" + speakv1ws "github.com/deepgram/deepgram-go-sdk/pkg/client/speak/v1/websocket" ) /***********************************/ @@ -21,23 +23,19 @@ const ( RESTPackageVersion = speakv1rest.PackageVersion ) -// Legacy Client Name -// -// Deprecated: This struct is deprecated. Please use RestClient struct. This will be removed in a future release. -type Client = speakv1rest.Client - // New Client Name type RestClient = speakv1rest.Client /* -NewWithDefaults creates a new speak client with all default options +NewRESTWithDefaults creates a new speak client with all default options + +Returns: +- *Client: a new speak client Notes: - The Deepgram API KEY is read from the environment variable DEEPGRAM_API_KEY - -Deprecated: This function is deprecated. Please use NewREST(). This will be removed in a future release. 
*/ -func NewWithDefaults() *speakv1rest.Client { +func NewRESTWithDefaults() *speakv1rest.Client { return speakv1rest.NewWithDefaults() } @@ -45,23 +43,204 @@ func NewWithDefaults() *speakv1rest.Client { New creates a new speak client with the specified options Input parameters: -- ctx: context.Context object - apiKey: string containing the Deepgram API key - options: ClientOptions which allows overriding things like hostname, version of the API, etc. -Deprecated: This function is deprecated. Please use NewREST(). This will be removed in a future release. +Returns: +- *Client: a new speak client */ -func New(apiKey string, options *interfaces.ClientOptions) *speakv1rest.Client { +func NewREST(apiKey string, options *interfaces.ClientOptions) *speakv1rest.Client { return speakv1rest.New(apiKey, options) } +/***********************************/ +// WebSocket Client +/***********************************/ +const ( + WebSocketPackageVersion = speakv1ws.PackageVersion +) + +type WebSocketClient = speakv1ws.Client + /* -NewRESTWithDefaults creates a new speak client with all default options + Using Callbacks +*/ +/* +NewWSUsingCallbackForDemo creates a new websocket connection with all default options + +Input parameters: +- ctx: context.Context object +- options: SpeakOptions which allows overriding things like model, etc. + +Returns: +- *Client: a new websocket client Notes: - The Deepgram API KEY is read from the environment variable DEEPGRAM_API_KEY */ -func NewRESTWithDefaults() *speakv1rest.Client { +func NewWSUsingCallbackForDemo(ctx context.Context, options *interfaces.WSSpeakOptions) (*speakv1ws.WSCallback, error) { + return speakv1ws.NewUsingCallbackForDemo(ctx, options) +} + +/* +NewWSUsingCallbackWithDefaults creates a new websocket connection with all default options + +Input parameters: +- ctx: context.Context object +- options: SpeakOptions which allows overriding things like model, etc. 
+- callback: SpeakMessageCallback is a callback which lets you perform actions based on platform messages + +Returns: +- *WSCallback: a new callback-based websocket client + +Notes: + - The callback handler is set to the default handler +*/ +func NewWSUsingCallbackWithDefaults(ctx context.Context, options *interfaces.WSSpeakOptions, callback msginterfaces.SpeakMessageCallback) (*speakv1ws.WSCallback, error) { + return speakv1ws.NewUsingCallbackWithDefaults(ctx, options, callback) +} + +/* +NewWSUsingCallback creates a new websocket connection with the specified options + +Input parameters: +- ctx: context.Context object +- apiKey: string containing the Deepgram API key +- cOptions: ClientOptions which allows overriding things like hostname, version of the API, etc. +- sOptions: WSSpeakOptions which allows overriding things like model, etc. +- callback: SpeakMessageCallback is a callback which lets you perform actions based on platform messages + +Returns: +- *WSCallback: a new callback-based websocket client + +Notes: + - If apiKey is an empty string, the Deepgram API KEY is read from the environment variable DEEPGRAM_API_KEY + - The callback handler is set to the default handler +*/ +func NewWSUsingCallback(ctx context.Context, apiKey string, cOptions *interfaces.ClientOptions, sOptions *interfaces.WSSpeakOptions, callback msginterfaces.SpeakMessageCallback) (*speakv1ws.WSCallback, error) { + return speakv1ws.NewUsingCallback(ctx, apiKey, cOptions, sOptions, callback) +} + +/* +NewWSUsingCallbackWithCancel creates a new websocket connection but has facilities to BYOC (Bring Your Own Cancel) + +Input parameters: +- ctx: context.Context object +- ctxCancel: allow passing in own cancel +- apiKey: string containing the Deepgram API key +- cOptions: ClientOptions which allows overriding things like hostname, version of the API, etc. +- sOptions: WSSpeakOptions which allows overriding things like model, etc. 
+- callback: SpeakMessageCallback is a callback which lets you perform actions based on platform messages + +Returns: +- *Client: a new websocket client + +Notes: + - If apiKey is an empty string, the Deepgram API KEY is read from the environment variable DEEPGRAM_API_KEY + - The callback handler is set to the default handler +*/ +func NewWSUsingCallbackWithCancel(ctx context.Context, ctxCancel context.CancelFunc, apiKey string, cOptions *interfaces.ClientOptions, sOptions *interfaces.WSSpeakOptions, callback msginterfaces.SpeakMessageCallback) (*speakv1ws.WSCallback, error) { + return speakv1ws.NewUsingCallbackWithCancel(ctx, ctxCancel, apiKey, cOptions, sOptions, callback) +} + +/* + Using Channels +*/ +/* +NewWSUsingChanForDemo creates a new websocket connection with all default options + +Input parameters: +- ctx: context.Context object +- options: SpeakOptions which allows overriding things like model, etc. + +Returns: +- *Client: a new websocket client + +Notes: + - The Deepgram API KEY is read from the environment variable DEEPGRAM_API_KEY +*/ +func NewWSUsingChanForDemo(ctx context.Context, options *interfaces.WSSpeakOptions) (*speakv1ws.WSChannel, error) { + return speakv1ws.NewUsingChanForDemo(ctx, options) +} + +/* +NewWSUsingChanWithDefaults creates a new websocket connection with all default options + +Input parameters: +- ctx: context.Context object +- options: SpeakOptions which allows overriding things like model, etc. 
+- callback: SpeakMessageChan (not a SpeakMessageCallback) which lets you perform actions based on platform messages + +Returns: +- *WSChannel: a new channel-based websocket client + +Notes: + - The callback handler is set to the default handler +*/ +func NewWSUsingChanWithDefaults(ctx context.Context, options *interfaces.WSSpeakOptions, callback msginterfaces.SpeakMessageChan) (*speakv1ws.WSChannel, error) { + return speakv1ws.NewUsingChanWithDefaults(ctx, options, callback) +} + +/* +NewWSUsingChan creates a new websocket connection with the specified options + +Input parameters: +- ctx: context.Context object +- apiKey: string containing the Deepgram API key +- cOptions: ClientOptions which allows overriding things like hostname, version of the API, etc. +- sOptions: WSSpeakOptions which allows overriding things like model, etc. +- callback: SpeakMessageChan (not a SpeakMessageCallback) which lets you perform actions based on platform messages + +Returns: +- *WSChannel: a new channel-based websocket client + +Notes: + - If apiKey is an empty string, the Deepgram API KEY is read from the environment variable DEEPGRAM_API_KEY + - The callback handler is set to the default handler +*/ +func NewWSUsingChan(ctx context.Context, apiKey string, cOptions *interfaces.ClientOptions, sOptions *interfaces.WSSpeakOptions, callback msginterfaces.SpeakMessageChan) (*speakv1ws.WSChannel, error) { + return speakv1ws.NewUsingChan(ctx, apiKey, cOptions, sOptions, callback) +} + +/* +NewWSUsingChanWithCancel creates a new websocket connection but has facilities to BYOC (Bring Your Own Cancel) + +Input parameters: +- ctx: context.Context object +- ctxCancel: allow passing in own cancel +- apiKey: string containing the Deepgram API key +- cOptions: ClientOptions which allows overriding things like hostname, version of the API, etc. +- sOptions: WSSpeakOptions which allows overriding things like model, etc. 
+- callback: SpeakMessageChan (not a SpeakMessageCallback) which lets you perform actions based on platform messages + +Returns: +- *WSChannel: a new channel-based websocket client + +Notes: + - If apiKey is an empty string, the Deepgram API KEY is read from the environment variable DEEPGRAM_API_KEY + - The callback handler is set to the default handler +*/ +func NewWSUsingChanWithCancel(ctx context.Context, ctxCancel context.CancelFunc, apiKey string, cOptions *interfaces.ClientOptions, sOptions *interfaces.WSSpeakOptions, callback msginterfaces.SpeakMessageChan) (*speakv1ws.WSChannel, error) { + return speakv1ws.NewUsingChanWithCancel(ctx, ctxCancel, apiKey, cOptions, sOptions, callback) +} + +/***********************************/ +// Deprecated (THESE WILL STILL WORK, +// BUT WILL BE REMOVED IN A FUTURE RELEASE) +/***********************************/ +/* +Legacy Client Name + +Deprecated: This struct is deprecated. Please use RestClient struct. This will be removed in a future release. +*/ +type Client = speakv1rest.Client + +/* +NewWithDefaults creates a new speak client with all default options + +Deprecated: This function is deprecated. Please use NewRESTWithDefaults(). This will be removed in a future release. +*/ +func NewWithDefaults() *speakv1rest.Client { return speakv1rest.NewWithDefaults() } @@ -72,73 +251,9 @@ Input parameters: - ctx: context.Context object - apiKey: string containing the Deepgram API key - options: ClientOptions which allows overriding things like hostname, version of the API, etc. + +Deprecated: This function is deprecated. Please use NewREST(). This will be removed in a future release. 
*/ -func NewREST(apiKey string, options *interfaces.ClientOptions) *speakv1rest.Client { +func New(apiKey string, options *interfaces.ClientOptions) *speakv1rest.Client { return speakv1rest.New(apiKey, options) } - -// /***********************************/ -// // WebSocket Client -// /***********************************/ -// const ( -// WebSocketPackageVersion = speakv1ws.PackageVersion -// ) - -// type WebSocketClient = speakv1ws.Client - -// /* -// NewWebSocketForDemo creates a new websocket connection with all default options - -// Notes: -// - The Deepgram API KEY is read from the environment variable DEEPGRAM_API_KEY -// */ -// func NewWebSocketForDemo(ctx context.Context, options *interfaces.SpeakOptions) (*speakv1ws.Client, error) { -// return speakv1ws.NewWebSocketForDemo(ctx, options) -// } - -// /* -// NewStreamWithDefaults creates a new websocket connection with all default options - -// Notes: -// - The callback handler is set to the default handler -// */ -// func NewWebSocketWithDefaults(ctx context.Context, options *interfaces.SpeakOptions, callback msginterfaces.SpeakMessageCallback) (*speakv1ws.Client, error) { -// return speakv1ws.NewWebSocketWithDefaults(ctx, options, callback) -// } - -// /* -// NewStream creates a new websocket connection with the specified options - -// Input parameters: -// - ctx: context.Context object -// - apiKey: string containing the Deepgram API key -// - cOptions: ClientOptions which allows overriding things like hostname, version of the API, etc. -// - sOptions: SpeakOptions which allows overriding things like model, etc. 
-// - callback: SpeakMessageCallback is a callback which lets you perform actions based on platform messages - -// Notes: -// - If apiKey is an empty string, the Deepgram API KEY is read from the environment variable DEEPGRAM_API_KEY -// - The callback handler is set to the default handler -// */ -// func NewWebSocket(ctx context.Context, apiKey string, cOptions *interfaces.ClientOptions, sOptions *interfaces.SpeakOptions, callback msginterfaces.SpeakMessageCallback) (*speakv1ws.Client, error) { -// return speakv1ws.NewWebSocket(ctx, apiKey, cOptions, sOptions, callback) -// } - -// /* -// NewWebSocketWithCancel creates a new websocket connection but has facilities to BYOC (Bring Your Own Cancel) - -// Input parameters: -// - ctx: context.Context object -// - ctxCancel: allow passing in own cancel -// - apiKey: string containing the Deepgram API key -// - cOptions: ClientOptions which allows overriding things like hostname, version of the API, etc. -// - sOptions: SpeakOptions which allows overriding things like model, etc. -// - callback: SpeakMessageCallback is a callback which lets you perform actions based on platform messages - -// Notes: -// - If apiKey is an empty string, the Deepgram API KEY is read from the environment variable DEEPGRAM_API_KEY -// - The callback handler is set to the default handler -// */ -// func NewWebSocketWithCancel(ctx context.Context, ctxCancel context.CancelFunc, apiKey string, cOptions *interfaces.ClientOptions, sOptions *interfaces.SpeakOptions, callback msginterfaces.SpeakMessageCallback) (*speakv1ws.Client, error) { -// return speakv1ws.NewWebSocketWithCancel(ctx, ctxCancel, apiKey, cOptions, sOptions, callback) -// } diff --git a/pkg/client/speak/v1/websocket/client_callback.go b/pkg/client/speak/v1/websocket/client_callback.go new file mode 100644 index 00000000..fab5a0a2 --- /dev/null +++ b/pkg/client/speak/v1/websocket/client_callback.go @@ -0,0 +1,330 @@ +// Copyright 2024 Deepgram SDK contributors. All Rights Reserved. 
+// Use of this source code is governed by a MIT license that can be found in the LICENSE file. +// SPDX-License-Identifier: MIT + +// Package websocketv1 provides the speak/streaming client implementation for the Deepgram API +package websocketv1 + +import ( + "context" + "encoding/json" + "fmt" + "regexp" + "strings" + "time" + + "github.com/dvonthenen/websocket" + klog "k8s.io/klog/v2" + + msginterfaces "github.com/deepgram/deepgram-go-sdk/pkg/api/speak/v1/websocket/interfaces" + version "github.com/deepgram/deepgram-go-sdk/pkg/api/version" + common "github.com/deepgram/deepgram-go-sdk/pkg/client/common/v1" +) + +// Connect performs a websocket connection with "DefaultConnectRetry" number of retries. +func (c *WSCallback) Connect() bool { + c.ctx, c.ctxCancel = context.WithCancel(c.ctx) + return c.ConnectWithCancel(c.ctx, c.ctxCancel, int(DefaultConnectRetry)) +} + +// ConnectWithCancel performs a websocket connection with specified number of retries and providing a +// cancel function to stop the connection +func (c *WSCallback) ConnectWithCancel(ctx context.Context, ctxCancel context.CancelFunc, retryCnt int) bool { + c.ctx = ctx + c.ctxCancel = ctxCancel + return c.WSClient.ConnectWithCancel(ctx, ctxCancel, retryCnt) +} + +// AttemptReconnect performs a reconnect after failing retries +func (c *WSCallback) AttemptReconnect(ctx context.Context, retries int64) bool { + c.ctx, c.ctxCancel = context.WithCancel(ctx) + return c.AttemptReconnectWithCancel(c.ctx, c.ctxCancel, retries) +} + +// AttemptReconnectWithCancel performs a reconnect after failing retries and providing a cancel function +func (c *WSCallback) AttemptReconnectWithCancel(ctx context.Context, ctxCancel context.CancelFunc, retries int64) bool { + c.ctx = ctx + c.ctxCancel = ctxCancel + return c.WSClient.AttemptReconnectWithCancel(ctx, ctxCancel, retries) +} + +// GetURL returns the websocket URL built from the client options; the host argument is currently unused +func (c *WSCallback) GetURL(host string) (string, error) { + url, err := version.GetSpeakStreamAPI(c.ctx, 
c.cOptions.Host, c.cOptions.APIVersion, c.cOptions.Path, c.sOptions) + if err != nil { + klog.V(1).Infof("version.GetSpeakStreamAPI failed. Err: %v\n", err) + return "", err + } + klog.V(5).Infof("Connecting to %s\n", url) + return url, nil +} + +// Start launches the background auto-flush goroutine when AutoFlushSpeakDelta is non-zero +func (c *WSCallback) Start() { + if c.cOptions.AutoFlushSpeakDelta != 0 { + go c.flush() + } +} + +// ProcessMessage routes text frames via router.Message (after optional inspection) and binary frames via router.Binary; failures are only logged +func (c *WSCallback) ProcessMessage(wsType int, byMsg []byte) error { + klog.V(6).Infof("ProcessMessage() ENTER\n") + + switch wsType { + case websocket.TextMessage: + // inspect the message + if c.cOptions.InspectSpeakMessage() { + err := c.inspect(byMsg) + if err != nil { + klog.V(1).Infof("speak: inspect failed. Err: %v\n", err) + } + } + + // route the message + err := (*c.router).Message(byMsg) + if err != nil { + klog.V(1).Infof("speak.listen(): router.Message failed. Err: %v\n", err) + } + case websocket.BinaryMessage: + // audio data! + err := (*c.router).Binary(byMsg) + if err != nil { + klog.V(1).Infof("speak.listen(): router.Binary failed. 
Err: %v\n", err) + } + default: + klog.V(7).Infof("speak.listen(): msg recv: type %d, len: %d\n", wsType, len(byMsg)) + } + + klog.V(6).Infof("ProcessMessage Succeeded\n") + klog.V(6).Infof("ProcessMessage() LEAVE\n") + + return nil +} + +// SpeakWithText writes text to the websocket server to obtain corresponding audio +// +// This function will automatically wrap the text in the appropriate JSON structure +// and send it to the server +// +// Args: +// +// text: string containing the text to be spoken +// +// Return: +// +// error: if successful, returns nil otherwise an error object +func (c *WSCallback) SpeakWithText(text string) error { + klog.V(6).Infof("speak.SpeakText() ENTER\n") + klog.V(4).Infof("text: %s\n", text) + + err := c.WSClient.WriteJSON(TextSource{ + Type: MessageTypeSpeak, + Text: text, + }) + if err == nil { + klog.V(4).Infof("SpeakText Succeeded\n") + } else { + klog.V(1).Infof("SpeakText failed. Err: %v\n", err) + } + + klog.V(6).Infof("speak.SpeakText() LEAVE\n") + + return err +} + +// Speak is an alias function for SpeakWithText +func (c *WSCallback) Speak(text string) error { + return c.SpeakWithText(text) +} + +// WriteJSON writes a JSON message to the websocket +func (c *WSCallback) WriteJSON(playload controlMessage) error { + if playload.Type == MessageTypeFlush { + c.muFinal.Lock() + c.flushCount++ + klog.V(5).Infof("Flush Count: %d\n", c.flushCount) + c.muFinal.Unlock() + } + + return c.WSClient.WriteJSON(playload) +} + +// Flush will instruct the server to flush the current text buffer +func (c *WSCallback) Flush() error { + klog.V(6).Infof("speak.Flush() ENTER\n") + + err := c.WriteJSON(controlMessage{Type: MessageTypeFlush}) + if err != nil { + klog.V(1).Infof("Flush failed. 
Err: %v\n", err) + klog.V(6).Infof("speak.Flush() LEAVE\n") + + return err + } + c.flushCount++ + + klog.V(4).Infof("Flush Succeeded\n") + klog.V(6).Infof("speak.Flush() LEAVE\n") + + return err +} + +// Reset will instruct the server to reset the current buffer +func (c *WSCallback) Reset() error { + klog.V(6).Infof("speak.Reset() ENTER\n") + + err := c.WriteJSON(controlMessage{Type: MessageTypeReset}) + if err != nil { + klog.V(1).Infof("Reset failed. Err: %v\n", err) + klog.V(6).Infof("speak.Reset() LEAVE\n") + + return err + } + + klog.V(4).Infof("Reset Succeeded\n") + klog.V(6).Infof("speak.Reset() LEAVE\n") + return nil +} + +// GetCloseMsg sends an application level message to Deepgram +func (c *WSCallback) GetCloseMsg() []byte { + return []byte("{ \"type\": \"Close\" }") +} + +// Finish the callback +func (c *WSCallback) Finish() { + // NA +} + +// ProcessError sends an error message to the callback handler +func (c *WSCallback) ProcessError(err error) error { + response := c.errorToResponse(err) + sendErr := (*c.router).Error(response) + if err != nil { + klog.V(1).Infof("speak.listen(): router.Error failed. Err: %v\n", sendErr) + } + + return err +} + +// flush thread +func (c *WSCallback) flush() { + klog.V(6).Infof("speak.flush() ENTER\n") + + defer func() { + if r := recover(); r != nil { + klog.V(1).Infof("Panic triggered\n") + + // send error on callback + err := common.ErrFatalPanicRecovered + sendErr := c.ProcessError(err) + if sendErr != nil { + klog.V(1).Infof("speak: Fatal socket error. Err: %v\n", sendErr) + } + + klog.V(6).Infof("speak.flush() LEAVE\n") + return + } + }() + + ticker := time.NewTicker(flushPeriod) + defer ticker.Stop() + for { + select { + case <-c.ctx.Done(): + klog.V(3).Infof("speak.flush() Exiting\n") + klog.V(6).Infof("speak.flush() LEAVE\n") + return + case <-ticker.C: + // doing a read, need to lock. + c.muFinal.Lock() + + // have we received anything? 
no, then skip + if c.lastDatagram == nil { + klog.V(7).Infof("No datagram received. Skipping...\n") + c.muFinal.Unlock() + continue + } + + // we have received something, but is it recent? + trigger := c.lastDatagram.Add(time.Millisecond * time.Duration(c.cOptions.AutoFlushSpeakDelta)) + now := time.Now() + klog.V(6).Infof("Time (Last): %s\n", trigger.String()) + klog.V(6).Infof("Time (Now ): %s\n", now.String()) + bNeedFlush := trigger.Before(now) + if bNeedFlush { + c.lastDatagram = nil + } + + // release + c.muFinal.Unlock() + + if bNeedFlush { + klog.V(5).Infof("Sending Flush message...\n") + err := c.Flush() + if err == nil { + klog.V(5).Infof("Flush sent!") + } else { + klog.V(1).Infof("Failed to send Flush. Err: %v\n", err) + } + } + } + } +} + +// errorToResponse converts an error into a Deepgram error response +func (c *WSCallback) errorToResponse(err error) *msginterfaces.ErrorResponse { + r := regexp.MustCompile(`websocket: ([a-z]+) (\d+) .+: (.+)`) + + var errorCode string + var errorNum string + var errorDesc string + + matches := r.FindStringSubmatch(err.Error()) + if len(matches) > 3 { + errorCode = matches[1] + errorNum = matches[2] + errorDesc = matches[3] + } else { + errorCode = common.UnknownDeepgramErr + errorNum = common.UnknownDeepgramErr + errorDesc = err.Error() + } + + response := &msginterfaces.ErrorResponse{ + Type: string(msginterfaces.TypeErrorResponse), + ErrMsg: strings.TrimSpace(fmt.Sprintf("%s %s", errorCode, errorNum)), + Description: strings.TrimSpace(errorDesc), + Variant: errorNum, + } + return response +} + +// inspect will check the message and determine the type to +// see if we should do actionable based on those types of messages +func (c *WSCallback) inspect(byMsg []byte) error { + klog.V(7).Infof("speak.inspect() ENTER\n") + + var mt msginterfaces.MessageType + if err := json.Unmarshal(byMsg, &mt); err != nil { + klog.V(1).Infof("json.Unmarshal(MessageType) failed. 
Err: %v\n", err) + klog.V(7).Infof("speak.inspect() LEAVE\n") + return err + } + + switch msginterfaces.TypeResponse(mt.Type) { + case msginterfaces.TypeFlushedResponse: + klog.V(7).Infof("TypeFlushedResponse\n") + + // decrement the flush count + c.muFinal.Lock() + c.flushCount-- + klog.V(5).Infof("Flush Count: %d\n", c.flushCount) + c.muFinal.Unlock() + default: + klog.V(5).Infof("MessageType: %s\n", mt.Type) + } + + klog.V(7).Info("inspect() succeeded\n") + klog.V(7).Infof("speak.inspect() LEAVE\n") + return nil +} diff --git a/pkg/client/speak/v1/websocket/client_channel.go b/pkg/client/speak/v1/websocket/client_channel.go new file mode 100644 index 00000000..f53dde85 --- /dev/null +++ b/pkg/client/speak/v1/websocket/client_channel.go @@ -0,0 +1,329 @@ +// Copyright 2024 Deepgram SDK contributors. All Rights Reserved. +// Use of this source code is governed by a MIT license that can be found in the LICENSE file. +// SPDX-License-Identifier: MIT + +// This package provides the live/streaming client implementation for the Deepgram API +package websocketv1 + +import ( + "context" + "encoding/json" + "fmt" + "regexp" + "strings" + "time" + + "github.com/dvonthenen/websocket" + klog "k8s.io/klog/v2" + + msginterfaces "github.com/deepgram/deepgram-go-sdk/pkg/api/speak/v1/websocket/interfaces" + version "github.com/deepgram/deepgram-go-sdk/pkg/api/version" + common "github.com/deepgram/deepgram-go-sdk/pkg/client/common/v1" +) + +// Connect performs a websocket connection with "DefaultConnectRetry" number of retries. 
+func (c *WSChannel) Connect() bool { + c.ctx, c.ctxCancel = context.WithCancel(c.ctx) + return c.ConnectWithCancel(c.ctx, c.ctxCancel, int(DefaultConnectRetry)) +} + +// ConnectWithCancel performs a websocket connection with specified number of retries and providing a +// cancel function to stop the connection +func (c *WSChannel) ConnectWithCancel(ctx context.Context, ctxCancel context.CancelFunc, retryCnt int) bool { + c.ctx = ctx + c.ctxCancel = ctxCancel + return c.WSClient.ConnectWithCancel(ctx, ctxCancel, retryCnt) +} + +// AttemptReconnect performs a reconnect after failing retries +func (c *WSChannel) AttemptReconnect(ctx context.Context, retries int64) bool { + c.ctx, c.ctxCancel = context.WithCancel(ctx) + return c.AttemptReconnectWithCancel(c.ctx, c.ctxCancel, retries) +} + +// AttemptReconnect performs a reconnect after failing retries and providing a cancel function +func (c *WSChannel) AttemptReconnectWithCancel(ctx context.Context, ctxCancel context.CancelFunc, retries int64) bool { + c.ctx = ctx + c.ctxCancel = ctxCancel + return c.WSClient.AttemptReconnectWithCancel(ctx, ctxCancel, retries) +} + +// GetURL returns the websocket URL +func (c *WSChannel) GetURL(host string) (string, error) { + url, err := version.GetSpeakStreamAPI(c.ctx, c.cOptions.Host, c.cOptions.APIVersion, c.cOptions.Path, c.sOptions) + if err != nil { + klog.V(1).Infof("version.GetSpeakStreamAPI failed. 
Err: %v\n", err) + return "", err + } + klog.V(5).Infof("Connecting to %s\n", url) + return url, nil +} + +// Start the keepalive and flush threads +func (c *WSChannel) Start() { + if c.cOptions.AutoFlushReplyDelta != 0 { + go c.flush() + } +} + +// ProcessMessage processes the message and sends it to the callback +func (c *WSChannel) ProcessMessage(wsType int, byMsg []byte) error { + klog.V(6).Infof("ProcessMessage() ENTER\n") + + switch wsType { + case websocket.TextMessage: + // inspect the message + if c.cOptions.InspectSpeakMessage() { + err := c.inspect(byMsg) + if err != nil { + klog.V(1).Infof("speak: inspect failed. Err: %v\n", err) + } + } + + // route the message + err := (*c.router).Message(byMsg) + if err != nil { + klog.V(1).Infof("speak.listen(): router.Message failed. Err: %v\n", err) + } + case websocket.BinaryMessage: + // audio data! + err := (*c.router).Binary(byMsg) + if err != nil { + klog.V(1).Infof("speak.listen(): router.Message failed. Err: %v\n", err) + } + default: + klog.V(7).Infof("speak.listen(): msg recv: type %d, len: %d\n", wsType, len(byMsg)) + } + + klog.V(6).Infof("ProcessMessage Succeeded\n") + klog.V(6).Infof("ProcessMessage() LEAVE\n") + + return nil +} + +// SpeakWithText writes text to the websocket server to obtain corresponding audio +// +// This function will automatically wrap the text in the appropriate JSON structure +// and send it to the server +// +// Args: +// +// text: string containing the text to be spoken +// +// Return: +// +// error: if successful, returns nil otherwise an error object +func (c *WSChannel) SpeakWithText(text string) error { + klog.V(6).Infof("speak.SpeakText() ENTER\n") + klog.V(4).Infof("text: %s\n", text) + + err := c.WSClient.WriteJSON(TextSource{ + Type: MessageTypeSpeak, + Text: text, + }) + if err == nil { + klog.V(4).Infof("SpeakText Succeeded\n") + } else { + klog.V(1).Infof("SpeakText failed. 
Err: %v\n", err) + } + + klog.V(6).Infof("speak.SpeakText() LEAVE\n") + + return err +} + +// Speak is an alias function for SpeakWithText +func (c *WSChannel) Speak(text string) error { + return c.SpeakWithText(text) +} + +// WriteJSON writes a JSON message to the websocket +func (c *WSChannel) WriteJSON(playload controlMessage) error { + if playload.Type == MessageTypeFlush { + c.muFinal.Lock() + c.flushCount++ + klog.V(5).Infof("Flush Count: %d\n", c.flushCount) + c.muFinal.Unlock() + } + + return c.WSClient.WriteJSON(playload) +} + +// Flush will instruct the server to flush the current text buffer +func (c *WSChannel) Flush() error { + klog.V(6).Infof("speak.Flush() ENTER\n") + + err := c.WriteJSON(controlMessage{Type: MessageTypeFlush}) + if err != nil { + klog.V(1).Infof("Flush failed. Err: %v\n", err) + klog.V(6).Infof("speak.Flush() LEAVE\n") + + return err + } + + klog.V(4).Infof("Flush Succeeded\n") + klog.V(6).Infof("speak.Flush() LEAVE\n") + + return err +} + +// Reset will instruct the server to reset the current buffer +func (c *WSChannel) Reset() error { + klog.V(6).Infof("speak.Reset() ENTER\n") + + err := c.WriteJSON(controlMessage{Type: MessageTypeReset}) + if err != nil { + klog.V(1).Infof("Reset failed. Err: %v\n", err) + klog.V(6).Infof("speak.Reset() LEAVE\n") + + return err + } + + klog.V(4).Infof("Reset Succeeded\n") + klog.V(6).Infof("speak.Reset() LEAVE\n") + return nil +} + +// GetCloseMsg sends an application level message to Deepgram +func (c *WSChannel) GetCloseMsg() []byte { + return []byte("{ \"type\": \"Close\" }") +} + +// Finish the websocket connection +func (c *WSChannel) Finish() { + // NA +} + +// ProcessError processes the error and sends it to the callback +func (c *WSChannel) ProcessError(err error) error { + response := c.errorToResponse(err) + sendErr := (*c.router).Error(response) + if err != nil { + klog.V(1).Infof("ProcessError failed. 
Err: %v\n", sendErr) + } + + return err +} + +// flush thread +func (c *WSChannel) flush() { + klog.V(6).Infof("speak.flush() ENTER\n") + + defer func() { + if r := recover(); r != nil { + klog.V(1).Infof("Panic triggered\n") + + // send error on callback + err := common.ErrFatalPanicRecovered + sendErr := c.ProcessError(err) + if sendErr != nil { + klog.V(1).Infof("listen: Fatal socket error. Err: %v\n", sendErr) + } + + klog.V(6).Infof("speak.flush() LEAVE\n") + return + } + }() + + ticker := time.NewTicker(flushPeriod) + defer ticker.Stop() + for { + select { + case <-c.ctx.Done(): + klog.V(3).Infof("speak.flush() Exiting\n") + klog.V(6).Infof("speak.flush() LEAVE\n") + return + case <-ticker.C: + // doing a read, need to lock. + c.muFinal.Lock() + + // have we received anything? no, then skip + if c.lastDatagram == nil { + klog.V(7).Infof("No datagram received. Skipping...\n") + c.muFinal.Unlock() + continue + } + + // we have received something, but is it recent? + trigger := c.lastDatagram.Add(time.Millisecond * time.Duration(c.cOptions.AutoFlushReplyDelta)) + now := time.Now() + klog.V(7).Infof("Time (Last): %s\n", trigger.String()) + klog.V(7).Infof("Time (Now ): %s\n", now.String()) + bNeedFlush := trigger.Before(now) + if bNeedFlush { + c.lastDatagram = nil + } + + // release + c.muFinal.Unlock() + + if bNeedFlush { + klog.V(5).Infof("Sending Flush message...\n") + err := c.Flush() + if err == nil { + klog.V(5).Infof("Flush sent!") + } else { + klog.V(1).Infof("Failed to send Flush. 
Err: %v\n", err) + } + } + } + } +} + +// errorToResponse converts an error into a Deepgram error response +func (c *WSChannel) errorToResponse(err error) *msginterfaces.ErrorResponse { + r := regexp.MustCompile(`websocket: ([a-z]+) (\d+) .+: (.+)`) + + var errorCode string + var errorNum string + var errorDesc string + + matches := r.FindStringSubmatch(err.Error()) + if len(matches) > 3 { + errorCode = matches[1] + errorNum = matches[2] + errorDesc = matches[3] + } else { + errorCode = common.UnknownDeepgramErr + errorNum = common.UnknownDeepgramErr + errorDesc = err.Error() + } + + response := &msginterfaces.ErrorResponse{ + Type: string(msginterfaces.TypeErrorResponse), + ErrMsg: strings.TrimSpace(fmt.Sprintf("%s %s", errorCode, errorNum)), + Description: strings.TrimSpace(errorDesc), + Variant: errorNum, + } + return response +} + +// inspect will check the message and determine the type to +// see if we should do actionable based on those types of messages +func (c *WSChannel) inspect(byMsg []byte) error { + klog.V(7).Infof("speak.inspect() ENTER\n") + + var mt msginterfaces.MessageType + if err := json.Unmarshal(byMsg, &mt); err != nil { + klog.V(1).Infof("json.Unmarshal(MessageType) failed. 
Err: %v\n", err) + klog.V(7).Infof("speak.inspect() LEAVE\n") + return err + } + + switch msginterfaces.TypeResponse(mt.Type) { + case msginterfaces.TypeFlushedResponse: + klog.V(7).Infof("TypeFlushedResponse\n") + + // decrement the flush count + c.muFinal.Lock() + c.flushCount-- + klog.V(5).Infof("Flush Count: %d\n", c.flushCount) + c.muFinal.Unlock() + default: + klog.V(5).Infof("MessageType: %s\n", mt.Type) + } + + klog.V(7).Info("inspect() succeeded\n") + klog.V(7).Infof("speak.inspect() LEAVE\n") + return nil +} diff --git a/pkg/client/speak/v1/websocket/constants.go b/pkg/client/speak/v1/websocket/constants.go new file mode 100644 index 00000000..551c26bc --- /dev/null +++ b/pkg/client/speak/v1/websocket/constants.go @@ -0,0 +1,40 @@ +// Copyright 2024 Deepgram SDK contributors. All Rights Reserved. +// Use of this source code is governed by a MIT license that can be found in the LICENSE file. +// SPDX-License-Identifier: MIT + +package websocketv1 + +import ( + "time" +) + +const ( + PackageVersion string = "v1.0" +) + +// external constants +const ( + DefaultConnectRetry int64 = 3 + + ChunkSize = 1024 * 2 + TerminationSleep = 100 * time.Millisecond +) + +const ( + // MessageTypeFlush flushes the audio from the server + MessageTypeSpeak string = "Speak" + + // MessageTypeFlush flushes the audio from the server + MessageTypeFlush string = "Flush" + + // MessageTypeReset resets the text buffer + MessageTypeReset string = "Reset" + + // MessageTypeClose closes the stream + MessageTypeClose string = "Close" +) + +// internal constants for retry, waits, back-off, etc. +const ( + flushPeriod = 500 * time.Millisecond +) diff --git a/pkg/client/speak/v1/websocket/new_using_callbacks.go b/pkg/client/speak/v1/websocket/new_using_callbacks.go new file mode 100644 index 00000000..a9dbb973 --- /dev/null +++ b/pkg/client/speak/v1/websocket/new_using_callbacks.go @@ -0,0 +1,118 @@ +// Copyright 2024 Deepgram SDK contributors. All Rights Reserved. 
+// Use of this source code is governed by a MIT license that can be found in the LICENSE file. +// SPDX-License-Identifier: MIT + +// This package provides the speak/streaming client implementation for the Deepgram API +package websocketv1 + +import ( + "context" + + klog "k8s.io/klog/v2" + + websocketv1api "github.com/deepgram/deepgram-go-sdk/pkg/api/speak/v1/websocket" + msginterfaces "github.com/deepgram/deepgram-go-sdk/pkg/api/speak/v1/websocket/interfaces" + common "github.com/deepgram/deepgram-go-sdk/pkg/client/common/v1" + commoninterfaces "github.com/deepgram/deepgram-go-sdk/pkg/client/common/v1/interfaces" + clientinterfaces "github.com/deepgram/deepgram-go-sdk/pkg/client/interfaces/v1" +) + +/* +NewForDemo creates a new websocket connection with all default options + +Notes: + - The Deepgram API KEY is read from the environment variable DEEPGRAM_API_KEY +*/ +func NewUsingCallbackForDemo(ctx context.Context, options *clientinterfaces.WSSpeakOptions) (*WSCallback, error) { + return NewUsingCallback(ctx, "", &clientinterfaces.ClientOptions{}, options, nil) +} + +/* +NewWithDefaults creates a new websocket connection with all default options + +Notes: + - The callback handler is set to the default handler +*/ +func NewUsingCallbackWithDefaults(ctx context.Context, options *clientinterfaces.WSSpeakOptions, callback msginterfaces.SpeakMessageCallback) (*WSCallback, error) { + return NewUsingCallback(ctx, "", &clientinterfaces.ClientOptions{}, options, callback) +} + +/* +New creates a new websocket connection with the specified options + +Input parameters: +- ctx: context.Context object +- apiKey: string containing the Deepgram API key +- cOptions: ClientOptions which allows overriding things like hostname, version of the API, etc. +- sOptions: SpeakOptions which allows overriding things like model, etc. 
+- callback: SpeakMessageCallback is a callback which lets you perform actions based on platform messages + +Notes: + - If apiKey is an empty string, the Deepgram API KEY is read from the environment variable DEEPGRAM_API_KEY + - The callback handler is set to the default handler +*/ +func NewUsingCallback(ctx context.Context, apiKey string, cOptions *clientinterfaces.ClientOptions, sOptions *clientinterfaces.WSSpeakOptions, callback msginterfaces.SpeakMessageCallback) (*WSCallback, error) { + ctx, ctxCancel := context.WithCancel(ctx) + return NewUsingCallbackWithCancel(ctx, ctxCancel, apiKey, cOptions, sOptions, callback) +} + +/* +NewWithCancel creates a new websocket connection with the specified options + +Input parameters: +- ctx: context.Context object +- ctxCancel: allow passing in own cancel +- apiKey: string containing the Deepgram API key +- cOptions: ClientOptions which allows overriding things like hostname, version of the API, etc. +- sOptions: SpeakOptions which allows overriding things like model, etc. +- callback: SpeakMessageCallback is a callback which lets you perform actions based on platform messages + +Notes: + - If apiKey is an empty string, the Deepgram API KEY is read from the environment variable DEEPGRAM_API_KEY + - The callback handler is set to the default handler +*/ +func NewUsingCallbackWithCancel(ctx context.Context, ctxCancel context.CancelFunc, apiKey string, cOptions *clientinterfaces.ClientOptions, sOptions *clientinterfaces.WSSpeakOptions, callback msginterfaces.SpeakMessageCallback) (*WSCallback, error) { + klog.V(6).Infof("speak.New() ENTER\n") + + if apiKey != "" { + cOptions.APIKey = apiKey + } + err := cOptions.Parse() + if err != nil { + klog.V(1).Infof("ClientOptions.Parse() failed. Err: %v\n", err) + return nil, err + } + err = sOptions.Check() + if err != nil { + klog.V(1).Infof("SpeakOptions.Check() failed. 
Err: %v\n", err) + return nil, err + } + + if callback == nil { + klog.V(2).Infof("Using DefaultCallbackHandler.\n") + callback = websocketv1api.NewDefaultCallbackHandler() + } + + // init + var router commoninterfaces.Router + router = websocketv1api.NewCallbackRouter(callback) + + // init + conn := Client{ + cOptions: cOptions, + sOptions: sOptions, + callback: callback, + router: &router, + ctx: ctx, + ctxCancel: ctxCancel, + } + + var handler commoninterfaces.WebSocketHandler + handler = &conn + conn.WSClient = common.NewWS(ctx, ctxCancel, apiKey, cOptions, &handler, &router) + + klog.V(3).Infof("NewDeepGramWSClient Succeeded\n") + klog.V(6).Infof("speak.New() LEAVE\n") + + return &conn, nil +} diff --git a/pkg/client/speak/v1/websocket/new_using_chan.go b/pkg/client/speak/v1/websocket/new_using_chan.go new file mode 100644 index 00000000..b7a4c074 --- /dev/null +++ b/pkg/client/speak/v1/websocket/new_using_chan.go @@ -0,0 +1,109 @@ +// Copyright 2024 Deepgram SDK contributors. All Rights Reserved. +// Use of this source code is governed by a MIT license that can be found in the LICENSE file. 
+// SPDX-License-Identifier: MIT + +package websocketv1 + +import ( + "context" + + klog "k8s.io/klog/v2" + + websocketv1api "github.com/deepgram/deepgram-go-sdk/pkg/api/speak/v1/websocket" + msginterfaces "github.com/deepgram/deepgram-go-sdk/pkg/api/speak/v1/websocket/interfaces" + common "github.com/deepgram/deepgram-go-sdk/pkg/client/common/v1" + commoninterfaces "github.com/deepgram/deepgram-go-sdk/pkg/client/common/v1/interfaces" + clientinterfaces "github.com/deepgram/deepgram-go-sdk/pkg/client/interfaces" +) + +/* +NewForDemo creates a new websocket connection with all default options + +Notes: + - The Deepgram API KEY is read from the environment variable DEEPGRAM_API_KEY +*/ +func NewUsingChanForDemo(ctx context.Context, options *clientinterfaces.WSSpeakOptions) (*WSChannel, error) { + return NewUsingChan(ctx, "", &clientinterfaces.ClientOptions{}, options, nil) +} + +/* +NewWithDefaults creates a new websocket connection with all default options + +Notes: + - The Deepgram API KEY is read from the environment variable DEEPGRAM_API_KEY + - The chans handler is set to the default handler which just prints all messages to the console +*/ +func NewUsingChanWithDefaults(ctx context.Context, options *clientinterfaces.WSSpeakOptions, chans msginterfaces.SpeakMessageChan) (*WSChannel, error) { // gocritic:ignore + return NewUsingChan(ctx, "", &clientinterfaces.ClientOptions{}, options, chans) +} + +/* +New creates a new websocket connection with the specified options + +Input parameters: +- ctx: context.Context object +- apiKey: string containing the Deepgram API key +- cOptions: ClientOptions which allows overriding things like hostname, version of the API, etc. +- tOptions: LiveTranscriptionOptions which allows overriding things like language, model, etc. 
+- chans: LiveMessageCallback which is a chans that allows you to perform actions based on the transcription +*/ +func NewUsingChan(ctx context.Context, apiKey string, cOptions *clientinterfaces.ClientOptions, sOptions *clientinterfaces.WSSpeakOptions, chans msginterfaces.SpeakMessageChan) (*WSChannel, error) { + ctx, ctxCancel := context.WithCancel(ctx) + return NewUsingChanWithCancel(ctx, ctxCancel, apiKey, cOptions, sOptions, chans) +} + +/* +New creates a new websocket connection with the specified options + +Input parameters: +- ctx: context.Context object +- ctxCancel: allow passing in own cancel +- apiKey: string containing the Deepgram API key +- cOptions: ClientOptions which allows overriding things like hostname, version of the API, etc. +- tOptions: LiveTranscriptionOptions which allows overriding things like language, model, etc. +- chans: LiveMessageCallback which is a chans that allows you to perform actions based on the transcription +*/ +func NewUsingChanWithCancel(ctx context.Context, ctxCancel context.CancelFunc, apiKey string, cOptions *clientinterfaces.ClientOptions, sOptions *clientinterfaces.WSSpeakOptions, chans msginterfaces.SpeakMessageChan) (*WSChannel, error) { + klog.V(6).Infof("speak.New() ENTER\n") + + if apiKey != "" { + cOptions.APIKey = apiKey + } + err := cOptions.Parse() + if err != nil { + klog.V(1).Infof("ClientOptions.Parse() failed. Err: %v\n", err) + return nil, err + } + err = sOptions.Check() + if err != nil { + klog.V(1).Infof("TranscribeOptions.Check() failed. 
Err: %v\n", err) + return nil, err + } + + if chans == nil { + klog.V(2).Infof("Using DefaultCallbackHandler.\n") + chans = websocketv1api.NewDefaultChanHandler() + } + + // init + var router commoninterfaces.Router + router = websocketv1api.NewChanRouter(chans) + + conn := WSChannel{ + cOptions: cOptions, + sOptions: sOptions, + chans: make([]*msginterfaces.SpeakMessageChan, 0), + router: &router, + ctx: ctx, + ctxCancel: ctxCancel, + } + + var handler commoninterfaces.WebSocketHandler + handler = &conn + conn.WSClient = common.NewWS(ctx, ctxCancel, apiKey, cOptions, &handler, &router) + + klog.V(3).Infof("NewDeepGramWSClient Succeeded\n") + klog.V(6).Infof("speak.New() LEAVE\n") + + return &conn, nil +} diff --git a/pkg/client/speak/v1/websocket/types.go b/pkg/client/speak/v1/websocket/types.go new file mode 100644 index 00000000..45f17b56 --- /dev/null +++ b/pkg/client/speak/v1/websocket/types.go @@ -0,0 +1,67 @@ +// Copyright 2023-2024 Deepgram SDK contributors. All Rights Reserved. +// Use of this source code is governed by a MIT license that can be found in the LICENSE file. 
+// SPDX-License-Identifier: MIT + +package websocketv1 + +import ( + "context" + "sync" + "time" + + msginterface "github.com/deepgram/deepgram-go-sdk/pkg/api/speak/v1/websocket/interfaces" + common "github.com/deepgram/deepgram-go-sdk/pkg/client/common/v1" + commoninterfaces "github.com/deepgram/deepgram-go-sdk/pkg/client/common/v1/interfaces" + interfaces "github.com/deepgram/deepgram-go-sdk/pkg/client/interfaces/v1" +) + +// external structs +type TextSource struct { + Type string `json:"type"` + Text string `json:"text"` +} + +// internal structs +type controlMessage struct { + Type string `json:"type"` +} + +// Client is an alias for WSCallback +// Deprecated: use WSCallback instead +type Client = WSCallback + +// WSCallback is a struct representing the websocket client connection using callbacks +type WSCallback struct { + *common.WSClient + ctx context.Context + ctxCancel context.CancelFunc + + cOptions *interfaces.ClientOptions + sOptions *interfaces.WSSpeakOptions + + callback msginterface.SpeakMessageCallback + router *commoninterfaces.Router + + // internal constants for retry, waits, back-off, etc. + lastDatagram *time.Time + muFinal sync.RWMutex + flushCount int64 +} + +// WSChannel is a struct representing the websocket client connection using channels +type WSChannel struct { + *common.WSClient + ctx context.Context + ctxCancel context.CancelFunc + + cOptions *interfaces.ClientOptions + sOptions *interfaces.WSSpeakOptions + + chans []*msginterface.SpeakMessageChan + router *commoninterfaces.Router + + // internal constants for retry, waits, back-off, etc. + lastDatagram *time.Time + muFinal sync.RWMutex + flushCount int64 +} diff --git a/tests/daily_test/prerecorded_test.go b/tests/daily_test/prerecorded_test.go index f848714f..f6e978ee 100644 --- a/tests/daily_test/prerecorded_test.go +++ b/tests/daily_test/prerecorded_test.go @@ -29,8 +29,8 @@ const ( ) const ( - FromURLSmartFormat = "Yep. I said it before and I'll say it again. 
Life moves pretty fast. You don't stop and look around once in a while, you could miss it." - FromURLSummarize = "Yep. I said it before, and I'll say it again. Life moves pretty fast. You don't stop and look around once in a while, you could miss it." + FromURLSmartFormat = "Yep. I said it before, and I'll say it again. Life moves pretty fast. You don't stop and look around once in a while, you could miss it." + FromURLSummarize = "Yep. I said it before and I'll say it again. Life moves pretty fast. You don't stop and look around once in a while, you could miss it." ) func init() { diff --git a/tests/response_data/642c86c60eedbc4af873632b86d68164149599cf97131d81a63a2711f0563d37-response.json b/tests/response_data/642c86c60eedbc4af873632b86d68164149599cf97131d81a63a2711f0563d37-response.json index 4aab9149..249d61cc 100755 --- a/tests/response_data/642c86c60eedbc4af873632b86d68164149599cf97131d81a63a2711f0563d37-response.json +++ b/tests/response_data/642c86c60eedbc4af873632b86d68164149599cf97131d81a63a2711f0563d37-response.json @@ -1 +1 @@ -{"metadata":{"transaction_key":"deprecated","request_id":"7644cc73-0901-4363-a99d-2fcbf9486ef4","sha256":"5324da68ede209a16ac69a38e8cd29cee4d754434a041166cda3a1f5e0b24566","created":"2024-05-10T19:39:37.077Z","duration":17.566313,"channels":1,"models":["1abfe86b-e047-4eed-858a-35e5625b41ee"],"model_info":{"1abfe86b-e047-4eed-858a-35e5625b41ee":{"name":"2-general-nova","version":"2024-01-06.5664","arch":"nova-2"}},"summary_info":{"model_uuid":"67875a7f-c9c4-48a0-aa55-5bdb8a91c34a"}},"results":{"channels":[{"alternatives":[{"transcript":"Yep. I said it before, and I'll say it again. Life moves pretty fast. 
You don't stop and look around once in a while, you could miss it.","confidence":0.9993993,"words":[{"word":"yep","start":5.52,"end":6.02,"confidence":0.9983343,"punctuated_word":"Yep."},{"word":"i","start":7.095,"end":7.2549996,"confidence":0.86866945,"punctuated_word":"I"},{"word":"said","start":7.2549996,"end":7.415,"confidence":0.9355344,"punctuated_word":"said"},{"word":"it","start":7.415,"end":7.495,"confidence":0.9991001,"punctuated_word":"it"},{"word":"before","start":7.495,"end":7.975,"confidence":0.75923455,"punctuated_word":"before,"},{"word":"and","start":7.975,"end":8.135,"confidence":0.9998159,"punctuated_word":"and"},{"word":"i'll","start":8.135,"end":8.295,"confidence":0.9995924,"punctuated_word":"I'll"},{"word":"say","start":8.295,"end":8.455,"confidence":0.9993993,"punctuated_word":"say"},{"word":"it","start":8.455,"end":8.615,"confidence":0.9994319,"punctuated_word":"it"},{"word":"again","start":8.615,"end":9.115,"confidence":0.9314046,"punctuated_word":"again."},{"word":"life","start":9.975,"end":10.375,"confidence":0.99926835,"punctuated_word":"Life"},{"word":"moves","start":10.375,"end":10.695,"confidence":0.99976486,"punctuated_word":"moves"},{"word":"pretty","start":10.695,"end":11.014999,"confidence":0.9997489,"punctuated_word":"pretty"},{"word":"fast","start":11.014999,"end":11.514999,"confidence":0.9996352,"punctuated_word":"fast."},{"word":"you","start":11.975,"end":12.215,"confidence":0.957061,"punctuated_word":"You"},{"word":"don't","start":12.215,"end":12.455,"confidence":0.99992,"punctuated_word":"don't"},{"word":"stop","start":12.455,"end":12.695,"confidence":0.99986804,"punctuated_word":"stop"},{"word":"and","start":12.695,"end":12.855,"confidence":0.9994267,"punctuated_word":"and"},{"word":"look","start":12.855,"end":13.014999,"confidence":0.9998821,"punctuated_word":"look"},{"word":"around","start":13.014999,"end":13.334999,"confidence":0.9997942,"punctuated_word":"around"},{"word":"once","start":13.334999,"end":13.575,"confidence
":0.9990452,"punctuated_word":"once"},{"word":"in","start":13.575,"end":13.735,"confidence":0.9969682,"punctuated_word":"in"},{"word":"a","start":13.735,"end":13.815,"confidence":0.9727506,"punctuated_word":"a"},{"word":"while","start":13.815,"end":14.315,"confidence":0.95074844,"punctuated_word":"while,"},{"word":"you","start":14.561313,"end":14.7213125,"confidence":0.99757296,"punctuated_word":"you"},{"word":"could","start":14.7213125,"end":14.961312,"confidence":0.99941814,"punctuated_word":"could"},{"word":"miss","start":14.961312,"end":15.461312,"confidence":0.99948657,"punctuated_word":"miss"},{"word":"it","start":17.281313,"end":17.566313,"confidence":0.99636185,"punctuated_word":"it."}]}]}],"summary":{"short":"Yep. I said it before, and I'll say it again. Life moves pretty fast. You don't stop and look around once in a while, you could miss it.","result":"success"}}} \ No newline at end of file +{"metadata":{"transaction_key":"deprecated","request_id":"29040720-d419-48c5-88b1-5933fa01c132","sha256":"5324da68ede209a16ac69a38e8cd29cee4d754434a041166cda3a1f5e0b24566","created":"2024-08-26T16:47:46.221Z","duration":17.566313,"channels":1,"models":["1abfe86b-e047-4eed-858a-35e5625b41ee"],"model_info":{"1abfe86b-e047-4eed-858a-35e5625b41ee":{"name":"2-general-nova","version":"2024-01-06.5664","arch":"nova-2"}},"summary_info":{"model_uuid":"67875a7f-c9c4-48a0-aa55-5bdb8a91c34a"}},"results":{"channels":[{"alternatives":[{"transcript":"Yep. I said it before, and I'll say it again. Life moves pretty fast. 
You don't stop and look around once in a while, you could miss it.","confidence":0.99953806,"words":[{"word":"yep","start":5.6,"end":6.1,"confidence":0.9976238,"punctuated_word":"Yep."},{"word":"i","start":7.04,"end":7.2799997,"confidence":0.71035343,"punctuated_word":"I"},{"word":"said","start":7.2799997,"end":7.52,"confidence":0.96610147,"punctuated_word":"said"},{"word":"it","start":7.52,"end":7.6,"confidence":0.99953806,"punctuated_word":"it"},{"word":"before","start":7.6,"end":7.9199996,"confidence":0.8144645,"punctuated_word":"before,"},{"word":"and","start":7.9199996,"end":8.08,"confidence":0.9998975,"punctuated_word":"and"},{"word":"i'll","start":8.08,"end":8.24,"confidence":0.99988437,"punctuated_word":"I'll"},{"word":"say","start":8.24,"end":8.48,"confidence":0.9997116,"punctuated_word":"say"},{"word":"it","start":8.48,"end":8.639999,"confidence":0.9998079,"punctuated_word":"it"},{"word":"again","start":8.639999,"end":9.139999,"confidence":0.95415795,"punctuated_word":"again."},{"word":"life","start":9.991312,"end":10.391313,"confidence":0.99934644,"punctuated_word":"Life"},{"word":"moves","start":10.391313,"end":10.711312,"confidence":0.99980146,"punctuated_word":"moves"},{"word":"pretty","start":10.711312,"end":11.031313,"confidence":0.9998349,"punctuated_word":"pretty"},{"word":"fast","start":11.031313,"end":11.531313,"confidence":0.9997705,"punctuated_word":"fast."},{"word":"you","start":11.991312,"end":12.231313,"confidence":0.9602717,"punctuated_word":"You"},{"word":"don't","start":12.231313,"end":12.4713125,"confidence":0.99991965,"punctuated_word":"don't"},{"word":"stop","start":12.4713125,"end":12.711312,"confidence":0.99985266,"punctuated_word":"stop"},{"word":"and","start":12.711312,"end":12.871312,"confidence":0.99942976,"punctuated_word":"and"},{"word":"look","start":12.871312,"end":13.031313,"confidence":0.999892,"punctuated_word":"look"},{"word":"around","start":13.031313,"end":13.351313,"confidence":0.9998568,"punctuated_word":"around"},{"w
ord":"once","start":13.351313,"end":13.591312,"confidence":0.99925345,"punctuated_word":"once"},{"word":"in","start":13.591312,"end":13.671312,"confidence":0.9984509,"punctuated_word":"in"},{"word":"a","start":13.671312,"end":13.831312,"confidence":0.9846156,"punctuated_word":"a"},{"word":"while","start":13.831312,"end":14.331312,"confidence":0.94432104,"punctuated_word":"while,"},{"word":"you","start":14.631312,"end":14.791312,"confidence":0.9986889,"punctuated_word":"you"},{"word":"could","start":14.791312,"end":14.951313,"confidence":0.9996587,"punctuated_word":"could"},{"word":"miss","start":14.951313,"end":15.191313,"confidence":0.99969184,"punctuated_word":"miss"},{"word":"it","start":15.191313,"end":15.691313,"confidence":0.99777055,"punctuated_word":"it."}]}]}],"summary":{"short":"Yep. I said it before, and I'll say it again. Life moves pretty fast. You don't stop and look around once in a while, you could miss it.","result":"success"}}} \ No newline at end of file diff --git a/tests/response_data/bfae00d50d521f470ff9d1943f32225fcfeffe51eff47984886930b71fae0929-response.json b/tests/response_data/bfae00d50d521f470ff9d1943f32225fcfeffe51eff47984886930b71fae0929-response.json index 0c05fb56..2c78f154 100755 --- a/tests/response_data/bfae00d50d521f470ff9d1943f32225fcfeffe51eff47984886930b71fae0929-response.json +++ b/tests/response_data/bfae00d50d521f470ff9d1943f32225fcfeffe51eff47984886930b71fae0929-response.json @@ -1 +1 @@ -{"metadata":{"transaction_key":"deprecated","request_id":"af3c350b-e0b4-45dd-91cc-78bf43694cab","sha256":"5324da68ede209a16ac69a38e8cd29cee4d754434a041166cda3a1f5e0b24566","created":"2024-05-10T19:39:36.327Z","duration":17.566313,"channels":1,"models":["30089e05-99d1-4376-b32e-c263170674af"],"model_info":{"30089e05-99d1-4376-b32e-c263170674af":{"name":"2-general-nova","version":"2024-01-09.29447","arch":"nova-2"}}},"results":{"channels":[{"alternatives":[{"transcript":"Yep. I said it before and I'll say it again. Life moves pretty fast. 
You don't stop and look around once in a while, you could miss it.","confidence":0.998215,"words":[{"word":"yep","start":5.52,"end":6.02,"confidence":0.99847394,"punctuated_word":"Yep."},{"word":"i","start":7.095,"end":7.2549996,"confidence":0.84308124,"punctuated_word":"I"},{"word":"said","start":7.2549996,"end":7.415,"confidence":0.9382116,"punctuated_word":"said"},{"word":"it","start":7.415,"end":7.495,"confidence":0.9984346,"punctuated_word":"it"},{"word":"before","start":7.495,"end":7.975,"confidence":0.9997732,"punctuated_word":"before"},{"word":"and","start":7.975,"end":8.135,"confidence":0.5573371,"punctuated_word":"and"},{"word":"i'll","start":8.135,"end":8.295,"confidence":0.998215,"punctuated_word":"I'll"},{"word":"say","start":8.295,"end":8.455,"confidence":0.9986902,"punctuated_word":"say"},{"word":"it","start":8.455,"end":8.615,"confidence":0.99852645,"punctuated_word":"it"},{"word":"again","start":8.615,"end":9.115,"confidence":0.8480171,"punctuated_word":"again."},{"word":"life","start":9.975,"end":10.295,"confidence":0.99577326,"punctuated_word":"Life"},{"word":"moves","start":10.295,"end":10.695,"confidence":0.99854964,"punctuated_word":"moves"},{"word":"pretty","start":10.695,"end":11.014999,"confidence":0.99935335,"punctuated_word":"pretty"},{"word":"fast","start":11.014999,"end":11.514999,"confidence":0.99927515,"punctuated_word":"fast."},{"word":"you","start":11.975,"end":12.215,"confidence":0.9485283,"punctuated_word":"You"},{"word":"don't","start":12.215,"end":12.455,"confidence":0.99980193,"punctuated_word":"don't"},{"word":"stop","start":12.455,"end":12.695,"confidence":0.9998211,"punctuated_word":"stop"},{"word":"and","start":12.695,"end":12.855,"confidence":0.99849033,"punctuated_word":"and"},{"word":"look","start":12.855,"end":13.094999,"confidence":0.99972147,"punctuated_word":"look"},{"word":"around","start":13.094999,"end":13.334999,"confidence":0.99948287,"punctuated_word":"around"},{"word":"once","start":13.334999,"end":13.575,"conf
idence":0.9980332,"punctuated_word":"once"},{"word":"in","start":13.575,"end":13.735,"confidence":0.9971307,"punctuated_word":"in"},{"word":"a","start":13.735,"end":13.815,"confidence":0.9540613,"punctuated_word":"a"},{"word":"while","start":13.815,"end":14.315,"confidence":0.97138655,"punctuated_word":"while,"},{"word":"you","start":14.561313,"end":14.7213125,"confidence":0.98991334,"punctuated_word":"you"},{"word":"could","start":14.7213125,"end":14.961312,"confidence":0.99663407,"punctuated_word":"could"},{"word":"miss","start":14.961312,"end":15.461312,"confidence":0.99736553,"punctuated_word":"miss"},{"word":"it","start":17.281313,"end":17.566313,"confidence":0.989954,"punctuated_word":"it."}],"paragraphs":{"transcript":"\nYep. I said it before and I'll say it again. Life moves pretty fast. You don't stop and look around once in a while, you could miss it.","paragraphs":[{"sentences":[{"text":"Yep.","start":5.52,"end":6.02},{"text":"I said it before and I'll say it again.","start":7.095,"end":9.115},{"text":"Life moves pretty fast.","start":9.975,"end":11.514999},{"text":"You don't stop and look around once in a while, you could miss it.","start":11.975,"end":17.566313}],"num_words":28,"start":5.52,"end":17.566313}]}}]}]}} \ No newline at end of file +{"metadata":{"transaction_key":"deprecated","request_id":"b4692c7a-6db1-45b5-9c41-65054f098083","sha256":"5324da68ede209a16ac69a38e8cd29cee4d754434a041166cda3a1f5e0b24566","created":"2024-08-26T16:47:43.269Z","duration":17.566313,"channels":1,"models":["30089e05-99d1-4376-b32e-c263170674af"],"model_info":{"30089e05-99d1-4376-b32e-c263170674af":{"name":"2-general-nova","version":"2024-01-09.29447","arch":"nova-2"}}},"results":{"channels":[{"alternatives":[{"transcript":"Yep. I said it before, and I'll say it again. Life moves pretty fast. 
You don't stop and look around once in a while, you could miss it.","confidence":0.99853826,"words":[{"word":"yep","start":5.52,"end":6.02,"confidence":0.99584043,"punctuated_word":"Yep."},{"word":"i","start":7.04,"end":7.2799997,"confidence":0.5126306,"punctuated_word":"I"},{"word":"said","start":7.2799997,"end":7.44,"confidence":0.9672295,"punctuated_word":"said"},{"word":"it","start":7.44,"end":7.6,"confidence":0.9997284,"punctuated_word":"it"},{"word":"before","start":7.6,"end":7.9199996,"confidence":0.7846241,"punctuated_word":"before,"},{"word":"and","start":7.9199996,"end":8.16,"confidence":0.9998627,"punctuated_word":"and"},{"word":"i'll","start":8.16,"end":8.32,"confidence":0.9998944,"punctuated_word":"I'll"},{"word":"say","start":8.32,"end":8.48,"confidence":0.9996517,"punctuated_word":"say"},{"word":"it","start":8.48,"end":8.639999,"confidence":0.99982834,"punctuated_word":"it"},{"word":"again","start":8.639999,"end":9.139999,"confidence":0.97370577,"punctuated_word":"again."},{"word":"life","start":9.991312,"end":10.391313,"confidence":0.9957366,"punctuated_word":"Life"},{"word":"moves","start":10.391313,"end":10.711312,"confidence":0.9988586,"punctuated_word":"moves"},{"word":"pretty","start":10.711312,"end":11.031313,"confidence":0.9996014,"punctuated_word":"pretty"},{"word":"fast","start":11.031313,"end":11.531313,"confidence":0.9995537,"punctuated_word":"fast."},{"word":"you","start":12.071312,"end":12.231313,"confidence":0.9514749,"punctuated_word":"You"},{"word":"don't","start":12.231313,"end":12.4713125,"confidence":0.99988735,"punctuated_word":"don't"},{"word":"stop","start":12.4713125,"end":12.711312,"confidence":0.99979633,"punctuated_word":"stop"},{"word":"and","start":12.711312,"end":12.871312,"confidence":0.9987136,"punctuated_word":"and"},{"word":"look","start":12.871312,"end":13.031313,"confidence":0.9996673,"punctuated_word":"look"},{"word":"around","start":13.031313,"end":13.351313,"confidence":0.9995766,"punctuated_word":"around"},{"wor
d":"once","start":13.351313,"end":13.591312,"confidence":0.998198,"punctuated_word":"once"},{"word":"in","start":13.591312,"end":13.751312,"confidence":0.99853826,"punctuated_word":"in"},{"word":"a","start":13.751312,"end":13.831312,"confidence":0.9861093,"punctuated_word":"a"},{"word":"while","start":13.831312,"end":14.331312,"confidence":0.92627394,"punctuated_word":"while,"},{"word":"you","start":14.631312,"end":14.791312,"confidence":0.997024,"punctuated_word":"you"},{"word":"could","start":14.791312,"end":14.951313,"confidence":0.9983543,"punctuated_word":"could"},{"word":"miss","start":14.951313,"end":15.191313,"confidence":0.9984425,"punctuated_word":"miss"},{"word":"it","start":15.191313,"end":15.691313,"confidence":0.9950415,"punctuated_word":"it."}],"paragraphs":{"transcript":"\nYep. I said it before, and I'll say it again. Life moves pretty fast. You don't stop and look around once in a while, you could miss it.","paragraphs":[{"sentences":[{"text":"Yep.","start":5.52,"end":6.02},{"text":"I said it before, and I'll say it again.","start":7.04,"end":9.139999},{"text":"Life moves pretty fast.","start":9.991312,"end":11.531313},{"text":"You don't stop and look around once in a while, you could miss it.","start":12.071312,"end":15.691313}],"num_words":28,"start":5.52,"end":15.691313}]}}]}]}} \ No newline at end of file