Skip to content

Commit

Permalink
Refactor Prerecorded API to Abstract REST Internals Away From User
Browse files Browse the repository at this point in the history
  • Loading branch information
dvonthenen committed Oct 25, 2023
1 parent 39281f8 commit 4d28e39
Show file tree
Hide file tree
Showing 29 changed files with 1,571 additions and 423 deletions.
39 changes: 25 additions & 14 deletions examples/prerecorded/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,17 @@
package main

import (
"context"
"encoding/json"
"fmt"
"log"
"os"
"strings"

api "github.com/deepgram-devs/deepgram-go-sdk/pkg/api/prerecorded/v1"
prettyjson "github.com/hokaccha/go-prettyjson"

prerecorded "github.com/deepgram-devs/deepgram-go-sdk/pkg/api/prerecorded/v1"
interfaces "github.com/deepgram-devs/deepgram-go-sdk/pkg/client/interfaces"
client "github.com/deepgram-devs/deepgram-go-sdk/pkg/client/prerecorded"
)

Expand All @@ -25,18 +29,21 @@ func main() {
os.Exit(1)
}

dg := client.New(deepgramApiKey)
// context
ctx := context.Background()

prClient := api.New(dg)
c := client.New(deepgramApiKey)
dg := prerecorded.New(c)

filePath := "https://static.deepgram.com/examples/Bueller-Life-moves-pretty-fast.wav"
var res interface{}
var err error

if isURL(filePath) {
res, err = prClient.PreRecordedFromURL(
api.UrlSource{Url: filePath},
api.PreRecordedTranscriptionOptions{
res, err = dg.FromURL(
ctx,
filePath,
interfaces.PreRecordedTranscriptionOptions{
Punctuate: true,
Diarize: true,
Language: "en-US",
Expand All @@ -54,11 +61,10 @@ func main() {
}
defer file.Close()

source := api.ReadStreamSource{Stream: file, Mimetype: "YOUR_FILE_MIME_TYPE"}

res, err = prClient.PreRecordedFromStream(
source,
api.PreRecordedTranscriptionOptions{
res, err = dg.FromStream(
ctx,
file,
interfaces.PreRecordedTranscriptionOptions{
Punctuate: true,
Diarize: true,
Language: "en-US",
Expand All @@ -71,13 +77,18 @@ func main() {
}
}

jsonStr, err := json.MarshalIndent(res, "", " ")
data, err := json.Marshal(res)
if err != nil {
fmt.Println("Error marshaling JSON:", err)
log.Printf("RecognitionResult json.Marshal failed. Err: %v\n", err)
return
}

log.Printf("%s", jsonStr)
prettyJson, err := prettyjson.Format(data)
if err != nil {
log.Printf("prettyjson.Marshal failed. Err: %v\n", err)
return
}
log.Printf("\n\nResult:\n%s\n\n", prettyJson)
}

// Function to check if a string is a valid URL
Expand Down
2 changes: 1 addition & 1 deletion examples/streaming/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ func main() {
Punctuate: true,
}

dgClient, err := client.NewWithDefaults(ctx, transcriptOptions)
dgClient, err := client.NewWithDefaults(ctx, "", transcriptOptions)
if err != nil {
log.Println("ERROR creating LiveTranscription connection:", err)
return
Expand Down
17 changes: 17 additions & 0 deletions pkg/api/prerecorded/v1/constants.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
// Copyright 2023 Deepgram SDK contributors. All Rights Reserved.
// Use of this source code is governed by a MIT license that can be found in the LICENSE file.
// SPDX-License-Identifier: MIT

package prerecorded

import (
"errors"
)

var (
// ErrInvalidInput is the sentinel error for a required input that was not
// found. Compare against it with errors.Is.
ErrInvalidInput = errors.New("required input was not found")

// ErrInvalidURIExtension is the sentinel error for a URI in which no period
// could be found to indicate a file extension.
ErrInvalidURIExtension = errors.New("couldn't find a period to indicate a file extension")
)
62 changes: 62 additions & 0 deletions pkg/api/prerecorded/v1/interfaces/interfaces.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
// Copyright 2023 Deepgram SDK contributors. All Rights Reserved.
// Use of this source code is governed by a MIT license that can be found in the LICENSE file.
// SPDX-License-Identifier: MIT

package interfaces

import (
"encoding/json"
"errors"
"fmt"
"net/http"
"strings"
)

// ToWebVTT renders the utterances of the response as a WebVTT document.
//
// The output begins with the "WEBVTT" header and a NOTE block that carries
// Metadata.RequestId and Metadata.Created, followed by one cue per utterance.
// Cues are numbered from 1 and their start/end times are formatted with
// SecondsToTimestamp.
//
// An error is returned when Results.Utterances is nil, i.e. the transcript was
// not generated with the utterances feature.
func (resp *PreRecordedResponse) ToWebVTT() (string, error) {
	if resp.Results.Utterances == nil {
		return "", errors.New("this function requires a transcript that was generated with the utterances feature")
	}

	// A Builder avoids re-copying the whole document on every appended cue.
	var vtt strings.Builder
	vtt.WriteString("WEBVTT\n\n")
	vtt.WriteString("NOTE\nTranscription provided by Deepgram\nRequest ID: " + resp.Metadata.RequestId + "\nCreated: " + resp.Metadata.Created + "\n\n")

	for i, utterance := range resp.Results.Utterances {
		start := SecondsToTimestamp(utterance.Start)
		end := SecondsToTimestamp(utterance.End)
		// Writes to a strings.Builder never fail, so the Fprintf result is not checked.
		fmt.Fprintf(&vtt, "%d\n%s --> %s\n%s\n\n", i+1, start, end, utterance.Transcript)
	}
	return vtt.String(), nil
}

// ToSRT renders the utterances of the response as a SubRip (.srt) document.
//
// One cue is emitted per utterance, numbered from 1. Times come from
// SecondsToTimestamp and have their "." replaced by "," on BOTH the start and
// the end, because SRT separates milliseconds with a comma.
//
// An error is returned when Results.Utterances is nil, i.e. the transcript was
// not generated with the utterances feature.
func (resp *PreRecordedResponse) ToSRT() (string, error) {
	if resp.Results.Utterances == nil {
		return "", errors.New("this function requires a transcript that was generated with the utterances feature")
	}

	// A Builder avoids re-copying the whole document on every appended cue.
	var srt strings.Builder

	for i, utterance := range resp.Results.Utterances {
		start := strings.ReplaceAll(SecondsToTimestamp(utterance.Start), ".", ",")
		end := strings.ReplaceAll(SecondsToTimestamp(utterance.End), ".", ",")
		// Writes to a strings.Builder never fail, so the Fprintf result is not checked.
		fmt.Fprintf(&srt, "%d\n%s --> %s\n%s\n\n", i+1, start, end, utterance.Transcript)
	}
	return srt.String(), nil
}

// SecondsToTimestamp formats a duration given in seconds as "HH:MM:SS.mmm".
//
// The value is rounded to the nearest millisecond before it is split into
// fields, so a carry propagates correctly (59.9996 becomes "00:01:00.000"
// rather than "00:00:60.000"). Every field is zero-padded; the hours field
// grows past two digits when needed. Negative and NaN inputs are clamped to
// zero because a subtitle timestamp cannot be negative.
func SecondsToTimestamp(seconds float64) string {
	// The negated comparison also catches NaN, which fails every ordering test.
	if !(seconds > 0) {
		seconds = 0
	}

	// Work in whole milliseconds so the fields below are exact integers.
	totalMillis := int64(seconds*1000 + 0.5)

	millis := totalMillis % 1000
	totalSeconds := totalMillis / 1000
	secs := totalSeconds % 60
	minutes := (totalSeconds / 60) % 60
	hours := totalSeconds / 3600

	return fmt.Sprintf("%02d:%02d:%02d.%03d", hours, minutes, secs, millis)
}

// GetJson decodes the JSON body of resp into target and returns the decoder's
// error, if any. The response body is closed before the function returns,
// whether or not decoding succeeded.
func GetJson(resp *http.Response, target interface{}) error {
	body := resp.Body
	defer body.Close()

	decoder := json.NewDecoder(body)
	return decoder.Decode(target)
}
140 changes: 140 additions & 0 deletions pkg/api/prerecorded/v1/interfaces/types.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
// Copyright 2023 Deepgram SDK contributors. All Rights Reserved.
// Use of this source code is governed by a MIT license that can be found in the LICENSE file.
// SPDX-License-Identifier: MIT

package interfaces

// shared/common structs
// Metadata mirrors the "metadata" object of a pre-recorded response (it is the
// type of PreRecordedResponse.Metadata). Each field is decoded from the JSON
// key named in its struct tag.
type Metadata struct {
TransactionKey string `json:"transaction_key"`
// RequestId is written into the NOTE header produced by ToWebVTT.
RequestId string `json:"request_id"`
Sha256 string `json:"sha256"`
// Created is written verbatim into the NOTE header produced by ToWebVTT.
Created string `json:"created"`
// NOTE(review): Duration is presumably expressed in seconds — confirm against the API reference.
Duration float64 `json:"duration"`
Channels int `json:"channels"`
Models []string `json:"models"`
// ModelInfo maps a string key to the name, version and arch of a model.
// NOTE(review): the keys presumably match the entries of Models — confirm.
ModelInfo map[string]struct {
Name string `json:"name"`
Version string `json:"version"`
Arch string `json:"arch"`
} `json:"model_info"`
// Warnings is left out of marshaled JSON when the slice is empty.
Warnings []*Warning `json:"warnings,omitempty"`
}

// Warning is one entry of Metadata.Warnings. It carries the JSON fields
// "parameter", "type" and "message" as plain strings.
type Warning struct {
Parameter string `json:"parameter"`
Type string `json:"type"`
Message string `json:"message"`
}

// Hit is one entry of Search.Hits: a snippet with its confidence and its
// start/end position.
// NOTE(review): Start and End are presumably offsets in seconds — confirm.
type Hit struct {
Confidence float64 `json:"confidence"`
Start float64 `json:"start"`
End float64 `json:"end"`
Snippet string `json:"snippet"`
}

// Search pairs a query string with the hits recorded for it. Channel.Search
// holds a slice of pointers to this type.
type Search struct {
Query string `json:"query"`
Hits []Hit `json:"hits"`
}

// WordBase describes a single word; it is the element type of
// Alternative.Words and Utterance.Words.
type WordBase struct {
Word string `json:"word"`
// NOTE(review): Start and End are presumably offsets in seconds — confirm.
Start float64 `json:"start"`
End float64 `json:"end"`
Confidence float64 `json:"confidence"`
// Speaker is a pointer: a nil value is omitted when marshaling, while a
// pointer to 0 is still emitted.
Speaker *int `json:"speaker,omitempty"`
// The remaining fields are omitted from marshaled JSON when they hold their
// zero value.
SpeakerConfidence float64 `json:"speaker_confidence,omitempty"`
Punctuated_Word string `json:"punctuated_word,omitempty"`
Sentiment string `json:"sentiment,omitempty"`
}

// Alternative is one entry of Channel.Alternatives: a transcript with its
// confidence and word list. Summaries, Paragraphs, Topics and Entities are
// optional and are omitted from marshaled JSON when empty or nil.
type Alternative struct {
Transcript string `json:"transcript"`
Confidence float64 `json:"confidence"`
Words []WordBase `json:"words"`
Summaries []*SummaryV1 `json:"summaries,omitempty"`
Paragraphs *ParagraphGroup `json:"paragraphs,omitempty"`
Topics []*TopicBase `json:"topics,omitempty"`
Entities []*EntityBase `json:"entities,omitempty"`
}

// ParagraphGroup is the type of Alternative.Paragraphs: a transcript string
// together with the list of paragraphs.
type ParagraphGroup struct {
Transcript string `json:"transcript"`
Paragraphs []ParagraphBase `json:"paragraphs"`
}

// ParagraphBase is one entry of ParagraphGroup.Paragraphs: a list of
// sentences, a word count, and a start/end position.
// NOTE(review): Start and End are presumably offsets in seconds — confirm.
type ParagraphBase struct {
Sentences []SentenceBase `json:"sentences"`
NumWords int `json:"num_words"`
Start float64 `json:"start"`
End float64 `json:"end"`
}

// SentenceBase is one entry of ParagraphBase.Sentences: the sentence text and
// its start/end position.
// NOTE(review): Start and End are presumably offsets in seconds — confirm.
type SentenceBase struct {
Text string `json:"text"`
Start float64 `json:"start"`
End float64 `json:"end"`
}

// EntityBase is one entry of Alternative.Entities: a label/value pair with a
// confidence and a start/end word position.
// NOTE(review): StartWord and EndWord are presumably indexes into
// Alternative.Words; whether EndWord is inclusive is not visible here — confirm.
type EntityBase struct {
Label string `json:"label"`
Value string `json:"value"`
Confidence float64 `json:"confidence"`
StartWord int `json:"start_word"`
EndWord int `json:"end_word"`
}

// TopicBase is one entry of Alternative.Topics: a text span, its start/end
// word position, and the topics attached to it.
// NOTE(review): StartWord and EndWord are presumably indexes into
// Alternative.Words — confirm.
type TopicBase struct {
Text string `json:"text"`
StartWord int `json:"start_word"`
EndWord int `json:"end_word"`
Topics []Topic `json:"topics"`
}

// Topic is one entry of TopicBase.Topics: a topic string and its confidence.
type Topic struct {
Topic string `json:"topic"`
Confidence float64 `json:"confidence"`
}

// Channel is one entry of Results.Channels. It always carries the
// alternatives; Search and DetectedLanguage are omitted from marshaled JSON
// when empty.
type Channel struct {
Search []*Search `json:"search,omitempty"`
Alternatives []Alternative `json:"alternatives"`
DetectedLanguage string `json:"detected_language,omitempty"`
}

// Utterance is one entry of Results.Utterances. ToWebVTT and ToSRT emit one
// cue per utterance from its Start, End and Transcript fields.
type Utterance struct {
// Start and End are passed to SecondsToTimestamp by ToWebVTT and ToSRT, so
// they are treated as seconds.
Start float64 `json:"start"`
End float64 `json:"end"`
Confidence float64 `json:"confidence"`
Channel int `json:"channel"`
Transcript string `json:"transcript"`
Words []WordBase `json:"words"`
// Speaker is a pointer: a nil value is omitted when marshaling, while a
// pointer to 0 is still emitted.
Speaker *int `json:"speaker,omitempty"`
Id string `json:"id"`
}

// Results mirrors the "results" object of a pre-recorded response (it is the
// type of PreRecordedResponse.Results).
type Results struct {
// Utterances must be non-nil for ToWebVTT and ToSRT to succeed; both return
// an error otherwise.
Utterances []*Utterance `json:"utterances,omitempty"`
Channels []Channel `json:"channels"`
// Summary is omitted from marshaled JSON when nil.
Summary *SummaryV2 `json:"summary,omitempty"`
}

// SummaryV1 is one entry of Alternative.Summaries: a summary string with a
// start/end word position.
// NOTE(review): StartWord and EndWord are presumably indexes into
// Alternative.Words — confirm.
type SummaryV1 struct {
Summary string `json:"summary"`
StartWord int `json:"start_word"`
EndWord int `json:"end_word"`
}

// SummaryV2 is the type of Results.Summary. It carries the JSON fields "short"
// and "result" as plain strings.
// NOTE(review): the meaning of Result (e.g. a status value) is not visible
// here — confirm against the API reference.
type SummaryV2 struct {
Short string `json:"short"`
Result string `json:"result"`
}

// Response
// PreRecordedResponse is the top-level shape of a pre-recorded response. It is
// the receiver of ToWebVTT and ToSRT.
type PreRecordedResponse struct {
// Request_id is the optional top-level "request_id" key. It is separate from
// Metadata.RequestId, which is the value ToWebVTT writes into its header.
// NOTE(review): when the top-level key is populated is not visible here.
Request_id string `json:"request_id,omitempty"`
Metadata Metadata `json:"metadata"`
Results Results `json:"results"`
}
Loading

0 comments on commit 4d28e39

Please sign in to comment.