Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Encapsulate the Prerecorded Object to Hide Internal Details #103

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 25 additions & 14 deletions examples/prerecorded/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,17 @@
package main

import (
"context"
"encoding/json"
"fmt"
"log"
"os"
"strings"

api "github.com/deepgram-devs/deepgram-go-sdk/pkg/api/prerecorded/v1"
prettyjson "github.com/hokaccha/go-prettyjson"

prerecorded "github.com/deepgram-devs/deepgram-go-sdk/pkg/api/prerecorded/v1"
interfaces "github.com/deepgram-devs/deepgram-go-sdk/pkg/client/interfaces"
client "github.com/deepgram-devs/deepgram-go-sdk/pkg/client/prerecorded"
)

Expand All @@ -25,18 +29,21 @@ func main() {
os.Exit(1)
}

dg := client.New(deepgramApiKey)
// context
ctx := context.Background()

prClient := api.New(dg)
c := client.New(deepgramApiKey)
dg := prerecorded.New(c)

filePath := "https://static.deepgram.com/examples/Bueller-Life-moves-pretty-fast.wav"
var res interface{}
var err error

if isURL(filePath) {
res, err = prClient.PreRecordedFromURL(
api.UrlSource{Url: filePath},
api.PreRecordedTranscriptionOptions{
res, err = dg.FromURL(
ctx,
filePath,
interfaces.PreRecordedTranscriptionOptions{
Punctuate: true,
Diarize: true,
Language: "en-US",
Expand All @@ -54,11 +61,10 @@ func main() {
}
defer file.Close()

source := api.ReadStreamSource{Stream: file, Mimetype: "YOUR_FILE_MIME_TYPE"}

res, err = prClient.PreRecordedFromStream(
source,
api.PreRecordedTranscriptionOptions{
res, err = dg.FromStream(
ctx,
file,
interfaces.PreRecordedTranscriptionOptions{
Punctuate: true,
Diarize: true,
Language: "en-US",
Expand All @@ -71,13 +77,18 @@ func main() {
}
}

jsonStr, err := json.MarshalIndent(res, "", " ")
data, err := json.Marshal(res)
if err != nil {
fmt.Println("Error marshaling JSON:", err)
log.Printf("RecognitionResult json.Marshal failed. Err: %v\n", err)
return
}

log.Printf("%s", jsonStr)
prettyJson, err := prettyjson.Format(data)
if err != nil {
log.Printf("prettyjson.Marshal failed. Err: %v\n", err)
return
}
log.Printf("\n\nResult:\n%s\n\n", prettyJson)
}

// Function to check if a string is a valid URL
Expand Down
38 changes: 0 additions & 38 deletions examples/projects/main.go

This file was deleted.

2 changes: 1 addition & 1 deletion examples/streaming/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ func main() {
Punctuate: true,
}

dgClient, err := client.NewWithDefaults(ctx, transcriptOptions)
dgClient, err := client.NewWithDefaults(ctx, "", transcriptOptions)
if err != nil {
log.Println("ERROR creating LiveTranscription connection:", err)
return
Expand Down
17 changes: 17 additions & 0 deletions pkg/api/prerecorded/v1/constants.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
// Copyright 2023 Deepgram SDK contributors. All Rights Reserved.
// Use of this source code is governed by a MIT license that can be found in the LICENSE file.
// SPDX-License-Identifier: MIT

package prerecorded

import (
"errors"
)

// Sentinel errors exported by the prerecorded package. Compare against them
// with errors.Is.
var (
// ErrInvalidInput reports that a required input was not found.
ErrInvalidInput = errors.New("required input was not found")

// ErrInvalidURIExtension reports that no period could be found to indicate
// a file extension.
ErrInvalidURIExtension = errors.New("couldn't find a period to indicate a file extension")
)
62 changes: 62 additions & 0 deletions pkg/api/prerecorded/v1/interfaces/interfaces.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
// Copyright 2023 Deepgram SDK contributors. All Rights Reserved.
// Use of this source code is governed by a MIT license that can be found in the LICENSE file.
// SPDX-License-Identifier: MIT

package interfaces

import (
"encoding/json"
"errors"
"fmt"
"net/http"
"strings"
)

// ToWebVTT renders the utterances of the response as a WebVTT document.
//
// The output starts with the "WEBVTT" header and a NOTE block carrying the
// request ID and creation time taken from resp.Metadata, followed by one cue
// per utterance. Cues are numbered from 1 and their timestamps are produced
// by SecondsToTimestamp.
//
// An error is returned when resp.Results.Utterances is nil, since the cues are
// built exclusively from utterances.
func (resp *PreRecordedResponse) ToWebVTT() (string, error) {
	if resp.Results.Utterances == nil {
		return "", errors.New("this function requires a transcript that was generated with the utterances feature")
	}

	// A Builder avoids re-copying the whole document for every cue, which
	// repeated string concatenation would do.
	var vtt strings.Builder
	vtt.WriteString("WEBVTT\n\n")
	vtt.WriteString("NOTE\nTranscription provided by Deepgram\nRequest ID: " + resp.Metadata.RequestId + "\nCreated: " + resp.Metadata.Created + "\n\n")

	for i, utterance := range resp.Results.Utterances {
		start := SecondsToTimestamp(utterance.Start)
		end := SecondsToTimestamp(utterance.End)
		fmt.Fprintf(&vtt, "%d\n%s --> %s\n%s\n\n", i+1, start, end, utterance.Transcript)
	}
	return vtt.String(), nil
}

// ToSRT renders the utterances of the response as a SubRip (SRT) document.
//
// One numbered entry (starting at 1) is emitted per utterance. Timestamps are
// produced by SecondsToTimestamp and then have their "." decimal separator
// replaced by ",", as SRT requires, on BOTH the start and the end time.
//
// An error is returned when resp.Results.Utterances is nil, since the entries
// are built exclusively from utterances.
func (resp *PreRecordedResponse) ToSRT() (string, error) {
	if resp.Results.Utterances == nil {
		return "", errors.New("this function requires a transcript that was generated with the utterances feature")
	}

	// A Builder avoids re-copying the whole document for every entry, which
	// repeated string concatenation would do.
	var srt strings.Builder

	for i, utterance := range resp.Results.Utterances {
		// SRT uses a comma between seconds and milliseconds; convert both
		// ends of the range, not just the end time.
		start := strings.ReplaceAll(SecondsToTimestamp(utterance.Start), ".", ",")
		end := strings.ReplaceAll(SecondsToTimestamp(utterance.End), ".", ",")
		fmt.Fprintf(&srt, "%d\n%s --> %s\n%s\n\n", i+1, start, end, utterance.Transcript)
	}
	return srt.String(), nil
}

// SecondsToTimestamp formats an offset given in seconds as "HH:MM:SS.mmm".
//
// Hours and minutes are zero-padded to at least two digits; the seconds part
// is zero-padded to two integer digits and carries three decimal places.
func SecondsToTimestamp(seconds float64) string {
	hours := int(seconds / 3600)
	minutes := int((seconds - float64(hours*3600)) / 60)
	seconds = seconds - float64(hours*3600) - float64(minutes*60)
	// The width in a %f verb counts the whole field ("SS.mmm" is 6 characters),
	// so 06 is needed to get two zero-padded integer digits.
	return fmt.Sprintf("%02d:%02d:%06.3f", hours, minutes, seconds)
}

// GetJson decodes the JSON body of resp into target and closes the body.
//
// target must be a pointer accepted by encoding/json; any decoding error is
// returned unchanged. A nil response or a response without a body yields an
// error instead of a nil-pointer panic.
func GetJson(resp *http.Response, target interface{}) error {
	if resp == nil || resp.Body == nil {
		return errors.New("nil HTTP response or response body")
	}
	// Always release the body, whether or not decoding succeeds.
	defer resp.Body.Close()

	return json.NewDecoder(resp.Body).Decode(target)
}
140 changes: 140 additions & 0 deletions pkg/api/prerecorded/v1/interfaces/types.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
// Copyright 2023 Deepgram SDK contributors. All Rights Reserved.
// Use of this source code is governed by a MIT license that can be found in the LICENSE file.
// SPDX-License-Identifier: MIT

package interfaces

// Shared/common structs

// Metadata mirrors the "metadata" object of a PreRecordedResponse. Each field
// is populated from the JSON key named in its struct tag.
type Metadata struct {
TransactionKey string `json:"transaction_key"`
RequestId string `json:"request_id"`
Sha256 string `json:"sha256"`
Created string `json:"created"`
Duration float64 `json:"duration"`
Channels int `json:"channels"`
Models []string `json:"models"`
// ModelInfo maps a string key to name/version/arch details.
// NOTE(review): the keys presumably correspond to the entries of Models — confirm.
ModelInfo map[string]struct {
Name string `json:"name"`
Version string `json:"version"`
Arch string `json:"arch"`
} `json:"model_info"`
// Warnings is left out of the encoded JSON when the slice is empty.
Warnings []*Warning `json:"warnings,omitempty"`
}

// Warning is one entry of Metadata.Warnings, decoded from the JSON keys
// "parameter", "type" and "message".
type Warning struct {
Parameter string `json:"parameter"`
Type string `json:"type"`
Message string `json:"message"`
}

// Hit is one entry of Search.Hits: a snippet with its confidence and its
// start/end positions.
// NOTE(review): Start and End are presumably offsets in seconds — confirm.
type Hit struct {
Confidence float64 `json:"confidence"`
Start float64 `json:"start"`
End float64 `json:"end"`
Snippet string `json:"snippet"`
}

// Search pairs a query string with the hits found for it. It is one entry of
// Channel.Search.
type Search struct {
Query string `json:"query"`
Hits []Hit `json:"hits"`
}

// WordBase describes a single word, as listed in Alternative.Words and
// Utterance.Words.
type WordBase struct {
Word string `json:"word"`
Start float64 `json:"start"`
End float64 `json:"end"`
Confidence float64 `json:"confidence"`
// Speaker is a pointer: nil is omitted when encoding, while a pointer to 0
// is still written out.
Speaker *int `json:"speaker,omitempty"`
// The remaining fields are omitted from encoded JSON when they hold their
// zero value.
SpeakerConfidence float64 `json:"speaker_confidence,omitempty"`
Punctuated_Word string `json:"punctuated_word,omitempty"`
Sentiment string `json:"sentiment,omitempty"`
}

// Alternative is one entry of Channel.Alternatives: a transcript with its
// confidence and word list, plus optional summaries, paragraphs, topics and
// entities that are omitted from encoded JSON when empty or nil.
type Alternative struct {
Transcript string `json:"transcript"`
Confidence float64 `json:"confidence"`
Words []WordBase `json:"words"`
Summaries []*SummaryV1 `json:"summaries,omitempty"`
Paragraphs *ParagraphGroup `json:"paragraphs,omitempty"`
Topics []*TopicBase `json:"topics,omitempty"`
Entities []*EntityBase `json:"entities,omitempty"`
}

// ParagraphGroup is the value of Alternative.Paragraphs: a transcript string
// together with the list of paragraphs.
type ParagraphGroup struct {
Transcript string `json:"transcript"`
Paragraphs []ParagraphBase `json:"paragraphs"`
}

// ParagraphBase is one paragraph of a ParagraphGroup: its sentences, a word
// count, and start/end positions.
type ParagraphBase struct {
Sentences []SentenceBase `json:"sentences"`
NumWords int `json:"num_words"`
Start float64 `json:"start"`
End float64 `json:"end"`
}

// SentenceBase is one sentence of a ParagraphBase: its text and start/end
// positions.
type SentenceBase struct {
Text string `json:"text"`
Start float64 `json:"start"`
End float64 `json:"end"`
}

// EntityBase is one entry of Alternative.Entities: a label/value pair with a
// confidence and a start/end word position.
// NOTE(review): StartWord and EndWord presumably index into the word list of
// the enclosing Alternative — confirm.
type EntityBase struct {
Label string `json:"label"`
Value string `json:"value"`
Confidence float64 `json:"confidence"`
StartWord int `json:"start_word"`
EndWord int `json:"end_word"`
}

// TopicBase is one entry of Alternative.Topics: a text span, its start/end
// word positions, and the topics attached to it.
type TopicBase struct {
Text string `json:"text"`
StartWord int `json:"start_word"`
EndWord int `json:"end_word"`
Topics []Topic `json:"topics"`
}

// Topic is one entry of TopicBase.Topics: a topic string and its confidence.
type Topic struct {
Topic string `json:"topic"`
Confidence float64 `json:"confidence"`
}

// Channel is one entry of Results.Channels. It always carries the list of
// alternatives; Search and DetectedLanguage are omitted from encoded JSON
// when empty.
type Channel struct {
Search []*Search `json:"search,omitempty"`
Alternatives []Alternative `json:"alternatives"`
DetectedLanguage string `json:"detected_language,omitempty"`
}

// Utterance is one entry of Results.Utterances. ToWebVTT and ToSRT read its
// Start, End and Transcript fields to build their output.
type Utterance struct {
Start float64 `json:"start"`
End float64 `json:"end"`
Confidence float64 `json:"confidence"`
Channel int `json:"channel"`
Transcript string `json:"transcript"`
Words []WordBase `json:"words"`
// Speaker is a pointer: nil is omitted when encoding, while a pointer to 0
// is still written out.
Speaker *int `json:"speaker,omitempty"`
Id string `json:"id"`
}

// Results mirrors the "results" object of a PreRecordedResponse.
type Results struct {
// Utterances must be non-nil for ToWebVTT and ToSRT to succeed; it is
// omitted from encoded JSON when empty.
Utterances []*Utterance `json:"utterances,omitempty"`
Channels []Channel `json:"channels"`
// Summary is omitted from encoded JSON when nil.
Summary *SummaryV2 `json:"summary,omitempty"`
}

// SummaryV1 is one entry of Alternative.Summaries: a summary string with a
// start/end word position.
type SummaryV1 struct {
Summary string `json:"summary"`
StartWord int `json:"start_word"`
EndWord int `json:"end_word"`
}

// SummaryV2 is the value of Results.Summary, decoded from the JSON keys
// "short" and "result".
type SummaryV2 struct {
Short string `json:"short"`
Result string `json:"result"`
}

// Response

// PreRecordedResponse is the top-level response type: an optional request ID
// plus the "metadata" and "results" objects. It is the receiver of ToWebVTT
// and ToSRT.
type PreRecordedResponse struct {
// Request_id is omitted from encoded JSON when empty. Metadata.RequestId
// uses the same "request_id" JSON key one level down.
Request_id string `json:"request_id,omitempty"`
Metadata Metadata `json:"metadata"`
Results Results `json:"results"`
}
Loading