diff --git a/.github/workflows/tests-daily.yaml b/.github/workflows/tests-daily.yaml new file mode 100644 index 00000000..c10d7db6 --- /dev/null +++ b/.github/workflows/tests-daily.yaml @@ -0,0 +1,72 @@ +name: Test - Daily + +on: + workflow_dispatch: + repository_dispatch: + types: + - manual-daily-test + schedule: + - cron: "0 9 * * *" + +jobs: + daily-tests: + name: Daily Tests + # Only run this job if we're in the main repo, not a fork. + if: github.repository == 'deepgram/deepgram-python-sdk' + runs-on: ubuntu-latest + permissions: + pull-requests: write + timeout-minutes: 30 + steps: + + - name: Checkout code by commit + uses: actions/checkout@v4 + + - uses: actions/setup-python@v4 + with: + python-version: '3.10' + + - name: Config git + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + shell: bash + run: | + git config --global user.name "github-actions[bot]" + git config --global user.email "github-actions[bot]@users.noreply.github.com" + git config --global init.defaultBranch main + git config --global pull.rebase true + git config --global url."https://git:$GITHUB_TOKEN@github.com".insteadOf "https://github.com" + + - name: Get dependencies + shell: bash + run: | + make ensure-deps + + - name: Install Dependencies + run: | + pip install -r requirements.txt + pip install -r requirements-dev.txt + pip install -r examples/requirements-examples.txt + + - name: Run all checks + shell: bash + env: + DEEPGRAM_API_KEY: ${{ secrets.DEEPGRAM_API_KEY }} + run: | + make daily-test + + - name: Create PR to update Response Shapes + shell: bash + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + BRANCH_NAME="response-shape-${{ github.run_id }}" + git checkout -b "$BRANCH_NAME" + + # create a PR + git add -A ./tests/response_data + git commit -s -m "auto-generated - update Response Shapes" + git push origin "$BRANCH_NAME" + gh pr create --title "auto-generated - update Response Shapes" --body "auto-generated - update Response Shapes" --base "main" --head "$BRANCH_NAME" + sleep 10 + gh pr merge "$BRANCH_NAME" --delete-branch --squash --admin diff --git a/.github/workflows/tests-daily.yaml.DISABLE b/.github/workflows/tests-daily.yaml.DISABLE deleted file mode 100644 index 9793e36b..00000000 --- a/.github/workflows/tests-daily.yaml.DISABLE +++ /dev/null @@ -1,57 +0,0 @@ -name: Build - Daily Official Build - -on: - workflow_dispatch: - repository_dispatch: - types: - - manual-daily-build - schedule: - - cron: "0 9 * * *" - -jobs: - build-release: - name: Daily Build Tests - # Only run this job if we're in the main repo, not a fork. - if: github.repository == 'deepgram/deepgram-go-sdk' - runs-on: ubuntu-latest - steps: - - - name: Checkout code by commit - uses: actions/checkout@v4 - - - name: Set up Go 1.x - uses: actions/setup-go@v3 - with: - go-version: "1.19" - id: go - - - name: Config git - env: - GITHUB_TOKEN: ${{ secrets.GH_RELEASE_ACCESS_TOKEN }} - shell: bash - run: | - git config --global init.defaultBranch main - git config --global pull.rebase true - git config --global url."https://git:$GITHUB_TOKEN@github.com".insteadOf "https://github.com" - - - name: Get dependencies - shell: bash - run: | - make ensure-deps - - - name: Get dependencies - shell: bash - run: | - BRANCH_NAME="response-shape-${{ github.run_id }}" - git checkout -b "$BRANCH_NAME" - - # run daily tests - go test -v -run TestDaily_ ./...
- - # create a PR - git add -A ./tests/response_data - git commit -s -m "auto-generated - update Response Shapes" - git push origin "$BRANCH_NAME" - gh pr create --title "auto-generated - update API spec" --body "auto-generated - update API spec" --base "main" - sleep 30 - gh pr merge "$BRANCH_NAME" --delete-branch --squash --admin diff --git a/.github/workflows/tests-unit.yaml b/.github/workflows/tests-unit.yaml new file mode 100644 index 00000000..d391aeed --- /dev/null +++ b/.github/workflows/tests-unit.yaml @@ -0,0 +1,37 @@ +name: Test - Unit + +on: + pull_request: + types: + - assigned + - opened + - synchronize + - reopened +jobs: + build: + name: Unit Tests + # Only run this job if we're in the main repo, not a fork. + if: github.repository == 'deepgram/deepgram-python-sdk' + runs-on: ubuntu-latest + timeout-minutes: 5 + steps: + + - name: Checkout code by commit + uses: actions/checkout@v4 + + - uses: actions/setup-python@v4 + with: + python-version: '3.10' + + - name: Install Dependencies + run: | + pip install -r requirements.txt + pip install -r requirements-dev.txt + pip install -r examples/requirements-examples.txt + + - name: Run all checks + shell: bash + env: + DEEPGRAM_API_KEY: ${{ secrets.DEEPGRAM_API_KEY }} + run: | + make unit-test diff --git a/.github/workflows/tests-unit.yaml.DISABLE b/.github/workflows/tests-unit.yaml.DISABLE deleted file mode 100644 index f63c7fd4..00000000 --- a/.github/workflows/tests-unit.yaml.DISABLE +++ /dev/null @@ -1,47 +0,0 @@ -name: Go Tests - -on: - pull_request: - types: - - assigned - - opened - - synchronize - - reopened -jobs: - build: - name: Go Tests - # Only run this job if we're in the main repo, not a fork. - if: github.repository == 'deepgram/deepgram-go-sdk' - runs-on: ubuntu-latest - timeout-minutes: 5 - steps: - - - name: Checkout code by commit - uses: actions/checkout@v4 - - - name: Set up Go - uses: actions/setup-go@v4 - with: - go-version: "1.19" - - - name: Ensure dependencies installed - shell: bash - run: | - make ensure-deps - - - name: Go Tidy - shell: bash - run: go mod tidy - - - name: Go Mod - shell: bash - run: go mod download - - - name: Go Mod Verify - shell: bash - run: go mod verify - - - name: Run Tests - shell: bash - run: | - go test -v -run Test_ ./... diff --git a/.gitignore b/.gitignore index df7ea4ee..05330721 100644 --- a/.gitignore +++ b/.gitignore @@ -12,6 +12,8 @@ venv.bak/ __pycache__ *.egg-info dist/ +.mypy_cache/ +.pytest_cache/ # build build/ diff --git a/Makefile b/Makefile index 6a838ff4..012e60e6 100644 --- a/Makefile +++ b/Makefile @@ -34,7 +34,7 @@ version: #### display version of components @echo 'GOARCH: $(GOARCH)' @echo 'go version: $(shell go version)' -.PHONY: check lint pylint format black blackformat lint_files lint_diff static mypy mdlint shellcheck actionlint yamllint ### Performs all of the checks, lint'ing, etc available +.PHONY: check lint pylint format black blackformat lint-files lint-diff static mypy mdlint shellcheck actionlint yamllint ### Performs all of the checks, lint'ing, etc available check: lint static mdlint shellcheck actionlint yamllint .PHONY: ensure-deps @@ -44,22 +44,21 @@ ensure-deps: #### Ensure that all required dependency utilities are downloaded o GO_MODULES=$(shell find . -path "*/go.mod" | xargs -I _ dirname _) PYTHON_FILES=. 
-lint_files: PYTHON_FILES=deepgram/ examples/ -lint_diff: PYTHON_FILES=$(shell git diff --name-only --diff-filter=d main | grep -E '\.py$$') +lint-files: PYTHON_FILES=deepgram/ examples/ +lint-diff: PYTHON_FILES=$(shell git diff --name-only --diff-filter=d main | grep -E '\.py$$') -lint_files lint_diff: #### Performs Python formatting +lint-files lint-diff: #### Performs Python formatting black --target-version py310 $(PYTHON_FILES) -black blackformat format: lint_files +black blackformat format: lint-files -pylint: lint_files #### Performs Python linting +pylint: lint-files #### Performs Python linting pylint --disable=W0622 --disable=W0404 --disable=W0611 --rcfile .pylintrc deepgram lint: pylint #### Performs Python lint -static_files: PYTHON_FILES=deepgram/ static mypy: #### Performs static analysis - mypy --config-file mypy.ini --python-version 3.10 --exclude examples --exclude tests/edge_cases --exclude tests/expected_failures $(PYTHON_FILES) + mypy --config-file mypy.ini --python-version 3.10 --exclude tests --exclude examples $(PYTHON_FILES) mdlint: #### Performs Markdown lint # mdlint rules with common errors and possible fixes can be found here: @@ -75,3 +74,17 @@ yamllint: #### Performs yaml lint actionlint: #### Performs GitHub Actions lint actionlint ##### LINTING TARGETS + +##### TESTING TARGETS + +.PHONY: test daily-test unit-test +test: daily-test unit-test #### Run ALL tests + +daily-test: #### Run daily tests + @echo "Running daily tests" + python -m pytest -k daily_test + +unit-test: #### Run unit tests + @echo "Running unit tests" + python -m pytest -k unit_test +##### TESTING TARGETS \ No newline at end of file diff --git a/README.md b/README.md index 8c01194d..09a8836c 100644 --- a/README.md +++ b/README.md @@ -254,14 +254,32 @@ pip install -r requirements.txt pip install -e . ``` -### Testing +### Daily and Unit Tests -If you are looking to contribute or modify pytest code, then you need to install the following dependencies: +If you are looking to use, run, contribute to, or modify the daily/unit tests, you need to install the following dependencies: ```bash pip install -r requirements-dev.txt ``` +#### Daily Tests + +The daily tests run a series of checks against the real API endpoint and save the results in the `tests/response_data` folder. This response data is updated nightly to reflect the latest responses from the server. Running the daily tests requires a `DEEPGRAM_API_KEY` set in your environment variables. + +To run the daily tests: + +```bash +make daily-test +``` + +#### Unit Tests + +The unit tests run a series of checks against mock endpoints using the responses saved in `tests/response_data` by the daily tests. They simulate running against the real endpoint without actually reaching out to it: a `DEEPGRAM_API_KEY` must still be set in your environment variables, but no requests are sent to the server.
+ + ```bash + make unit-test + ``` + ## Getting Help We love to hear from you so if you have questions, comments or find a bug in the diff --git a/deepgram/clients/abstract_async_client.py b/deepgram/clients/abstract_async_client.py index 7e3dab7a..03b86a74 100644 --- a/deepgram/clients/abstract_async_client.py +++ b/deepgram/clients/abstract_async_client.py @@ -217,7 +217,12 @@ async def _handle_request( timeout = httpx.Timeout(30.0, connect=10.0) try: - async with httpx.AsyncClient(timeout=timeout) as client: + transport = kwargs.get("transport") + async with httpx.AsyncClient( + timeout=timeout, transport=transport + ) as client: + if transport: + kwargs.pop("transport") response = await client.request( method, _url, headers=_headers, **kwargs ) @@ -269,7 +274,12 @@ async def _handle_request_memory( timeout = httpx.Timeout(30.0, connect=10.0) try: - async with httpx.AsyncClient(timeout=timeout) as client: + transport = kwargs.get("transport") + async with httpx.AsyncClient( + timeout=timeout, transport=transport + ) as client: + if transport: + kwargs.pop("transport") response = await client.request( method, _url, headers=_headers, **kwargs ) @@ -334,7 +344,10 @@ async def _handle_request_raw( timeout = httpx.Timeout(30.0, connect=10.0) try: - client = httpx.AsyncClient(timeout=timeout) + transport = kwargs.get("transport") + client = httpx.AsyncClient(timeout=timeout, transport=transport) + if transport: + kwargs.pop("transport") req = client.build_request(method, _url, headers=_headers, **kwargs) return await client.send(req, stream=True) diff --git a/deepgram/clients/abstract_sync_client.py b/deepgram/clients/abstract_sync_client.py index 0d5dc813..6e9d2976 100644 --- a/deepgram/clients/abstract_sync_client.py +++ b/deepgram/clients/abstract_sync_client.py @@ -217,7 +217,10 @@ def _handle_request( timeout = httpx.Timeout(30.0, connect=10.0) try: - with httpx.Client(timeout=timeout) as client: + transport = kwargs.get("transport") + with httpx.Client(timeout=timeout, transport=transport) as client: + if transport: + kwargs.pop("transport") response = client.request(method, _url, headers=_headers, **kwargs) response.raise_for_status() return response.text @@ -267,7 +270,10 @@ def _handle_request_memory( timeout = httpx.Timeout(30.0, connect=10.0) try: - with httpx.Client(timeout=timeout) as client: + transport = kwargs.get("transport") + with httpx.Client(timeout=timeout, transport=transport) as client: + if transport: + kwargs.pop("transport") response = client.request(method, _url, headers=_headers, **kwargs) response.raise_for_status() @@ -330,7 +336,10 @@ def _handle_request_raw( timeout = httpx.Timeout(30.0, connect=10.0) try: - client = httpx.Client(timeout=timeout) + transport = kwargs.get("transport") + client = httpx.Client(timeout=timeout, transport=transport) + if transport: + kwargs.pop("transport") req = client.build_request(method, _url, headers=_headers, **kwargs) return client.send(req, stream=True) diff --git a/deepgram/clients/analyze/v1/async_client.py b/deepgram/clients/analyze/v1/async_client.py index 69d6bc94..1884afd5 100644 --- a/deepgram/clients/analyze/v1/async_client.py +++ b/deepgram/clients/analyze/v1/async_client.py @@ -45,6 +45,7 @@ async def analyze_url( headers: Optional[Dict] = None, timeout: Optional[httpx.Timeout] = None, endpoint: str = "v1/read", + **kwargs, ) -> Union[AsyncAnalyzeResponse, AnalyzeResponse]: """ Analyze text from a URL source.
@@ -72,6 +73,7 @@ async def analyze_url( addons=addons, timeout=timeout, endpoint=endpoint, + **kwargs, ) url = f"{self._config.url}/{endpoint}" @@ -102,6 +104,7 @@ async def analyze_url( headers=headers, json=body, timeout=timeout, + **kwargs, ) self._logger.info("json: %s", result) res = AnalyzeResponse.from_json(result) @@ -119,6 +122,7 @@ async def analyze_url_callback( headers: Optional[Dict] = None, timeout: Optional[httpx.Timeout] = None, endpoint: str = "v1/read", + **kwargs, ) -> AsyncAnalyzeResponse: """ Transcribes audio from a URL source and sends the result to a callback URL. @@ -171,6 +175,7 @@ async def analyze_url_callback( headers=headers, json=body, timeout=timeout, + **kwargs, ) self._logger.info("json: %s", result) res = AsyncAnalyzeResponse.from_json(result) @@ -187,6 +192,7 @@ async def analyze_text( headers: Optional[Dict] = None, timeout: Optional[httpx.Timeout] = None, endpoint: str = "v1/read", + **kwargs, ) -> Union[AsyncAnalyzeResponse, AnalyzeResponse]: """ Analyze text from a local file source. @@ -214,6 +220,7 @@ async def analyze_text( addons=addons, timeout=timeout, endpoint=endpoint, + **kwargs, ) url = f"{self._config.url}/{endpoint}" @@ -245,6 +252,7 @@ async def analyze_text( headers=headers, content=body, timeout=timeout, + **kwargs, ) self._logger.info("json: %s", result) res = AnalyzeResponse.from_json(result) @@ -262,6 +270,7 @@ async def analyze_text_callback( headers: Optional[Dict] = None, timeout: Optional[httpx.Timeout] = None, endpoint: str = "v1/read", + **kwargs, ) -> AsyncAnalyzeResponse: """ Transcribes audio from a local file source and sends the result to a callback URL. @@ -315,6 +324,7 @@ async def analyze_text_callback( headers=headers, json=body, timeout=timeout, + **kwargs, ) self._logger.info("json: %s", result) res = AsyncAnalyzeResponse.from_json(result) diff --git a/deepgram/clients/analyze/v1/client.py b/deepgram/clients/analyze/v1/client.py index 602694b1..a2ef5d94 100644 --- a/deepgram/clients/analyze/v1/client.py +++ b/deepgram/clients/analyze/v1/client.py @@ -45,6 +45,7 @@ def analyze_url( headers: Optional[Dict] = None, timeout: Optional[httpx.Timeout] = None, endpoint: str = "v1/read", + **kwargs, ) -> Union[AnalyzeResponse, AsyncAnalyzeResponse]: """ Analyze text from a URL source. @@ -76,6 +77,7 @@ def analyze_url( headers=headers, timeout=timeout, endpoint=endpoint, + **kwargs, ) url = f"{self._config.url}/{endpoint}" @@ -106,6 +108,7 @@ def analyze_url( headers=headers, json=body, timeout=timeout, + **kwargs, ) self._logger.info("json: %s", result) res = AnalyzeResponse.from_json(result) @@ -123,6 +126,7 @@ def analyze_url_callback( headers: Optional[Dict] = None, timeout: Optional[httpx.Timeout] = None, endpoint: str = "v1/read", + **kwargs, ) -> AsyncAnalyzeResponse: """ Transcribes audio from a URL source and sends the result to a callback URL. @@ -175,6 +179,7 @@ def analyze_url_callback( headers=headers, json=body, timeout=timeout, + **kwargs, ) self._logger.info("json: %s", result) res = AsyncAnalyzeResponse.from_json(result) @@ -191,6 +196,7 @@ def analyze_text( headers: Optional[Dict] = None, timeout: Optional[httpx.Timeout] = None, endpoint: str = "v1/read", + **kwargs, ) -> Union[AnalyzeResponse, AsyncAnalyzeResponse]: """ Analyze text from a local file source. 
@@ -222,6 +228,7 @@ def analyze_text( headers=headers, timeout=timeout, endpoint=endpoint, + **kwargs, ) url = f"{self._config.url}/{endpoint}" @@ -253,6 +260,7 @@ def analyze_text( headers=headers, content=body, timeout=timeout, + **kwargs, ) self._logger.info("json: %s", result) res = AnalyzeResponse.from_json(result) @@ -270,6 +278,7 @@ def analyze_text_callback( headers: Optional[Dict] = None, timeout: Optional[httpx.Timeout] = None, endpoint: str = "v1/read", + **kwargs, ) -> AsyncAnalyzeResponse: """ Transcribes audio from a local file source and sends the result to a callback URL. @@ -323,6 +332,7 @@ def analyze_text_callback( headers=headers, json=body, timeout=timeout, + **kwargs, ) self._logger.info("json: %s", result) res = AsyncAnalyzeResponse.from_json(result) diff --git a/deepgram/clients/analyze/v1/response.py b/deepgram/clients/analyze/v1/response.py index 346daa1b..3a09cce9 100644 --- a/deepgram/clients/analyze/v1/response.py +++ b/deepgram/clients/analyze/v1/response.py @@ -2,7 +2,7 @@ # Use of this source code is governed by a MIT license that can be found in the LICENSE file. # SPDX-License-Identifier: MIT -from typing import List, Optional +from typing import List, Optional, Dict, Any from dataclasses import dataclass, field from dataclasses_json import config as dataclass_config, DataClassJsonMixin @@ -10,30 +10,58 @@ from ...common import Sentiment -# Async Analyze Response Types: +# Base Classes: @dataclass -class AsyncAnalyzeResponse(DataClassJsonMixin): +class BaseResponse(DataClassJsonMixin): """ - Async Analyze Response + BaseResponse class used to define the common methods and properties for all response classes. """ - request_id: str = "" - def __getitem__(self, key): _dict = self.to_dict() return _dict[key] + def __setitem__(self, key, val): + self.__dict__[key] = val + def __str__(self) -> str: return self.to_json(indent=4) + def eval(self, key: str) -> str: + """ + This method is used to evaluate a key in the response object using a dot notation style method. 
+ """ + keys = key.split(".") + result: Dict[Any, Any] = self.to_dict() + for k in keys: + if isinstance(result, dict) and k in result: + result = result[k] + elif isinstance(result, list) and k.isdigit() and int(k) < len(result): + result = result[int(k)] + else: + return "" + return str(result) + + +# Async Analyze Response Types: + + +@dataclass +class AsyncAnalyzeResponse(BaseResponse): + """ + Async Analyze Response + """ + + request_id: str = "" + # Analyze Response Types: @dataclass -class IntentsInfo(DataClassJsonMixin): +class IntentsInfo(BaseResponse): """ Intents Info """ @@ -42,16 +70,9 @@ class IntentsInfo(DataClassJsonMixin): input_tokens: int = 0 output_tokens: int = 0 - def __getitem__(self, key): - _dict = self.to_dict() - return _dict[key] - - def __str__(self) -> str: - return self.to_json(indent=4) - @dataclass -class SentimentInfo(DataClassJsonMixin): +class SentimentInfo(BaseResponse): """ Sentiment Info """ @@ -60,16 +81,9 @@ class SentimentInfo(DataClassJsonMixin): input_tokens: int = 0 output_tokens: int = 0 - def __getitem__(self, key): - _dict = self.to_dict() - return _dict[key] - - def __str__(self) -> str: - return self.to_json(indent=4) - @dataclass -class SummaryInfo(DataClassJsonMixin): +class SummaryInfo(BaseResponse): """ Summary Info """ @@ -78,16 +92,9 @@ class SummaryInfo(DataClassJsonMixin): input_tokens: int = 0 output_tokens: int = 0 - def __getitem__(self, key): - _dict = self.to_dict() - return _dict[key] - - def __str__(self) -> str: - return self.to_json(indent=4) - @dataclass -class TopicsInfo(DataClassJsonMixin): +class TopicsInfo(BaseResponse): """ Topics Info """ @@ -96,16 +103,9 @@ class TopicsInfo(DataClassJsonMixin): input_tokens: int = 0 output_tokens: int = 0 - def __getitem__(self, key): - _dict = self.to_dict() - return _dict[key] - - def __str__(self) -> str: - return self.to_json(indent=4) - @dataclass -class Metadata(DataClassJsonMixin): +class Metadata(BaseResponse): """ Metadata """ @@ -138,12 +138,9 @@ def __getitem__(self, key): _dict["topics_info"] = TopicsInfo.from_dict(_dict["topics_info"]) return _dict[key] - def __str__(self) -> str: - return self.to_json(indent=4) - @dataclass -class Average(DataClassJsonMixin): +class Average(BaseResponse): """ Average """ @@ -157,28 +154,18 @@ def __getitem__(self, key): _dict["sentiment"] = Sentiment.from_dict(_dict["sentiment"]) return _dict[key] - def __str__(self) -> str: - return self.to_json(indent=4) - @dataclass -class Summary(DataClassJsonMixin): +class Summary(BaseResponse): """ Summary """ text: str = "" - def __getitem__(self, key): - _dict = self.to_dict() - return _dict[key] - - def __str__(self) -> str: - return self.to_json(indent=4) - @dataclass -class Topic(DataClassJsonMixin): +class Topic(BaseResponse): """ Topic """ @@ -186,16 +173,9 @@ class Topic(DataClassJsonMixin): topic: str = "" confidence_score: float = 0 - def __getitem__(self, key): - _dict = self.to_dict() - return _dict[key] - - def __str__(self) -> str: - return self.to_json(indent=4) - @dataclass -class Intent(DataClassJsonMixin): +class Intent(BaseResponse): """ Intent """ @@ -203,16 +183,9 @@ class Intent(DataClassJsonMixin): intent: str = "" confidence_score: float = 0 - def __getitem__(self, key): - _dict = self.to_dict() - return _dict[key] - - def __str__(self) -> str: - return self.to_json(indent=4) - @dataclass -class Segment(DataClassJsonMixin): +class Segment(BaseResponse): """ Segment """ @@ -241,12 +214,9 @@ def __getitem__(self, key): _dict["topics"] = Topic.from_dict(_dict["topics"]) 
return _dict[key] - def __str__(self) -> str: - return self.to_json(indent=4) - @dataclass -class Sentiments(DataClassJsonMixin): +class Sentiments(BaseResponse): """ Sentiments """ @@ -264,12 +234,9 @@ def __getitem__(self, key): _dict["average"] = Average.from_dict(_dict["average"]) return _dict[key] - def __str__(self) -> str: - return self.to_json(indent=4) - @dataclass -class Topics(DataClassJsonMixin): +class Topics(BaseResponse): """ Topics """ @@ -284,12 +251,9 @@ def __getitem__(self, key): ] return _dict[key] - def __str__(self) -> str: - return self.to_json(indent=4) - @dataclass -class Intents(DataClassJsonMixin): +class Intents(BaseResponse): """ Intents """ @@ -304,12 +268,9 @@ def __getitem__(self, key): ] return _dict[key] - def __str__(self) -> str: - return self.to_json(indent=4) - @dataclass -class Results(DataClassJsonMixin): +class Results(BaseResponse): """ Results """ @@ -339,15 +300,12 @@ def __getitem__(self, key): _dict["intents"] = Intents.from_dict(_dict["intents"]) return _dict[key] - def __str__(self) -> str: - return self.to_json(indent=4) - # Analyze Response Result: @dataclass -class AnalyzeResponse(DataClassJsonMixin): +class AnalyzeResponse(BaseResponse): """ Analyze Response """ @@ -367,8 +325,5 @@ def __getitem__(self, key): _dict["results"] = Results.from_dict(_dict["results"]) return _dict[key] - def __str__(self) -> str: - return self.to_json(indent=4) - SyncAnalyzeResponse = AnalyzeResponse diff --git a/deepgram/clients/listen/v1/rest/async_client.py b/deepgram/clients/listen/v1/rest/async_client.py index 4263cf25..18b9a42a 100644 --- a/deepgram/clients/listen/v1/rest/async_client.py +++ b/deepgram/clients/listen/v1/rest/async_client.py @@ -46,6 +46,7 @@ async def transcribe_url( headers: Optional[Dict] = None, timeout: Optional[httpx.Timeout] = None, endpoint: str = "v1/listen", + **kwargs, ) -> Union[AsyncPrerecordedResponse, PrerecordedResponse]: """ Transcribes audio from a URL source. @@ -77,6 +78,7 @@ async def transcribe_url( headers=headers, timeout=timeout, endpoint=endpoint, + **kwargs, ) url = f"{self._config.url}/{endpoint}" @@ -107,6 +109,7 @@ async def transcribe_url( headers=headers, json=body, timeout=timeout, + **kwargs, ) self._logger.info("json: %s", result) res = PrerecordedResponse.from_json(result) @@ -124,6 +127,7 @@ async def transcribe_url_callback( headers: Optional[Dict] = None, timeout: Optional[httpx.Timeout] = None, endpoint: str = "v1/listen", + **kwargs, ) -> AsyncPrerecordedResponse: """ Transcribes audio from a URL source and sends the result to a callback URL. @@ -176,6 +180,7 @@ async def transcribe_url_callback( headers=headers, json=body, timeout=timeout, + **kwargs, ) self._logger.info("json: %s", result) res = AsyncPrerecordedResponse.from_json(result) @@ -192,6 +197,7 @@ async def transcribe_file( headers: Optional[Dict] = None, timeout: Optional[httpx.Timeout] = None, endpoint: str = "v1/listen", + **kwargs, ) -> Union[AsyncPrerecordedResponse, PrerecordedResponse]: """ Transcribes audio from a local file source. 
@@ -223,6 +229,7 @@ async def transcribe_file( headers=headers, timeout=timeout, endpoint=endpoint, + **kwargs, ) url = f"{self._config.url}/{endpoint}" @@ -254,6 +261,7 @@ async def transcribe_file( headers=headers, content=body, timeout=timeout, + **kwargs, ) self._logger.info("json: %s", result) res = PrerecordedResponse.from_json(result) @@ -271,6 +279,7 @@ async def transcribe_file_callback( headers: Optional[Dict] = None, timeout: Optional[httpx.Timeout] = None, endpoint: str = "v1/listen", + **kwargs, ) -> AsyncPrerecordedResponse: """ Transcribes audio from a local file source and sends the result to a callback URL. @@ -324,6 +333,7 @@ async def transcribe_file_callback( headers=headers, content=body, timeout=timeout, + **kwargs, ) self._logger.info("json: %s", result) res = AsyncPrerecordedResponse.from_json(result) diff --git a/deepgram/clients/listen/v1/rest/client.py b/deepgram/clients/listen/v1/rest/client.py index e2604f4b..12553f4d 100644 --- a/deepgram/clients/listen/v1/rest/client.py +++ b/deepgram/clients/listen/v1/rest/client.py @@ -46,6 +46,7 @@ def transcribe_url( headers: Optional[Dict] = None, timeout: Optional[httpx.Timeout] = None, endpoint: str = "v1/listen", + **kwargs, ) -> Union[AsyncPrerecordedResponse, PrerecordedResponse]: """ Transcribes audio from a URL source. @@ -77,6 +78,7 @@ def transcribe_url( headers=headers, timeout=timeout, endpoint=endpoint, + **kwargs, ) url = f"{self._config.url}/{endpoint}" @@ -107,6 +109,7 @@ def transcribe_url( headers=headers, json=body, timeout=timeout, + **kwargs, ) self._logger.info("json: %s", result) res = PrerecordedResponse.from_json(result) @@ -124,6 +127,7 @@ def transcribe_url_callback( headers: Optional[Dict] = None, timeout: Optional[httpx.Timeout] = None, endpoint: str = "v1/listen", + **kwargs, ) -> AsyncPrerecordedResponse: """ Transcribes audio from a URL source and sends the result to a callback URL. @@ -176,6 +180,7 @@ def transcribe_url_callback( headers=headers, json=body, timeout=timeout, + **kwargs, ) self._logger.info("json: %s", result) res = AsyncPrerecordedResponse.from_json(result) @@ -192,6 +197,7 @@ def transcribe_file( headers: Optional[Dict] = None, timeout: Optional[httpx.Timeout] = None, endpoint: str = "v1/listen", + **kwargs, ) -> Union[AsyncPrerecordedResponse, PrerecordedResponse]: """ Transcribes audio from a local file source. @@ -223,6 +229,7 @@ def transcribe_file( headers=headers, timeout=timeout, endpoint=endpoint, + **kwargs, ) url = f"{self._config.url}/{endpoint}" @@ -255,6 +262,7 @@ def transcribe_file( headers=headers, content=body, timeout=timeout, + **kwargs, ) self._logger.info("json: %s", result) res = PrerecordedResponse.from_json(result) @@ -272,6 +280,7 @@ def transcribe_file_callback( headers: Optional[Dict] = None, timeout: Optional[httpx.Timeout] = None, endpoint: str = "v1/listen", + **kwargs, ) -> AsyncPrerecordedResponse: """ Transcribes audio from a local file source and sends the result to a callback URL. @@ -325,6 +334,7 @@ def transcribe_file_callback( headers=headers, content=body, timeout=timeout, + **kwargs, ) self._logger.info("json: %s", result) res = AsyncPrerecordedResponse.from_json(result) diff --git a/deepgram/clients/listen/v1/rest/response.py b/deepgram/clients/listen/v1/rest/response.py index b2e5afb6..7d9d478b 100644 --- a/deepgram/clients/listen/v1/rest/response.py +++ b/deepgram/clients/listen/v1/rest/response.py @@ -2,24 +2,23 @@ # Use of this source code is governed by a MIT license that can be found in the LICENSE file. 
# SPDX-License-Identifier: MIT -from typing import List, Optional, Dict +from typing import List, Optional, Dict, Any from dataclasses import dataclass, field from dataclasses_json import config as dataclass_config, DataClassJsonMixin from ....common import Sentiment -# Async Prerecorded Response Types: + +# Base Classes: @dataclass -class AsyncPrerecordedResponse(DataClassJsonMixin): +class BaseResponse(DataClassJsonMixin): """ - The response object for the async prerecorded API. + BaseResponse class used to define the common methods and properties for all response classes. """ - request_id: str = "" - def __getitem__(self, key): _dict = self.to_dict() return _dict[key] @@ -30,12 +29,39 @@ def __setitem__(self, key, val): def __str__(self) -> str: return self.to_json(indent=4) + def eval(self, key: str) -> str: + """ + This method is used to evaluate a key in the response object using a dot notation style method. + """ + keys = key.split(".") + result: Dict[Any, Any] = self.to_dict() + for k in keys: + if isinstance(result, dict) and k in result: + result = result[k] + elif isinstance(result, list) and k.isdigit() and int(k) < len(result): + result = result[int(k)] + else: + return "" + return str(result) + + +# Async Prerecorded Response Types: + + +@dataclass +class AsyncPrerecordedResponse(BaseResponse): + """ + The response object for the async prerecorded API. + """ + + request_id: str = "" + # Prerecorded Response Types: @dataclass -class SummaryInfo(DataClassJsonMixin): +class SummaryInfo(BaseResponse): """ The summary information for the response. """ @@ -44,19 +70,9 @@ class SummaryInfo(DataClassJsonMixin): output_tokens: int = 0 model_uuid: str = "" - def __getitem__(self, key): - _dict = self.to_dict() - return _dict[key] - - def __setitem__(self, key, val): - self.__dict__[key] = val - - def __str__(self) -> str: - return self.to_json(indent=4) - @dataclass -class ModelInfo(DataClassJsonMixin): +class ModelInfo(BaseResponse): """ The model information for the response. """ @@ -65,19 +81,9 @@ class ModelInfo(DataClassJsonMixin): version: str = "" arch: str = "" - def __getitem__(self, key): - _dict = self.to_dict() - return _dict[key] - - def __setitem__(self, key, val): - self.__dict__[key] = val - - def __str__(self) -> str: - return self.to_json(indent=4) - @dataclass -class IntentsInfo(DataClassJsonMixin): +class IntentsInfo(BaseResponse): """ The intents information for the response. """ @@ -86,19 +92,9 @@ class IntentsInfo(DataClassJsonMixin): input_tokens: int = 0 output_tokens: int = 0 - def __getitem__(self, key): - _dict = self.to_dict() - return _dict[key] - - def __setitem__(self, key, val): - self.__dict__[key] = val - - def __str__(self) -> str: - return self.to_json(indent=4) - @dataclass -class SentimentInfo(DataClassJsonMixin): +class SentimentInfo(BaseResponse): """ The sentiment information for the response. """ @@ -107,19 +103,9 @@ class SentimentInfo(DataClassJsonMixin): input_tokens: int = 0 output_tokens: int = 0 - def __getitem__(self, key): - _dict = self.to_dict() - return _dict[key] - - def __setitem__(self, key, val): - self.__dict__[key] = val - - def __str__(self) -> str: - return self.to_json(indent=4) - @dataclass -class TopicsInfo(DataClassJsonMixin): +class TopicsInfo(BaseResponse): """ The topics information for the response. 
""" @@ -128,19 +114,9 @@ class TopicsInfo(DataClassJsonMixin): input_tokens: int = 0 output_tokens: int = 0 - def __getitem__(self, key): - _dict = self.to_dict() - return _dict[key] - - def __setitem__(self, key, val): - self.__dict__[key] = val - - def __str__(self) -> str: - return self.to_json(indent=4) - @dataclass -class Metadata(DataClassJsonMixin): # pylint: disable=too-many-instance-attributes +class Metadata(BaseResponse): # pylint: disable=too-many-instance-attributes """ The metadata for the response. """ @@ -203,15 +179,9 @@ def __getitem__(self, key): _dict["extra"] = [str(extra) for _, extra in _dict["extra"].items()] return _dict[key] - def __setitem__(self, key, val): - self.__dict__[key] = val - - def __str__(self) -> str: - return self.to_json(indent=4) - @dataclass -class SummaryV1(DataClassJsonMixin): +class SummaryV1(BaseResponse): """ The summary information for the response. """ @@ -220,22 +190,12 @@ class SummaryV1(DataClassJsonMixin): start_word: float = 0 end_word: float = 0 - def __getitem__(self, key): - _dict = self.to_dict() - return _dict[key] - - def __setitem__(self, key, val): - self.__dict__[key] = val - - def __str__(self) -> str: - return self.to_json(indent=4) - Summaries = SummaryV1 @dataclass -class SummaryV2(DataClassJsonMixin): +class SummaryV2(BaseResponse): """ The summary information for the response. """ @@ -243,22 +203,12 @@ class SummaryV2(DataClassJsonMixin): result: str = "" short: str = "" - def __getitem__(self, key): - _dict = self.to_dict() - return _dict[key] - - def __setitem__(self, key, val): - self.__dict__[key] = val - - def __str__(self) -> str: - return self.to_json(indent=4) - Summary = SummaryV2 @dataclass -class Hit(DataClassJsonMixin): +class Hit(BaseResponse): """ The hit information for the response. """ @@ -268,19 +218,9 @@ class Hit(DataClassJsonMixin): end: float = 0 snippet: Optional[str] = "" - def __getitem__(self, key): - _dict = self.to_dict() - return _dict[key] - - def __setitem__(self, key, val): - self.__dict__[key] = val - - def __str__(self) -> str: - return self.to_json(indent=4) - @dataclass -class Word(DataClassJsonMixin): # pylint: disable=too-many-instance-attributes +class Word(BaseResponse): # pylint: disable=too-many-instance-attributes """ The word information for the response. """ @@ -314,15 +254,9 @@ def __getitem__(self, key): _dict["sentiment"] = Sentiment.from_dict(_dict["sentiment"]) return _dict[key] - def __setitem__(self, key, val): - self.__dict__[key] = val - - def __str__(self) -> str: - return self.to_json(indent=4) - @dataclass -class Sentence(DataClassJsonMixin): +class Sentence(BaseResponse): """ The sentence information for the response. """ @@ -343,15 +277,9 @@ def __getitem__(self, key): _dict["sentiment"] = Sentiment.from_dict(_dict["sentiment"]) return _dict[key] - def __setitem__(self, key, val): - self.__dict__[key] = val - - def __str__(self) -> str: - return self.to_json(indent=4) - @dataclass -class Paragraph(DataClassJsonMixin): +class Paragraph(BaseResponse): """ The paragraph information for the response. """ @@ -380,15 +308,9 @@ def __getitem__(self, key): _dict["sentiment"] = Sentiment.from_dict(_dict["sentiment"]) return _dict[key] - def __setitem__(self, key, val): - self.__dict__[key] = val - - def __str__(self) -> str: - return self.to_json(indent=4) - @dataclass -class Paragraphs(DataClassJsonMixin): +class Paragraphs(BaseResponse): """ The paragraphs information for the response. 
""" @@ -406,15 +328,9 @@ def __getitem__(self, key): ] return _dict[key] - def __setitem__(self, key, val): - self.__dict__[key] = val - - def __str__(self) -> str: - return self.to_json(indent=4) - @dataclass -class Translation(DataClassJsonMixin): +class Translation(BaseResponse): """ The translation information for the response. """ @@ -422,16 +338,6 @@ class Translation(DataClassJsonMixin): language: Optional[str] = "" translation: Optional[str] = "" - def __getitem__(self, key): - _dict = self.to_dict() - return _dict[key] - - def __setitem__(self, key, val): - self.__dict__[key] = val - - def __str__(self) -> str: - return self.to_json(indent=4) - @dataclass class Warning( @@ -445,19 +351,9 @@ class Warning( type: str = "" message: str = "" - def __getitem__(self, key): - _dict = self.to_dict() - return _dict[key] - - def __setitem__(self, key, val): - self.__dict__[key] = val - - def __str__(self) -> str: - return self.to_json(indent=4) - @dataclass -class Search(DataClassJsonMixin): +class Search(BaseResponse): """ The search information for the response. """ @@ -471,15 +367,9 @@ def __getitem__(self, key): _dict["hits"] = [Hit.from_dict(hits) for hits in _dict["hits"]] return _dict[key] - def __setitem__(self, key, val): - self.__dict__[key] = val - - def __str__(self) -> str: - return self.to_json(indent=4) - @dataclass -class Utterance(DataClassJsonMixin): # pylint: disable=too-many-instance-attributes +class Utterance(BaseResponse): # pylint: disable=too-many-instance-attributes """ The utterance information for the response. """ @@ -509,15 +399,9 @@ def __getitem__(self, key): _dict["sentiment"] = Sentiment.from_dict(_dict["sentiment"]) return _dict[key] - def __setitem__(self, key, val): - self.__dict__[key] = val - - def __str__(self) -> str: - return self.to_json(indent=4) - @dataclass -class Entity(DataClassJsonMixin): +class Entity(BaseResponse): """ The entity information for the response. """ @@ -528,19 +412,9 @@ class Entity(DataClassJsonMixin): start_word: float = 0 end_word: float = 0 - def __getitem__(self, key): - _dict = self.to_dict() - return _dict[key] - - def __setitem__(self, key, val): - self.__dict__[key] = val - - def __str__(self) -> str: - return self.to_json(indent=4) - @dataclass -class Alternative(DataClassJsonMixin): # pylint: disable=too-many-instance-attributes +class Alternative(BaseResponse): # pylint: disable=too-many-instance-attributes """ The alternative information for the response. """ @@ -585,15 +459,9 @@ def __getitem__(self, key): ] return _dict[key] - def __setitem__(self, key, val): - self.__dict__[key] = val - - def __str__(self) -> str: - return self.to_json(indent=4) - @dataclass -class Channel(DataClassJsonMixin): +class Channel(BaseResponse): """ The channel information for the response. """ @@ -620,15 +488,9 @@ def __getitem__(self, key): ] return _dict[key] - def __setitem__(self, key, val): - self.__dict__[key] = val - - def __str__(self) -> str: - return self.to_json(indent=4) - @dataclass -class Intent(DataClassJsonMixin): +class Intent(BaseResponse): """ The intent information for the response. """ @@ -636,19 +498,9 @@ class Intent(DataClassJsonMixin): intent: str = "" confidence_score: float = 0 - def __getitem__(self, key): - _dict = self.to_dict() - return _dict[key] - - def __setitem__(self, key, val): - self.__dict__[key] = val - - def __str__(self) -> str: - return self.to_json(indent=4) - @dataclass -class Average(DataClassJsonMixin): +class Average(BaseResponse): """ The average information for the response. 
""" @@ -662,15 +514,9 @@ def __getitem__(self, key): _dict["sentiment"] = Sentiment.from_dict(_dict["sentiment"]) return _dict[key] - def __setitem__(self, key, val): - self.__dict__[key] = val - - def __str__(self) -> str: - return self.to_json(indent=4) - @dataclass -class Topic(DataClassJsonMixin): +class Topic(BaseResponse): """ The topic information for the response. """ @@ -678,19 +524,9 @@ class Topic(DataClassJsonMixin): topic: str = "" confidence_score: float = 0 - def __getitem__(self, key): - _dict = self.to_dict() - return _dict[key] - - def __setitem__(self, key, val): - self.__dict__[key] = val - - def __str__(self) -> str: - return self.to_json(indent=4) - @dataclass -class Segment(DataClassJsonMixin): +class Segment(BaseResponse): """ The segment information for the response. """ @@ -723,15 +559,9 @@ def __getitem__(self, key): _dict["topics"] = [Topic.from_dict(topics) for topics in _dict["topics"]] return _dict[key] - def __setitem__(self, key, val): - self.__dict__[key] = val - - def __str__(self) -> str: - return self.to_json(indent=4) - @dataclass -class Sentiments(DataClassJsonMixin): +class Sentiments(BaseResponse): """ The sentiments information for the response. """ @@ -753,15 +583,9 @@ def __getitem__(self, key): _dict["average"] = Average.from_dict(_dict["average"]) return _dict[key] - def __setitem__(self, key, val): - self.__dict__[key] = val - - def __str__(self) -> str: - return self.to_json(indent=4) - @dataclass -class Topics(DataClassJsonMixin): +class Topics(BaseResponse): """ The topics information for the response. """ @@ -778,15 +602,9 @@ def __getitem__(self, key): ] return _dict[key] - def __setitem__(self, key, val): - self.__dict__[key] = val - - def __str__(self) -> str: - return self.to_json(indent=4) - @dataclass -class Intents(DataClassJsonMixin): +class Intents(BaseResponse): """ The intents information for the response. """ @@ -803,15 +621,9 @@ def __getitem__(self, key): ] return _dict[key] - def __setitem__(self, key, val): - self.__dict__[key] = val - - def __str__(self) -> str: - return self.to_json(indent=4) - @dataclass -class Results(DataClassJsonMixin): +class Results(BaseResponse): """ The results information for the response. """ @@ -855,18 +667,12 @@ def __getitem__(self, key): _dict["intents"] = Intents.from_dict(_dict["intents"]) return _dict[key] - def __setitem__(self, key, val): - self.__dict__[key] = val - - def __str__(self) -> str: - return self.to_json(indent=4) - # Prerecorded Response Result: @dataclass -class PrerecordedResponse(DataClassJsonMixin): +class PrerecordedResponse(BaseResponse): """ The response object for the prerecorded API. """ @@ -886,11 +692,5 @@ def __getitem__(self, key): _dict["results"] = Results.from_dict(_dict["results"]) return _dict[key] - def __setitem__(self, key, val): - self.__dict__[key] = val - - def __str__(self) -> str: - return self.to_json(indent=4) - SyncPrerecordedResponse = PrerecordedResponse diff --git a/deepgram/clients/listen/v1/websocket/async_client.py b/deepgram/clients/listen/v1/websocket/async_client.py index caa52e44..87603e7a 100644 --- a/deepgram/clients/listen/v1/websocket/async_client.py +++ b/deepgram/clients/listen/v1/websocket/async_client.py @@ -219,6 +219,12 @@ async def start( raise return False + def is_connected(self) -> bool: + """ + Returns the connection status of the WebSocket. 
+ """ + return self._socket is not None + # pylint: enable=too-many-branches,too-many-statements def on(self, event: LiveTranscriptionEvents, handler) -> None: diff --git a/deepgram/clients/listen/v1/websocket/client.py b/deepgram/clients/listen/v1/websocket/client.py index fe585ab9..8f074800 100644 --- a/deepgram/clients/listen/v1/websocket/client.py +++ b/deepgram/clients/listen/v1/websocket/client.py @@ -222,6 +222,12 @@ def start( raise e return False + def is_connected(self) -> bool: + """ + Returns the connection status of the WebSocket. + """ + return self._socket is not None + # pylint: enable=too-many-statements,too-many-branches def on( diff --git a/deepgram/clients/listen/v1/websocket/options.py b/deepgram/clients/listen/v1/websocket/options.py index b2d5ef5a..70c0671a 100644 --- a/deepgram/clients/listen/v1/websocket/options.py +++ b/deepgram/clients/listen/v1/websocket/options.py @@ -142,7 +142,7 @@ def check(self): "WARNING: Tier is deprecated. Will be removed in a future version." ) - if isinstance(self.endpointing) == str: + if isinstance(self.endpointing, str): logger.warning( "WARNING: endpointing's current type previous was `Optional[str]` which is incorrect" " for backward compatibility we are keeping it as `Optional[Union[str, bool, int]]`" diff --git a/deepgram/clients/listen/v1/websocket/response.py b/deepgram/clients/listen/v1/websocket/response.py index bf5dcf27..d4af34c1 100644 --- a/deepgram/clients/listen/v1/websocket/response.py +++ b/deepgram/clients/listen/v1/websocket/response.py @@ -2,22 +2,18 @@ # Use of this source code is governed by a MIT license that can be found in the LICENSE file. # SPDX-License-Identifier: MIT -from typing import List, Optional, Dict +from typing import List, Optional, Dict, Any from dataclasses import dataclass, field from dataclasses_json import config as dataclass_config, DataClassJsonMixin -# Result Message - @dataclass -class OpenResponse(DataClassJsonMixin): +class BaseResponse(DataClassJsonMixin): """ - Open Message from the Deepgram Platform + BaseResponse class used to define the common methods and properties for all response classes. """ - type: str = "" - def __getitem__(self, key): _dict = self.to_dict() return _dict[key] @@ -28,9 +24,36 @@ def __setitem__(self, key, val): def __str__(self) -> str: return self.to_json(indent=4) + def eval(self, key: str) -> str: + """ + This method is used to evaluate a key in the response object using a dot notation style method. 
+ """ + keys = key.split(".") + result: Dict[Any, Any] = self.to_dict() + for k in keys: + if isinstance(result, dict) and k in result: + result = result[k] + elif isinstance(result, list) and k.isdigit() and int(k) < len(result): + result = result[int(k)] + else: + return "" + return str(result) + + +# Result Message + + +@dataclass +class OpenResponse(BaseResponse): + """ + Open Message from the Deepgram Platform + """ + + type: str = "" + @dataclass -class Word(DataClassJsonMixin): +class Word(BaseResponse): """ Word object """ @@ -49,19 +72,9 @@ class Word(DataClassJsonMixin): default=None, metadata=dataclass_config(exclude=lambda f: f is None) ) - def __getitem__(self, key): - _dict = self.to_dict() - return _dict[key] - - def __setitem__(self, key, val): - self.__dict__[key] = val - - def __str__(self) -> str: - return self.to_json(indent=4) - @dataclass -class Alternative(DataClassJsonMixin): +class Alternative(BaseResponse): """ Alternative object """ @@ -79,15 +92,9 @@ def __getitem__(self, key): _dict["words"] = [Word.from_dict(words) for words in _dict["words"]] return _dict[key] - def __setitem__(self, key, val): - self.__dict__[key] = val - - def __str__(self) -> str: - return self.to_json(indent=4) - @dataclass -class Channel(DataClassJsonMixin): +class Channel(BaseResponse): """ Channel object """ @@ -103,15 +110,9 @@ def __getitem__(self, key): ] return _dict[key] - def __setitem__(self, key, val): - self.__dict__[key] = val - - def __str__(self) -> str: - return self.to_json(indent=4) - @dataclass -class ModelInfo(DataClassJsonMixin): +class ModelInfo(BaseResponse): """ ModelInfo object """ @@ -120,19 +121,9 @@ class ModelInfo(DataClassJsonMixin): version: str = "" arch: str = "" - def __getitem__(self, key): - _dict = self.to_dict() - return _dict[key] - - def __setitem__(self, key, val): - self.__dict__[key] = val - - def __str__(self) -> str: - return self.to_json(indent=4) - @dataclass -class Metadata(DataClassJsonMixin): +class Metadata(BaseResponse): """ Metadata object """ @@ -154,12 +145,6 @@ def __getitem__(self, key): _dict["extra"] = [str(extra) for _, extra in _dict["extra"].items()] return _dict[key] - def __setitem__(self, key, val): - self.__dict__[key] = val - - def __str__(self) -> str: - return self.to_json(indent=4) - @dataclass class LiveResultResponse( @@ -193,12 +178,6 @@ def __getitem__(self, key): ] return _dict[key] - def __setitem__(self, key, val): - self.__dict__[key] = val - - def __str__(self) -> str: - return self.to_json(indent=4) - # Metadata Message @@ -241,18 +220,12 @@ def __getitem__(self, key): _dict["extra"] = [str(extra) for _, extra in _dict["extra"].items()] return _dict[key] - def __setitem__(self, key, val): - self.__dict__[key] = val - - def __str__(self) -> str: - return self.to_json(indent=4) - # Speech Started Message @dataclass -class SpeechStartedResponse(DataClassJsonMixin): +class SpeechStartedResponse(BaseResponse): """ SpeechStartedResponse Message from the Deepgram Platform """ @@ -261,22 +234,12 @@ class SpeechStartedResponse(DataClassJsonMixin): channel: List[int] = field(default_factory=list) timestamp: float = 0 - def __getitem__(self, key): - _dict = self.to_dict() - return _dict[key] - - def __setitem__(self, key, val): - self.__dict__[key] = val - - def __str__(self) -> str: - return self.to_json(indent=4) - # Utterance End Message @dataclass -class UtteranceEndResponse(DataClassJsonMixin): +class UtteranceEndResponse(BaseResponse): """ UtteranceEnd Message from the Deepgram Platform """ @@ -285,44 +248,24 @@ 
class UtteranceEndResponse(DataClassJsonMixin): channel: List[int] = field(default_factory=list) last_word_end: float = 0 - def __getitem__(self, key): - _dict = self.to_dict() - return _dict[key] - - def __setitem__(self, key, val): - self.__dict__[key] = val - - def __str__(self) -> str: - return self.to_json(indent=4) - # Close Message @dataclass -class CloseResponse(DataClassJsonMixin): +class CloseResponse(BaseResponse): """ Close Message from the Deepgram Platform """ type: str = "" - def __getitem__(self, key): - _dict = self.to_dict() - return _dict[key] - - def __setitem__(self, key, val): - self.__dict__[key] = val - - def __str__(self) -> str: - return self.to_json(indent=4) - # Error Message @dataclass -class ErrorResponse(DataClassJsonMixin): +class ErrorResponse(BaseResponse): """ Error Message from the Deepgram Platform """ @@ -332,35 +275,15 @@ class ErrorResponse(DataClassJsonMixin): type: str = "" variant: Optional[str] = "" - def __getitem__(self, key): - _dict = self.to_dict() - return _dict[key] - - def __setitem__(self, key, val): - self.__dict__[key] = val - - def __str__(self) -> str: - return self.to_json(indent=4) - # Unhandled Message @dataclass -class UnhandledResponse(DataClassJsonMixin): +class UnhandledResponse(BaseResponse): """ Unhandled Message from the Deepgram Platform """ type: str = "" raw: str = "" - - def __getitem__(self, key): - _dict = self.to_dict() - return _dict[key] - - def __setitem__(self, key, val): - self.__dict__[key] = val - - def __str__(self) -> str: - return self.to_json(indent=4) diff --git a/deepgram/clients/manage/v1/response.py b/deepgram/clients/manage/v1/response.py index e3e1bf03..884bcfac 100644 --- a/deepgram/clients/manage/v1/response.py +++ b/deepgram/clients/manage/v1/response.py @@ -2,23 +2,18 @@ # Use of this source code is governed by a MIT license that can be found in the LICENSE file. # SPDX-License-Identifier: MIT -from typing import List, Optional +from typing import List, Optional, Dict, Any from dataclasses import dataclass, field from dataclasses_json import config as dataclass_config, DataClassJsonMixin -# Result Message - - @dataclass -class Message(DataClassJsonMixin): +class BaseResponse(DataClassJsonMixin): """ - Message from the Deepgram Platform + BaseResponse class used to define the common methods and properties for all response classes. """ - message: str = "" - def __getitem__(self, key): _dict = self.to_dict() return _dict[key] @@ -29,12 +24,39 @@ def __setitem__(self, key, val): def __str__(self) -> str: return self.to_json(indent=4) + def eval(self, key: str) -> str: + """ + This method is used to evaluate a key in the response object using a dot notation style method. 
+ """ + keys = key.split(".") + result: Dict[Any, Any] = self.to_dict() + for k in keys: + if isinstance(result, dict) and k in result: + result = result[k] + elif isinstance(result, list) and k.isdigit() and int(k) < len(result): + result = result[int(k)] + else: + return "" + return str(result) + + +# Result Message + + +@dataclass +class Message(BaseResponse): + """ + Message from the Deepgram Platform + """ + + message: str = "" + # Projects @dataclass -class Project(DataClassJsonMixin): +class Project(BaseResponse): """ Project object """ @@ -42,19 +64,9 @@ class Project(DataClassJsonMixin): project_id: str = "" name: str = "" - def __getitem__(self, key): - _dict = self.to_dict() - return _dict[key] - - def __setitem__(self, key, val): - self.__dict__[key] = val - - def __str__(self) -> str: - return self.to_json(indent=4) - @dataclass -class ProjectsResponse(DataClassJsonMixin): +class ProjectsResponse(BaseResponse): """ Projects Response object """ @@ -69,18 +81,12 @@ def __getitem__(self, key): ] return _dict[key] - def __setitem__(self, key, val): - self.__dict__[key] = val - - def __str__(self) -> str: - return self.to_json(indent=4) - # Members @dataclass -class Member(DataClassJsonMixin): +class Member(BaseResponse): """ Member object """ @@ -90,19 +96,9 @@ class Member(DataClassJsonMixin): last_name: str = "" member_id: str = "" - def __getitem__(self, key): - _dict = self.to_dict() - return _dict[key] - - def __setitem__(self, key, val): - self.__dict__[key] = val - - def __str__(self) -> str: - return self.to_json(indent=4) - @dataclass -class MembersResponse(DataClassJsonMixin): +class MembersResponse(BaseResponse): """ Members Response object """ @@ -117,18 +113,12 @@ def __getitem__(self, key): ] return _dict[key] - def __setitem__(self, key, val): - self.__dict__[key] = val - - def __str__(self) -> str: - return self.to_json(indent=4) - # Keys @dataclass -class Key(DataClassJsonMixin): +class Key(BaseResponse): """ Key object """ @@ -147,15 +137,9 @@ def __getitem__(self, key): _dict["scopes"] = [str(scopes) for scopes in _dict["scopes"]] return _dict[key] - def __setitem__(self, key, val): - self.__dict__[key] = val - - def __str__(self) -> str: - return self.to_json(indent=4) - @dataclass -class KeyResponse(DataClassJsonMixin): +class KeyResponse(BaseResponse): """ Key Response object """ @@ -171,15 +155,9 @@ def __getitem__(self, key): _dict["member"] = Member.from_dict(_dict["member"]) return _dict[key] - def __setitem__(self, key, val): - self.__dict__[key] = val - - def __str__(self) -> str: - return self.to_json(indent=4) - @dataclass -class KeysResponse(DataClassJsonMixin): +class KeysResponse(BaseResponse): """ Keys Response object """ @@ -194,18 +172,12 @@ def __getitem__(self, key): ] return _dict[key] - def __setitem__(self, key, val): - self.__dict__[key] = val - - def __str__(self) -> str: - return self.to_json(indent=4) - # Scopes @dataclass -class ScopesResponse(DataClassJsonMixin): +class ScopesResponse(BaseResponse): """ Scopes Response object """ @@ -218,18 +190,12 @@ def __getitem__(self, key): _dict["scopes"] = [str(scopes) for scopes in _dict["scopes"]] return _dict[key] - def __setitem__(self, key, val): - self.__dict__[key] = val - - def __str__(self) -> str: - return self.to_json(indent=4) - # Invites @dataclass -class Invite(DataClassJsonMixin): +class Invite(BaseResponse): """ Invite object """ @@ -237,19 +203,9 @@ class Invite(DataClassJsonMixin): email: str = "" scope: str = "" - def __getitem__(self, key): - _dict = self.to_dict() - return 
_dict[key] - - def __setitem__(self, key, val): - self.__dict__[key] = val - - def __str__(self) -> str: - return self.to_json(indent=4) - @dataclass -class InvitesResponse(DataClassJsonMixin): +class InvitesResponse(BaseResponse): """ Invites Response object """ @@ -264,18 +220,12 @@ def __getitem__(self, key): ] return _dict[key] - def __setitem__(self, key, val): - self.__dict__[key] = val - - def __str__(self) -> str: - return self.to_json(indent=4) - # Usage @dataclass -class Config(DataClassJsonMixin): # pylint: disable=too-many-instance-attributes +class Config(BaseResponse): # pylint: disable=too-many-instance-attributes """ Config object """ @@ -310,19 +260,9 @@ class Config(DataClassJsonMixin): # pylint: disable=too-many-instance-attribute default=None, metadata=dataclass_config(exclude=lambda f: f is None) ) - def __getitem__(self, key): - _dict = self.to_dict() - return _dict[key] - - def __setitem__(self, key, val): - self.__dict__[key] = val - - def __str__(self) -> str: - return self.to_json(indent=4) - @dataclass -class Details(DataClassJsonMixin): # pylint: disable=too-many-instance-attributes +class Details(BaseResponse): # pylint: disable=too-many-instance-attributes """ Details object """ @@ -353,15 +293,9 @@ def __getitem__(self, key): _dict["config"] = Config.from_dict(_dict["config"]) return _dict[key] - def __setitem__(self, key, val): - self.__dict__[key] = val - - def __str__(self) -> str: - return self.to_json(indent=4) - @dataclass -class Callback(DataClassJsonMixin): +class Callback(BaseResponse): """ Callback object """ @@ -370,19 +304,9 @@ class Callback(DataClassJsonMixin): code: int = 0 completed: str = "" - def __getitem__(self, key): - _dict = self.to_dict() - return _dict[key] - - def __setitem__(self, key, val): - self.__dict__[key] = val - - def __str__(self) -> str: - return self.to_json(indent=4) - @dataclass -class TokenDetail(DataClassJsonMixin): +class TokenDetail(BaseResponse): """ Token Detail object """ @@ -392,19 +316,9 @@ class TokenDetail(DataClassJsonMixin): model: str = "" output: int = 0 - def __getitem__(self, key): - _dict = self.to_dict() - return _dict[key] - - def __setitem__(self, key, val): - self.__dict__[key] = val - - def __str__(self) -> str: - return self.to_json(indent=4) - @dataclass -class SpeechSegment(DataClassJsonMixin): +class SpeechSegment(BaseResponse): """ Speech Segment object """ @@ -413,19 +327,9 @@ class SpeechSegment(DataClassJsonMixin): model: str = "" tier: str = "" - def __getitem__(self, key): - _dict = self.to_dict() - return _dict[key] - - def __setitem__(self, key, val): - self.__dict__[key] = val - - def __str__(self) -> str: - return self.to_json(indent=4) - @dataclass -class TTSDetails(DataClassJsonMixin): +class TTSDetails(BaseResponse): """ TTS Details object """ @@ -445,15 +349,9 @@ def __getitem__(self, key): ] return _dict[key] - def __setitem__(self, key, val): - self.__dict__[key] = val - - def __str__(self) -> str: - return self.to_json(indent=4) - @dataclass -class Response(DataClassJsonMixin): +class Response(BaseResponse): """ Response object """ @@ -481,15 +379,9 @@ def __getitem__(self, key): ] return _dict[key] - def __setitem__(self, key, val): - self.__dict__[key] = val - - def __str__(self) -> str: - return self.to_json(indent=4) - @dataclass -class UsageRequest(DataClassJsonMixin): # pylint: disable=too-many-instance-attributes +class UsageRequest(BaseResponse): # pylint: disable=too-many-instance-attributes """ Usage Request object """ @@ -515,15 +407,9 @@ def __getitem__(self, key): 
_dict["callback"] = Callback.from_dict(_dict["callback"]) return _dict[key] - def __setitem__(self, key, val): - self.__dict__[key] = val - - def __str__(self) -> str: - return self.to_json(indent=4) - @dataclass -class UsageRequestsResponse(DataClassJsonMixin): +class UsageRequestsResponse(BaseResponse): """ Usage Requests Response object """ @@ -540,15 +426,9 @@ def __getitem__(self, key): ] return _dict[key] - def __setitem__(self, key, val): - self.__dict__[key] = val - - def __str__(self) -> str: - return self.to_json(indent=4) - @dataclass -class Tokens(DataClassJsonMixin): +class Tokens(BaseResponse): """ Tokens object """ @@ -556,19 +436,9 @@ class Tokens(DataClassJsonMixin): tokens_in: int = 0 out: int = 0 - def __getitem__(self, key): - _dict = self.to_dict() - return _dict[key] - - def __setitem__(self, key, val): - self.__dict__[key] = val - - def __str__(self) -> str: - return self.to_json(indent=4) - @dataclass -class TTS(DataClassJsonMixin): +class TTS(BaseResponse): """ TTS object """ @@ -576,19 +446,9 @@ class TTS(DataClassJsonMixin): characters: int = 0 requests: int = 0 - def __getitem__(self, key): - _dict = self.to_dict() - return _dict[key] - - def __setitem__(self, key, val): - self.__dict__[key] = val - - def __str__(self) -> str: - return self.to_json(indent=4) - @dataclass -class Results(DataClassJsonMixin): +class Results(BaseResponse): """ Results object """ @@ -609,15 +469,9 @@ def __getitem__(self, key): _dict["tts"] = TTS.from_dict(_dict["tts"]) return _dict[key] - def __setitem__(self, key, val): - self.__dict__[key] = val - - def __str__(self) -> str: - return self.to_json(indent=4) - @dataclass -class Resolution(DataClassJsonMixin): +class Resolution(BaseResponse): """ Resolution object """ @@ -625,19 +479,9 @@ class Resolution(DataClassJsonMixin): units: str = "" amount: int = 0 - def __getitem__(self, key): - _dict = self.to_dict() - return _dict[key] - - def __setitem__(self, key, val): - self.__dict__[key] = val - - def __str__(self) -> str: - return self.to_json(indent=4) - @dataclass -class UsageSummaryResponse(DataClassJsonMixin): +class UsageSummaryResponse(BaseResponse): """ Usage Summary Response object """ @@ -657,15 +501,9 @@ def __getitem__(self, key): ] return _dict[key] - def __setitem__(self, key, val): - self.__dict__[key] = val - - def __str__(self) -> str: - return self.to_json(indent=4) - @dataclass -class UsageModel(DataClassJsonMixin): +class UsageModel(BaseResponse): """ Usage Model object """ @@ -675,19 +513,9 @@ class UsageModel(DataClassJsonMixin): version: str = "" model_id: str = "" - def __getitem__(self, key): - _dict = self.to_dict() - return _dict[key] - - def __setitem__(self, key, val): - self.__dict__[key] = val - - def __str__(self) -> str: - return self.to_json(indent=4) - @dataclass -class UsageFieldsResponse(DataClassJsonMixin): +class UsageFieldsResponse(BaseResponse): """ Usage Fields Response object """ @@ -721,18 +549,12 @@ def __getitem__(self, key): _dict["languages"] = [str(languages) for languages in _dict["languages"]] return _dict[key] - def __setitem__(self, key, val): - self.__dict__[key] = val - - def __str__(self) -> str: - return self.to_json(indent=4) - # Billing @dataclass -class Balance(DataClassJsonMixin): +class Balance(BaseResponse): """ Balance object """ @@ -742,19 +564,9 @@ class Balance(DataClassJsonMixin): units: str = "" purchase_order_id: str = "" - def __getitem__(self, key): - _dict = self.to_dict() - return _dict[key] - - def __setitem__(self, key, val): - self.__dict__[key] = val - - 
def __str__(self) -> str: - return self.to_json(indent=4) - @dataclass -class BalancesResponse(DataClassJsonMixin): +class BalancesResponse(BaseResponse): """ Balances Response object """ @@ -768,9 +580,3 @@ def __getitem__(self, key): Balance.from_dict(balances) for balances in _dict["balances"] ] return _dict[key] - - def __setitem__(self, key, val): - self.__dict__[key] = val - - def __str__(self) -> str: - return self.to_json(indent=4) diff --git a/deepgram/clients/selfhosted/v1/async_client.py b/deepgram/clients/selfhosted/v1/async_client.py index fc1cbc8d..3daf7fa8 100644 --- a/deepgram/clients/selfhosted/v1/async_client.py +++ b/deepgram/clients/selfhosted/v1/async_client.py @@ -35,15 +35,21 @@ def __init__(self, config: DeepgramClientOptions): super().__init__(config) async def list_onprem_credentials( - self, project_id: str, timeout: Optional[httpx.Timeout] = None + self, + project_id: str, + timeout: Optional[httpx.Timeout] = None, + **kwargs, ): """ List all on-premises distribution credentials for a project. """ - return self.list_selfhosted_credentials(project_id, timeout) + return self.list_selfhosted_credentials(project_id, timeout=timeout, **kwargs) async def list_selfhosted_credentials( - self, project_id: str, timeout: Optional[httpx.Timeout] = None + self, + project_id: str, + timeout: Optional[httpx.Timeout] = None, + **kwargs, ): """ List all on-premises distribution credentials for a project. @@ -52,7 +58,7 @@ async def list_selfhosted_credentials( url = f"{self._config.url}/{self._endpoint}/{project_id}/selfhosted/distribution/credentials" self._logger.info("url: %s", url) self._logger.info("project_id: %s", project_id) - res = await self.get(url, timeout=timeout) + res = await self.get(url, timeout=timeout, **kwargs) self._logger.verbose("result: %s", res) self._logger.notice("list_selfhosted_credentials succeeded") self._logger.debug("SelfHostedClient.list_selfhosted_credentials LEAVE") @@ -63,12 +69,13 @@ async def get_onprem_credentials( project_id: str, distribution_credentials_id: str, timeout: Optional[httpx.Timeout] = None, + **kwargs, ): """ Get a specific on-premises distribution credential for a project. """ return self.get_selfhosted_credentials( - project_id, distribution_credentials_id, timeout + project_id, distribution_credentials_id, timeout=timeout, **kwargs ) async def get_selfhosted_credentials( @@ -76,6 +83,7 @@ async def get_selfhosted_credentials( project_id: str, distribution_credentials_id: str, timeout: Optional[httpx.Timeout] = None, + **kwargs, ): """ Get a specific on-premises distribution credential for a project. @@ -87,22 +95,30 @@ async def get_selfhosted_credentials( self._logger.info( "distribution_credentials_id: %s", distribution_credentials_id ) - res = await self.get(url, timeout=timeout) + res = await self.get(url, timeout=timeout, **kwargs) self._logger.verbose("result: %s", res) self._logger.notice("get_selfhosted_credentials succeeded") self._logger.debug("SelfHostedClient.get_selfhosted_credentials LEAVE") return res async def create_onprem_credentials( - self, project_id: str, options, timeout: Optional[httpx.Timeout] = None + self, + project_id: str, + options, + timeout: Optional[httpx.Timeout] = None, + **kwargs, ): """ Create a new on-premises distribution credential for a project. 
""" - return self.create_onprem_credentials(project_id, options, timeout) + return self.create_onprem_credentials(project_id, options, timeout, **kwargs) async def create_selfhosted_credentials( - self, project_id: str, options, timeout: Optional[httpx.Timeout] = None + self, + project_id: str, + options, + timeout: Optional[httpx.Timeout] = None, + **kwargs, ): """ Create a new on-premises distribution credential for a project. @@ -112,7 +128,7 @@ async def create_selfhosted_credentials( self._logger.info("url: %s", url) self._logger.info("project_id: %s", project_id) self._logger.info("options: %s", options) - res = await self.post(url, json=options, timeout=timeout) + res = await self.post(url, json=options, timeout=timeout, **kwargs) self._logger.verbose("result: %s", res) self._logger.notice("create_selfhosted_credentials succeeded") self._logger.debug("SelfHostedClient.create_selfhosted_credentials LEAVE") @@ -123,12 +139,13 @@ async def delete_onprem_credentials( project_id: str, distribution_credentials_id: str, timeout: Optional[httpx.Timeout] = None, + **kwargs, ): """ Delete an on-premises distribution credential for a project. """ return self.delete_selfhosted_credentials( - project_id, distribution_credentials_id, timeout + project_id, distribution_credentials_id, timeout=timeout, **kwargs ) async def delete_selfhosted_credentials( @@ -136,6 +153,7 @@ async def delete_selfhosted_credentials( project_id: str, distribution_credentials_id: str, timeout: Optional[httpx.Timeout] = None, + **kwargs, ): """ Delete an on-premises distribution credential for a project. @@ -145,7 +163,7 @@ async def delete_selfhosted_credentials( self._logger.info("url: %s", url) self._logger.info("project_id: %s", project_id) self._logger.info("distrbution_credentials_id: %s", distribution_credentials_id) - res = await self.delete(url, timeout=timeout) + res = await self.delete(url, timeout=timeout, **kwargs) self._logger.verbose("result: %s", res) self._logger.notice("delete_selfhosted_credentials succeeded") self._logger.debug("SelfHostedClient.delete_selfhosted_credentials LEAVE") diff --git a/deepgram/clients/selfhosted/v1/client.py b/deepgram/clients/selfhosted/v1/client.py index 419e704c..94785d66 100644 --- a/deepgram/clients/selfhosted/v1/client.py +++ b/deepgram/clients/selfhosted/v1/client.py @@ -35,15 +35,21 @@ def __init__(self, config: DeepgramClientOptions): super().__init__(config) def list_onprem_credentials( - self, project_id: str, timeout: Optional[httpx.Timeout] = None + self, + project_id: str, + timeout: Optional[httpx.Timeout] = None, + **kwargs, ): """ List all on-premises distribution credentials for a project. """ - return self.list_selfhosted_credentials(project_id, timeout) + return self.list_selfhosted_credentials(project_id, timeout=timeout, **kwargs) def list_selfhosted_credentials( - self, project_id: str, timeout: Optional[httpx.Timeout] = None + self, + project_id: str, + timeout: Optional[httpx.Timeout] = None, + **kwargs, ): """ List all on-premises distribution credentials for a project. 
@@ -52,7 +58,7 @@ def list_selfhosted_credentials( url = f"{self._config.url}/{self._endpoint}/{project_id}/selfhosted/distribution/credentials" self._logger.info("url: %s", url) self._logger.info("project_id: %s", project_id) - res = self.get(url, timeout=timeout) + res = self.get(url, timeout=timeout, **kwargs) self._logger.verbose("result: %s", res) self._logger.notice("list_selfhosted_credentials succeeded") self._logger.debug("SelfHostedClient.list_selfhosted_credentials LEAVE") @@ -63,12 +69,13 @@ def get_onprem_credentials( project_id: str, distribution_credentials_id: str, timeout: Optional[httpx.Timeout] = None, + **kwargs, ): """ Get a specific on-premises distribution credential for a project. """ return self.get_selfhosted_credentials( - project_id, distribution_credentials_id, timeout + project_id, distribution_credentials_id, timeout=timeout, **kwargs ) def get_selfhosted_credentials( @@ -76,6 +83,7 @@ def get_selfhosted_credentials( project_id: str, distribution_credentials_id: str, timeout: Optional[httpx.Timeout] = None, + **kwargs, ): """ Get a specific on-premises distribution credential for a project. @@ -87,22 +95,32 @@ def get_selfhosted_credentials( self._logger.info( "distribution_credentials_id: %s", distribution_credentials_id ) - res = self.get(url, timeout=timeout) + res = self.get(url, timeout=timeout, **kwargs) self._logger.verbose("result: %s", res) self._logger.notice("get_selfhosted_credentials succeeded") self._logger.debug("SelfHostedClient.get_selfhosted_credentials LEAVE") return res def create_onprem_credentials( - self, project_id: str, options, timeout: Optional[httpx.Timeout] = None + self, + project_id: str, + options, + timeout: Optional[httpx.Timeout] = None, + **kwargs, ): """ Create a new on-premises distribution credential for a project. """ - return self.create_selfhosted_credentials(project_id, options, timeout) + return self.create_selfhosted_credentials( + project_id, options, timeout=timeout, **kwargs + ) def create_selfhosted_credentials( - self, project_id: str, options, timeout: Optional[httpx.Timeout] = None + self, + project_id: str, + options, + timeout: Optional[httpx.Timeout] = None, + **kwargs, ): """ Create a new on-premises distribution credential for a project. @@ -112,7 +130,7 @@ def create_selfhosted_credentials( self._logger.info("url: %s", url) self._logger.info("project_id: %s", project_id) self._logger.info("options: %s", options) - res = self.post(url, json=options, timeout=timeout) + res = self.post(url, json=options, timeout=timeout, **kwargs) self._logger.verbose("result: %s", res) self._logger.notice("create_selfhosted_credentials succeeded") self._logger.debug("SelfHostedClient.create_selfhosted_credentials LEAVE") @@ -123,12 +141,13 @@ def delete_onprem_credentials( project_id: str, distribution_credentials_id: str, timeout: Optional[httpx.Timeout] = None, + **kwargs, ): """ Delete an on-premises distribution credential for a project. """ return self.delete_selfhosted_credentials( - project_id, distribution_credentials_id, timeout + project_id, distribution_credentials_id, timeout=timeout, **kwargs ) def delete_selfhosted_credentials( @@ -136,6 +155,7 @@ def delete_selfhosted_credentials( project_id: str, distribution_credentials_id: str, timeout: Optional[httpx.Timeout] = None, + **kwargs, ): """ Delete an on-premises distribution credential for a project. 
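The delete path gets the same `**kwargs` treatment, and `timeout` is now forwarded by keyword rather than by position. A sketch of a direct call with an explicit httpx timeout (the IDs are hypothetical placeholders; `deepgram.selfhosted.v("1")` is assumed as above):

import httpx

from deepgram import DeepgramClient

deepgram = DeepgramClient()
deepgram.selfhosted.v("1").delete_selfhosted_credentials(
    "PROJECT_ID",                   # hypothetical project id
    "DISTRIBUTION_CREDENTIALS_ID",  # hypothetical credential id
    timeout=httpx.Timeout(30.0, connect=10.0),  # 30s overall, 10s connect
)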
@@ -145,7 +165,7 @@ def delete_selfhosted_credentials( self._logger.info("url: %s", url) self._logger.info("project_id: %s", project_id) self._logger.info("distrbution_credentials_id: %s", distribution_credentials_id) - res = self.delete(url, timeout=timeout) + res = self.delete(url, timeout=timeout, **kwargs) self._logger.verbose("result: %s", res) self._logger.notice("delete_selfhosted_credentials succeeded") self._logger.debug("SelfHostedClient.delete_selfhosted_credentials LEAVE") diff --git a/deepgram/clients/speak/v1/rest/async_client.py b/deepgram/clients/speak/v1/rest/async_client.py index 62fb88db..c4568ef8 100644 --- a/deepgram/clients/speak/v1/rest/async_client.py +++ b/deepgram/clients/speak/v1/rest/async_client.py @@ -46,6 +46,7 @@ async def stream_raw( headers: Optional[Dict] = None, timeout: Optional[httpx.Timeout] = None, endpoint: str = "v1/speak", + **kwargs, ) -> httpx.Response: """ Speak from a text source and store as a Iterator[byte]. @@ -99,6 +100,7 @@ async def stream_raw( headers=headers, json=body, timeout=timeout, + **kwargs, ) self._logger.info("result: %s", str(result)) @@ -114,6 +116,7 @@ async def stream_memory( headers: Optional[Dict] = None, timeout: Optional[httpx.Timeout] = None, endpoint: str = "v1/speak", + **kwargs, ) -> SpeakRESTResponse: """ Speak from a text source and store in memory. @@ -173,6 +176,7 @@ async def stream_memory( json=body, timeout=timeout, file_result=return_vals, + **kwargs, ) self._logger.info("result: %s", result) resp = SpeakRESTResponse( @@ -205,6 +209,7 @@ async def stream( headers: Optional[Dict] = None, timeout: Optional[httpx.Timeout] = None, endpoint: str = "v1/speak", + **kwargs, ) -> SpeakRESTResponse: """ DEPRECATED: stream() is deprecated. Use stream_memory() instead. @@ -216,6 +221,7 @@ async def stream( headers=headers, timeout=timeout, endpoint=endpoint, + **kwargs, ) async def file( @@ -226,6 +232,7 @@ async def file( addons: Optional[Dict] = None, timeout: Optional[httpx.Timeout] = None, endpoint: str = "v1/speak", + **kwargs, ) -> SpeakRESTResponse: """ Speak from a text source and save to a file. @@ -237,6 +244,7 @@ async def file( addons=addons, timeout=timeout, endpoint=endpoint, + **kwargs, ) async def save( @@ -248,6 +256,7 @@ async def save( headers: Optional[Dict] = None, timeout: Optional[httpx.Timeout] = None, endpoint: str = "v1/speak", + **kwargs, ) -> SpeakRESTResponse: """ Speak from a text source and save to a file. @@ -275,6 +284,7 @@ async def save( headers=headers, timeout=timeout, endpoint=endpoint, + **kwargs, ) if res.stream is None: diff --git a/deepgram/clients/speak/v1/rest/client.py b/deepgram/clients/speak/v1/rest/client.py index 7ec5d8d7..03bc3db6 100644 --- a/deepgram/clients/speak/v1/rest/client.py +++ b/deepgram/clients/speak/v1/rest/client.py @@ -45,6 +45,7 @@ def stream_raw( headers: Optional[Dict] = None, timeout: Optional[httpx.Timeout] = None, endpoint: str = "v1/speak", + **kwargs, ) -> httpx.Response: """ Speak from a text source and store as a Iterator[byte]. @@ -98,6 +99,7 @@ def stream_raw( headers=headers, json=body, timeout=timeout, + **kwargs, ) self._logger.info("result: %s", str(result)) @@ -113,6 +115,7 @@ def stream_memory( headers: Optional[Dict] = None, timeout: Optional[httpx.Timeout] = None, endpoint: str = "v1/speak", + **kwargs, ) -> SpeakRESTResponse: """ Speak from a text source and store in memory. 
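Both the sync and async `stream()` methods are now marked deprecated in favor of `stream_memory()`. A minimal migration sketch, mirroring the hello_world examples updated later in this diff (text, model, and filename are illustrative):

from deepgram import DeepgramClient, SpeakOptions

deepgram = DeepgramClient()
options = SpeakOptions(model="aura-asteria-en")

# before: response = deepgram.speak.rest.v("1").stream({"text": "Hello world!"}, options)
response = deepgram.speak.rest.v("1").stream_memory({"text": "Hello world!"}, options)

# the synthesized audio is held in memory on the response object
with open("test.mp3", "wb") as audio:
    audio.write(response.stream_memory.getbuffer())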
@@ -172,6 +175,7 @@ def stream_memory( json=body, timeout=timeout, file_result=return_vals, + **kwargs, ) self._logger.info("result: %s", result) @@ -205,6 +209,7 @@ def stream( headers: Optional[Dict] = None, timeout: Optional[httpx.Timeout] = None, endpoint: str = "v1/speak", + **kwargs, ) -> SpeakRESTResponse: """ DEPRECATED: stream() is deprecated. Use stream_memory() instead. @@ -216,6 +221,7 @@ def stream( headers=headers, timeout=timeout, endpoint=endpoint, + **kwargs, ) async def file( @@ -226,6 +232,7 @@ async def file( addons: Optional[Dict] = None, timeout: Optional[httpx.Timeout] = None, endpoint: str = "v1/speak", + **kwargs, ) -> SpeakRESTResponse: """ Speak from a text source and save to a file. @@ -237,6 +244,7 @@ async def file( addons=addons, timeout=timeout, endpoint=endpoint, + **kwargs, ) def save( @@ -248,6 +256,7 @@ def save( headers: Optional[Dict] = None, timeout: Optional[httpx.Timeout] = None, endpoint: str = "v1/speak", + **kwargs, ) -> SpeakRESTResponse: """ Speak from a text source and save to a file. @@ -275,6 +284,7 @@ def save( headers=headers, timeout=timeout, endpoint=endpoint, + **kwargs, ) if res.stream is None: diff --git a/deepgram/clients/speak/v1/rest/response.py b/deepgram/clients/speak/v1/rest/response.py index 68dab599..779f3bc9 100644 --- a/deepgram/clients/speak/v1/rest/response.py +++ b/deepgram/clients/speak/v1/rest/response.py @@ -2,20 +2,53 @@ # Use of this source code is governed by a MIT license that can be found in the LICENSE file. # SPDX-License-Identifier: MIT - -from typing import Optional +from typing import Optional, Dict, Any import io from dataclasses import dataclass, field from dataclasses_json import config as dataclass_config, DataClassJsonMixin + +# Base Classes: + + +@dataclass +class BaseResponse(DataClassJsonMixin): + """ + BaseResponse class used to define the common methods and properties for all response classes. + """ + + def __getitem__(self, key): + _dict = self.to_dict() + return _dict[key] + + def __setitem__(self, key, val): + self.__dict__[key] = val + + def __str__(self) -> str: + return self.to_json(indent=4) + + def eval(self, key: str) -> str: + """ + This method is used to evaluate a key in the response object using a dot notation style method. + """ + keys = key.split(".") + result: Dict[Any, Any] = self.to_dict() + for k in keys: + if isinstance(result, dict) and k in result: + result = result[k] + elif isinstance(result, list) and k.isdigit() and int(k) < len(result): + result = result[int(k)] + else: + return "" + return str(result) + + # Speak Response Types: @dataclass -class SpeakRESTResponse( - DataClassJsonMixin -): # pylint: disable=too-many-instance-attributes +class SpeakRESTResponse(BaseResponse): # pylint: disable=too-many-instance-attributes """ A class for representing a response from the speak endpoint. 
""" @@ -42,98 +75,45 @@ class SpeakRESTResponse( metadata=dataclass_config(exclude=lambda f: True), ) - def __getitem__(self, key): - _dict = self.to_dict() - return _dict[key] - - def __setitem__(self, key, val): - self.__dict__[key] = val - - # this is a hack to make the response look like a dict because of the io.BytesIO object - # otherwise it will throw an exception on printing - def __str__(self) -> str: - my_dict = self.to_dict() - return my_dict.__str__() - @dataclass -class OpenResponse(DataClassJsonMixin): +class OpenResponse(BaseResponse): """ Open Message from the Deepgram Platform """ type: str = "" - def __getitem__(self, key): - _dict = self.to_dict() - return _dict[key] - - def __setitem__(self, key, val): - self.__dict__[key] = val - - def __str__(self) -> str: - return self.to_json(indent=4) - @dataclass -class MetadataResponse(DataClassJsonMixin): +class MetadataResponse(BaseResponse): """ Metadata object """ request_id: str = "" - def __getitem__(self, key): - _dict = self.to_dict() - return _dict[key] - - def __setitem__(self, key, val): - self.__dict__[key] = val - - def __str__(self) -> str: - return self.to_json(indent=4) - @dataclass -class FlushedResponse(DataClassJsonMixin): +class FlushedResponse(BaseResponse): """ Flushed Message from the Deepgram Platform """ type: str = "" - def __getitem__(self, key): - _dict = self.to_dict() - return _dict[key] - - def __setitem__(self, key, val): - self.__dict__[key] = val - - def __str__(self) -> str: - return self.to_json(indent=4) - @dataclass -class CloseResponse(DataClassJsonMixin): +class CloseResponse(BaseResponse): """ Close Message from the Deepgram Platform """ type: str = "" - def __getitem__(self, key): - _dict = self.to_dict() - return _dict[key] - - def __setitem__(self, key, val): - self.__dict__[key] = val - - def __str__(self) -> str: - return self.to_json(indent=4) - @dataclass -class ErrorResponse(DataClassJsonMixin): +class ErrorResponse(BaseResponse): """ Error Message from the Deepgram Platform """ @@ -143,35 +123,15 @@ class ErrorResponse(DataClassJsonMixin): type: str = "" variant: Optional[str] = "" - def __getitem__(self, key): - _dict = self.to_dict() - return _dict[key] - - def __setitem__(self, key, val): - self.__dict__[key] = val - - def __str__(self) -> str: - return self.to_json(indent=4) - # Unhandled Message @dataclass -class UnhandledResponse(DataClassJsonMixin): +class UnhandledResponse(BaseResponse): """ Unhandled Message from the Deepgram Platform """ type: str = "" raw: str = "" - - def __getitem__(self, key): - _dict = self.to_dict() - return _dict[key] - - def __setitem__(self, key, val): - self.__dict__[key] = val - - def __str__(self) -> str: - return self.to_json(indent=4) diff --git a/examples/manage/keys/main.py b/examples/manage/keys/main.py index 81468226..87b7a1a8 100644 --- a/examples/manage/keys/main.py +++ b/examples/manage/keys/main.py @@ -17,7 +17,7 @@ def main(): try: # example of setting up a client config. 
logging values: WARNING, VERBOSE, DEBUG, SPAM config = DeepgramClientOptions( - verbose=verboselogs.DEBUG, + verbose=verboselogs.SPAM, ) deepgram: DeepgramClient = DeepgramClient("", config) # otherwise, use default config @@ -48,7 +48,11 @@ def main(): ) # create key - options: KeyOptions = {"comment": "MyTestKey", "scopes": ["member"]} + options: KeyOptions = KeyOptions( + comment="MyTestKey", + scopes=["member:write", "project:read"], + time_to_live_in_seconds=3600, + ) myKeyId = None createResp = deepgram.manage.v("1").create_key(myId, options) diff --git a/examples/speech-to-text/websocket/microphone/main.py b/examples/speech-to-text/websocket/microphone/main.py index 576dfe01..fc2bb37a 100644 --- a/examples/speech-to-text/websocket/microphone/main.py +++ b/examples/speech-to-text/websocket/microphone/main.py @@ -42,6 +42,7 @@ def on_message(self, result, **kwargs): if len(sentence) == 0: return if result.is_final: + print(f"Message: {result.to_json()}") # We need to collect these and concatenate them together when we get a speech_final=true # See docs: https://developers.deepgram.com/docs/understand-endpointing-interim-results is_finals.append(sentence) diff --git a/examples/text-to-speech/rest/file/async_hello_world/main.py b/examples/text-to-speech/rest/file/async_hello_world/main.py index bc0f4411..21445631 100644 --- a/examples/text-to-speech/rest/file/async_hello_world/main.py +++ b/examples/text-to-speech/rest/file/async_hello_world/main.py @@ -15,7 +15,7 @@ load_dotenv() -SPEAK_OPTIONS = {"text": "Hello world!"} +SPEAK_TEXT = {"text": "Hello world!"} filename = "test.mp3" @@ -30,7 +30,7 @@ async def main(): ) response = await deepgram.speak.asyncrest.v("1").save( - filename, SPEAK_OPTIONS, options + filename, SPEAK_TEXT, options ) print(response.to_json(indent=4)) diff --git a/examples/text-to-speech/rest/file/hello_world/main.py b/examples/text-to-speech/rest/file/hello_world/main.py index 0101140a..ef6e3429 100644 --- a/examples/text-to-speech/rest/file/hello_world/main.py +++ b/examples/text-to-speech/rest/file/hello_world/main.py @@ -15,7 +15,7 @@ load_dotenv() -SPEAK_OPTIONS = {"text": "Hello world!"} +SPEAK_TEXT = {"text": "Hello world!"} filename = "test.mp3" @@ -31,7 +31,7 @@ def main(): model="aura-asteria-en", ) - response = deepgram.speak.rest.v("1").save(filename, SPEAK_OPTIONS, options) + response = deepgram.speak.rest.v("1").save(filename, SPEAK_TEXT, options) print(response.to_json(indent=4)) except Exception as e: diff --git a/examples/text-to-speech/rest/file/legacy_dict_hello_world/main.py b/examples/text-to-speech/rest/file/legacy_dict_hello_world/main.py index 939495b3..3c522bbd 100644 --- a/examples/text-to-speech/rest/file/legacy_dict_hello_world/main.py +++ b/examples/text-to-speech/rest/file/legacy_dict_hello_world/main.py @@ -15,7 +15,7 @@ load_dotenv() -SPEAK_OPTIONS = {"text": "Hello world!"} +SPEAK_TEXT = {"text": "Hello world!"} filename = "test.mp3" @@ -29,7 +29,7 @@ def main(): "model": "aura-asteria-en", } - response = deepgram.speak.rest.v("1").save(filename, SPEAK_OPTIONS, options) + response = deepgram.speak.rest.v("1").save(filename, SPEAK_TEXT, options) print(response.to_json(indent=4)) except Exception as e: diff --git a/examples/text-to-speech/rest/file/woodchuck/main.py b/examples/text-to-speech/rest/file/woodchuck/main.py index 54f6badc..1f72661a 100644 --- a/examples/text-to-speech/rest/file/woodchuck/main.py +++ b/examples/text-to-speech/rest/file/woodchuck/main.py @@ -15,7 +15,7 @@ load_dotenv() -SPEAK_OPTIONS = { +SPEAK_TEXT = { 
"text": "How much wood could a woodchuck chuck? If a woodchuck could chuck wood? As much wood as a woodchuck could chuck, if a woodchuck could chuck wood." } filename = "test.mp3" @@ -31,7 +31,7 @@ def main(): model="aura-asteria-en", ) - response = deepgram.speak.rest.v("1").save(filename, SPEAK_OPTIONS, options) + response = deepgram.speak.rest.v("1").save(filename, SPEAK_TEXT, options) print(response.to_json(indent=4)) except Exception as e: diff --git a/examples/text-to-speech/rest/memory/async_hello_world/main.py b/examples/text-to-speech/rest/memory/async_hello_world/main.py index d73c7579..ffd32969 100644 --- a/examples/text-to-speech/rest/memory/async_hello_world/main.py +++ b/examples/text-to-speech/rest/memory/async_hello_world/main.py @@ -15,7 +15,7 @@ load_dotenv() -SPEAK_OPTIONS = {"text": "Hello world!"} +SPEAK_TEXT = {"text": "Hello world!"} filename = "test.mp3" @@ -30,7 +30,7 @@ async def main(): ) response = await deepgram.speak.asyncrest.v("1").stream_memory( - SPEAK_OPTIONS, options + SPEAK_TEXT, options ) # save to file diff --git a/examples/text-to-speech/rest/memory/hello_world/main.py b/examples/text-to-speech/rest/memory/hello_world/main.py index 0a5aee0f..35288c00 100644 --- a/examples/text-to-speech/rest/memory/hello_world/main.py +++ b/examples/text-to-speech/rest/memory/hello_world/main.py @@ -15,7 +15,7 @@ load_dotenv() -SPEAK_OPTIONS = {"text": "Hello world!"} +SPEAK_TEXT = {"text": "Hello world!"} filename = "test.mp3" @@ -31,7 +31,7 @@ def main(): model="aura-asteria-en", ) - response = deepgram.speak.rest.v("1").stream_memory(SPEAK_OPTIONS, options) + response = deepgram.speak.rest.v("1").stream_memory(SPEAK_TEXT, options) # save to file with open(filename, "wb+") as file: diff --git a/examples/text-to-speech/rest/raw/async_hello_world/main.py b/examples/text-to-speech/rest/raw/async_hello_world/main.py index f13c5387..1f2cf0f0 100644 --- a/examples/text-to-speech/rest/raw/async_hello_world/main.py +++ b/examples/text-to-speech/rest/raw/async_hello_world/main.py @@ -16,7 +16,7 @@ load_dotenv() -SPEAK_OPTIONS = {"text": "Hello world!"} +SPEAK_TEXT = {"text": "Hello world!"} filename = "test.mp3" @@ -33,9 +33,7 @@ async def main(): model="aura-asteria-en", ) - response = await deepgram.speak.asyncrest.v("1").stream_raw( - SPEAK_OPTIONS, options - ) + response = await deepgram.speak.asyncrest.v("1").stream_raw(SPEAK_TEXT, options) print(f"Response: {response}") for header in response.headers: diff --git a/examples/text-to-speech/rest/raw/hello_world/main.py b/examples/text-to-speech/rest/raw/hello_world/main.py index 9ae5770b..2f8d6e0d 100644 --- a/examples/text-to-speech/rest/raw/hello_world/main.py +++ b/examples/text-to-speech/rest/raw/hello_world/main.py @@ -15,7 +15,7 @@ load_dotenv() -SPEAK_OPTIONS = {"text": "Hello world!"} +SPEAK_TEXT = {"text": "Hello world!"} filename = "test.mp3" @@ -32,7 +32,7 @@ def main(): model="aura-asteria-en", ) - response = deepgram.speak.rest.v("1").stream_raw(SPEAK_OPTIONS, options) + response = deepgram.speak.rest.v("1").stream_raw(SPEAK_TEXT, options) for header in response.headers: print(f"{header}: {response.headers[header]}") diff --git a/requirements-dev.txt b/requirements-dev.txt index d05169c3..85d4923d 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1,5 +1,10 @@ # pip install -r requirements.txt +# additional requirements for development +soundfile==0.12.1 +numpy==2.0.1 +websocket-server==0.6.4 + # lint, static, etc black==24.* pylint==3.* diff --git a/tests/__init__.py b/tests/__init__.py new 
file mode 100644 index 00000000..59827132 --- /dev/null +++ b/tests/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2023-2024 Deepgram SDK contributors. All Rights Reserved. +# Use of this source code is governed by a MIT license that can be found in the LICENSE file. +# SPDX-License-Identifier: MIT + +from .utils import ( + get_query_params, + create_dirs, + save_metadata_bytes, + save_metadata_string, + read_metadata_string, + read_metadata_bytes, + string_match_failure, +) diff --git a/tests/daily_test/conversation.txt b/tests/daily_test/conversation.txt new file mode 100644 index 00000000..2af89e08 --- /dev/null +++ b/tests/daily_test/conversation.txt @@ -0,0 +1,71 @@ +Meet Deepgram Aura: real-time text-to-speech for real-time AI agents +---------- +It’s been a year since large language models (LLMs) seemingly went mainstream overnight (Happy Birthday, ChatGPT!!!), and the world has witnessed both rapid development of these technologies and immense interest in their potential. We believe that we have reached an inflection point where voice-based interfaces will be the primary means to accessing LLMs and the experiences they unlock. Here are a few recent signals in support of our thesis: + +- Good old fashioned voice notes are enjoying a healthy resurgence. + +- According to a recent survey, a majority of respondents stated phone calls are still their preferred communication channel for resolving customer service issues. + +- An emerging boom in wearable devices equipped with continuous listening and speech AI technology is gaining steam. + +- OpenAI recently enabled voice interactions in ChatGPT. + +- A wave of interest in voice-first experiences and tools is sweeping across brands, investors, and tech companies. + +Thanks to ChatGPT and the advent of the LLM era, the conversational AI tech stack has advanced sufficiently to support productive (not frustrating) voice-powered AI assistants and agents that can interact with humans in a natural manner. We have already observed this from our most innovative customers who are actively turning to these technologies to build a diverse range of AI agents for voice ordering systems, interview bots, personal AI assistants, automated drive-thru tellers, and autonomous sales and customer service agents. + +While these AI agents hold immense potential, many customers have expressed their dissatisfaction with the current crop of voice AI vendors, citing roadblocks related to speed, cost, reliability, and conversational quality. That’s why we’re excited to introduce our own text-to-speech (TTS) API, Deepgram Aura, built for real-time, conversational voice AI agents. + +Whether used on its own or in conjunction with our industry-leading Nova-2 speech-to-text API, we’ll soon provide developers with a complete speech AI platform, giving them the essential building blocks they need to build high throughput, real-time AI agents of the future. + +We are thrilled about the progress our initial group of developers has made using Aura, so much so that we are extending limited access to a select few partners who will be free to begin integrating with Aura immediately. With their feedback, we’ll continue to enhance our suite of voices and API features, as well as ensure a smooth launch of their production-grade applications. + + +What Customers Want +---------- +I feel the need, the need for speed +What we’ve heard from many of our customers and partners is that voice AI technology today caters to two main areas: high production or high throughput. 
+ +High Production is all about crafting the perfect voice. It's used in projects where every tone and inflection matters, like in video games or audiobooks, to really bring a scene or story to life. Here, voice quality is king, with creators investing hours to fine-tune every detail for a powerful emotional impact. The primary benefit is the ability to swap out a high-paid voice actor with AI where you have more dynamic control over what’s being said while also achieving some cost savings. But these use cases are more specialized and represent just a sliver of the overall voice AI opportunity. + +On the flip side, High Throughput is about handling many quick, one-off interactions for real-time conversations at scale. Think fast food ordering, booking appointments, or inquiring about the latest deals at a car dealership. These tasks are relevant to just about everyone on the planet, and they require fast, efficient text-to-speech conversion for an AI agent to fulfill them. While voice quality is still important to keep users engaged, quality here is more about the naturalness of the flow of conversation and less about sounding like Morgan Freeman. But the primary focus for most customers in this category is on improving customer outcomes, meaning speed and efficiency are must-haves for ensuring these everyday exchanges are smooth and reliable at high volume. + +"Deepgram showed me less than 200ms latency today. That's the fastest text-to-speech I’ve ever seen. And our customers would be more than satisfied with the conversation quality." + +Jordan Dearsley, Co-founder at Vapi + +Although high production use cases seem to be well-served with UI-centric production tools, high throughput, real-time use cases still mostly rely on APIs provided by the major cloud providers. And our customers have been telling us that they’ve been falling short, with insufficient quality for a good user experience, too much latency to make real-time use cases work, and costs too expensive to operate at scale. + + +More human than human +---------- +With Aura, we’ll give realistic voices to AI agents. Our goal is to craft text-to-speech capabilities that mirror natural human conversations, including timely responses, the incorporation of natural speech fillers like 'um' and 'uh' during contemplation, and the modulation of tone and emotion according to the conversational context. We aim to incorporate laughter and other speech nuances as well. Furthermore, we are dedicated to tailoring these voices to their specific applications, ensuring they remain composed and articulate, particularly in enunciating account numbers and business names with precision. + +"I don’t really consider Azure and the other guys anymore because the voices sound so robotic." +Jordan Dearsley, Co-founder at Vapi + +In blind evaluation trials conducted for benchmarking, early versions of Aura have consistently been rated as sounding more human than prominent alternatives, even outranking human speakers for various audio clips more often than not on average. We were pleasantly surprised by these results (stay tuned for a future post containing comprehensive benchmarks for speed and quality soon!), so much so that we’re accelerating our development timeline and publicly announcing today’s waitlist expansion. + +Here are some sample clips generated by one of the earliest iterations of Aura. The quality and overall performance will continue to improve with additional model training and refinement. 
We encourage you to give them a listen and note the naturalness of their cadence, rhythm, and tone in the flow of conversation with another human. + + +Our Approach +---------- +For nearly a decade, we’ve worked tirelessly to advance the art of the possible in speech recognition and spoken language understanding. Along the way, we’ve transcribed trillions of spoken words into highly accurate transcriptions. Our model research team has developed novel transformer architectures equipped to deal with the nuances of conversational audio–across different languages, accents, and dialects, while handling disfluencies and the changing rhythms, tones, cadences, and inflections that occur in natural, back-and-forth conversations. + +And all the while, we’ve purposefully built our models under limited constraints to optimize their speed and efficiency. With support for dozens of languages and custom model training, our technical team has trained and deployed thousands of speech AI models (more than anybody else) which we operate and manage for our customers each day using our own computing infrastructure. + +We also have our own in-house data labeling and data ops team with years of experience building bespoke workflows to record, store, and transfer vast amounts of audio in order to label it and continuously grow our bank of high-quality data (millions of hours and counting) used in our model training. + +These combined experiences have made us experts in processing and modeling speech audio, especially in support of streaming use cases with our real-time STT models. Our customers have been asking if we could apply the same approach for TTS, and we can. + +So what can you expect from Aura? Delivering the same market-leading value and performance as Nova-2 does for STT. Aura is built to be the panacea for speed, quality, and efficiency–the fastest of the high-quality options, and the best quality of the fast ones. And that’s really what end users need and what our customers have been asking us to build. + +"Deepgram is a valued partner, providing our customers with high throughput speech-to-text that delivers unrivaled performance without tradeoffs between quality, speed, and cost. We're excited to see Deepgram extend their speech AI platform and bring this approach to the text-to-speech market." - Richard Dumas, VP AI Product Strategy at Five9 + + +What's Next +---------- +As we’ve discussed, scaled voice agents are a high throughput use case, and we believe their success will ultimately depend on a unified approach to audio, one that strikes the right balance between natural voice quality, responsiveness, and cost-efficiency. And with Aura, we’re just getting started. We’re looking forward to continuing to work with customers like Asurion and partners like Five9 across speech-to-text AND text-to-speech as we help them define the future of AI agents, and we invite you to join us on this journey. + +We expect to release generally early next year, but if you’re working on any real-time AI agent use cases, join our waitlist today to jumpstart your development in production as we continue to refine our model and API features with your direct feedback. 
\ No newline at end of file diff --git a/tests/daily_test/preamble-rest.wav b/tests/daily_test/preamble-rest.wav new file mode 100644 index 00000000..1049d0d2 Binary files /dev/null and b/tests/daily_test/preamble-rest.wav differ diff --git a/tests/daily_test/preamble-websocket.wav b/tests/daily_test/preamble-websocket.wav new file mode 100644 index 00000000..f901de75 Binary files /dev/null and b/tests/daily_test/preamble-websocket.wav differ diff --git a/tests/daily_test/test_daily_listen_rest_file.py b/tests/daily_test/test_daily_listen_rest_file.py new file mode 100644 index 00000000..dc4d0acf --- /dev/null +++ b/tests/daily_test/test_daily_listen_rest_file.py @@ -0,0 +1,115 @@ +# Copyright 2024 Deepgram SDK contributors. All Rights Reserved. +# Use of this source code is governed by a MIT license that can be found in the LICENSE file. +# SPDX-License-Identifier: MIT + +import contextlib +import os +import json +import pytest +import hashlib + +from deepgram import DeepgramClient, PrerecordedOptions, FileSource + +from tests.utils import save_metadata_string + +MODEL = "2-general-nova" + +# response constants +FILE1 = "preamble-rest.wav" +FILE1_SMART_FORMAT = "We, the people of the United States, in order to form a more perfect union, establish justice, ensure domestic tranquility, provide for the common defense, promote the general welfare, and secure the blessings of liberty to ourselves and our posterity to ordain and establish this constitution for the United States of America." +FILE1_SUMMARIZE1 = "Speaker 1 discusses the goal of establishing a more perfect union, justice, and the common defense for the United States, as part of the Better Union movement. They emphasize the importance of these factors in securing the benefits of liberty for the United States and the world." +FILE1_SUMMARIZE2 = "Speaker 1 discusses the goal of establishing a more perfect union, justice, and the common defense for the United States, as part of the Better Union movement. They emphasize the importance of these goals in securing the blessings of liberty for the United States and the nation." +FILE1_SUMMARIZE3 = "Speaker 1 discusses the importance of protecting the common defense and securing the benefits of liberty to establish...hey also mention the importance of providing for the general welfare and securing the bounty of liberty to themselves." +FILE1_SUMMARIZE4 = "Speaker 1 discusses the importance of establishing a better union, maintaining the common defense, and promoting the general welfare in order to secure the benefits of liberty and establish the United States constitution." +FILE1_SUMMARIZE5 = "Speaker 1 discusses the importance of protecting the common defense and securing the benefits of liberty to establish a United States of America constitution. They also mention the importance of providing for the general welfare and securing the bounty of liberty to ourselves and our posterity." 
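+# Each expected_output key below is a dot-notation path resolved with the new
+# BaseResponse.eval() helper introduced in this PR. For example,
+#   response.eval("results.channels.0.alternatives.0.transcript")
+# walks response.to_dict()["results"]["channels"][0]["alternatives"][0]["transcript"]
+# and returns it as a string, or "" if any path segment is missing.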
+
+# Create a list of tuples to store the key-value pairs
+input_output = [
+    (
+        FILE1,
+        PrerecordedOptions(model="nova-2", smart_format=True),
+        {"results.channels.0.alternatives.0.transcript": [FILE1_SMART_FORMAT]},
+    ),
+    (
+        FILE1,
+        PrerecordedOptions(model="nova-2", smart_format=True, summarize="v2"),
+        {
+            "results.channels.0.alternatives.0.transcript": [FILE1_SMART_FORMAT],
+            "results.summary.short": [
+                FILE1_SUMMARIZE1,
+                FILE1_SUMMARIZE2,
+                FILE1_SUMMARIZE3,
+                FILE1_SUMMARIZE4,
+            ],
+        },
+    ),
+]
+
+
+@pytest.mark.parametrize("filename, options, expected_output", input_output)
+def test_daily_listen_rest_file(filename, options, expected_output):
+    # Hash the options and input to create a unique test ID
+    filenamestr = json.dumps(filename)
+    input_sha256sum = hashlib.sha256(filenamestr.encode()).hexdigest()
+    option_sha256sum = hashlib.sha256(options.to_json().encode()).hexdigest()
+
+    unique = f"{option_sha256sum}-{input_sha256sum}"
+
+    # filenames
+    file_cmd = f"tests/response_data/listen/rest/{unique}.cmd"
+    file_options = f"tests/response_data/listen/rest/{unique}-options.json"
+    file_resp = f"tests/response_data/listen/rest/{unique}-response.json"
+    file_error = f"tests/response_data/listen/rest/{unique}-error.json"
+
+    # clean up
+    with contextlib.suppress(FileNotFoundError):
+        os.remove(file_cmd)
+    with contextlib.suppress(FileNotFoundError):
+        os.remove(file_options)
+    with contextlib.suppress(FileNotFoundError):
+        os.remove(file_resp)
+    with contextlib.suppress(FileNotFoundError):
+        os.remove(file_error)
+
+    # Create a Deepgram client
+    deepgram = DeepgramClient()
+
+    # file buffer
+    with open(f"tests/daily_test/{filename}", "rb") as file:
+        buffer_data = file.read()
+
+    payload: FileSource = {
+        "buffer": buffer_data,
+    }
+
+    # Send the file to Deepgram
+    response = deepgram.listen.rest.v("1").transcribe_file(payload, options)
+
+    # Save all the things
+    save_metadata_string(file_cmd, filenamestr)
+    save_metadata_string(file_options, options.to_json())
+    save_metadata_string(file_resp, response.to_json())
+
+    # Check the response
+    for key, value in response.metadata.model_info.items():
+        assert (
+            value.name == MODEL
+        ), f"Test ID: {unique} - Expected: {MODEL}, Actual: {value.name}"
+
+    for key, value in expected_output.items():
+        actual = response.eval(key)
+        expected = value
+
+        try:
+            assert (
+                actual in expected
+            ), f"Test ID: {unique} - Key: {key}, Expected: {expected}, Actual: {actual}"
+        finally:
+            # if the assertion failed, record the mismatch for triage
+            if not (actual in expected):
+                failure = {
+                    "actual": actual,
+                    "expected": expected,
+                }
+                failuresstr = json.dumps(failure)
+                save_metadata_string(file_error, failuresstr)
diff --git a/tests/daily_test/test_daily_listen_rest_url.py b/tests/daily_test/test_daily_listen_rest_url.py
new file mode 100644
index 00000000..98c42e42
--- /dev/null
+++ b/tests/daily_test/test_daily_listen_rest_url.py
@@ -0,0 +1,100 @@
+# Copyright 2024 Deepgram SDK contributors. All Rights Reserved.
+# Use of this source code is governed by a MIT license that can be found in the LICENSE file.
+# SPDX-License-Identifier: MIT
+
+import contextlib
+import os
+import json
+import pytest
+import hashlib
+
+from deepgram import DeepgramClient, PrerecordedOptions
+
+from tests.utils import save_metadata_string
+
+MODEL = "2-general-nova"
+
+# response constants
+URL1 = {
+    "url": "https://static.deepgram.com/examples/Bueller-Life-moves-pretty-fast.wav"
+}
+URL1_SMART_FORMAT = "Yep. I said it before and I'll say it again. Life moves pretty fast. You don't stop and look around once in a while, you could miss it."
+URL1_SUMMARIZE = "Yep. I said it before and I'll say it again. Life moves pretty fast. You don't stop and look around once in a while, you could miss it." + +# Create a list of tuples to store the key-value pairs +input_output = [ + ( + URL1, + PrerecordedOptions(model="nova-2", smart_format=True), + {"results.channels.0.alternatives.0.transcript": [URL1_SMART_FORMAT]}, + ), + ( + URL1, + PrerecordedOptions(model="nova-2", smart_format=True, summarize="v2"), + { + "results.channels.0.alternatives.0.transcript": [URL1_SMART_FORMAT], + "results.summary.short": [URL1_SUMMARIZE], + }, + ), +] + + +@pytest.mark.parametrize("url, options, expected_output", input_output) +def test_daily_listen_rest_url(url, options, expected_output): + # Save the options + urlstr = json.dumps(url) + input_sha256sum = hashlib.sha256(urlstr.encode()).hexdigest() + option_sha256sum = hashlib.sha256(options.to_json().encode()).hexdigest() + + unique = f"{option_sha256sum}-{input_sha256sum}" + + # filenames + file_cmd = f"tests/response_data/listen/rest/{unique}.cmd" + file_options = f"tests/response_data/listen/rest/{unique}-options.json" + file_resp = f"tests/response_data/listen/rest/{unique}-response.json" + file_error = f"tests/response_data/listen/rest/{unique}-error.json" + + # clean up + with contextlib.suppress(FileNotFoundError): + os.remove(file_cmd) + with contextlib.suppress(FileNotFoundError): + os.remove(file_options) + with contextlib.suppress(FileNotFoundError): + os.remove(file_resp) + with contextlib.suppress(FileNotFoundError): + os.remove(file_error) + + # Create a Deepgram client + deepgram = DeepgramClient() + + # Send the URL to Deepgram + response = deepgram.listen.rest.v("1").transcribe_url(url, options) + + # Save all the things + save_metadata_string(file_cmd, urlstr) + save_metadata_string(file_options, options.to_json()) + save_metadata_string(file_resp, response.to_json()) + + # Check the response + for key, value in response.metadata.model_info.items(): + assert ( + value.name == MODEL + ), f"Test ID: {unique} - Expected: {MODEL}, Actual: {value.name}" + + for key, value in expected_output.items(): + actual = response.eval(key) + expected = value + + try: + assert ( + actual in expected + ), f"Test ID: {unique} - Key: {key}, Expected: {expected}, Actual: {actual}" + finally: + # if asserted + if not (actual in expected): + failure = { + "actual": actual, + "expected": expected, + } + failuresstr = json.dumps(failure) + save_metadata_string(file_error, failuresstr) diff --git a/tests/daily_test/test_daily_listen_websocket.py b/tests/daily_test/test_daily_listen_websocket.py new file mode 100644 index 00000000..7a1fc5a3 --- /dev/null +++ b/tests/daily_test/test_daily_listen_websocket.py @@ -0,0 +1,179 @@ +# Copyright 2024 Deepgram SDK contributors. All Rights Reserved. +# Use of this source code is governed by a MIT license that can be found in the LICENSE file. +# SPDX-License-Identifier: MIT + +import contextlib +import os +import json +import pytest +import hashlib +import time +import soundfile as sf + +from deepgram import ( + DeepgramClient, + DeepgramClientOptions, + LiveOptions, + LiveTranscriptionEvents, +) + +from tests.utils import save_metadata_string + +MODEL = "2-general-nova" + +# response constants +FILE1 = "testing-websocket.wav" +FILE2 = "preamble-websocket.wav" +FILE1_SMART_FORMAT = "Testing. 123. Testing. 123." 
+FILE2_SMART_FORMAT1 = "We, the people of the United States, in order to form a more perfect union, establish justice, ensure domestic tranquility, provide for the common defense, promote the general welfare, and secure the blessings of liberty to ourselves and our posterity to ordain and establish this constitution for the United States of America."
+FILE2_SMART_FORMAT2 = "We, the people of the United States, order to form a more perfect union, establish justice, ensure domestic tranquility, provide for the common defense, promote the general welfare, secure the blessings of liberty to ourselves and our posterity to ordain and establish this constitution. For the United States of America."
+FILE2_SMART_FORMAT3 = "We, the people of the United States, order to form a more perfect union, establish justice, ensure domestic tranquility, provide for the common defense, promote the general welfare, secure the blessings of liberty to ourselves and our posterity to ordain and establish this constitution for the United States of America."
+
+# Create a list of tuples to store the key-value pairs
+input_output = [
+    (
+        FILE1,
+        LiveOptions(
+            language="en-US",
+            smart_format=True,
+            encoding="mulaw",
+            channels=1,
+            sample_rate=8000,
+            punctuate=True,
+        ),
+        {"output": [FILE1_SMART_FORMAT]},
+    ),
+    (
+        FILE2,
+        LiveOptions(
+            language="en-US",
+            smart_format=True,
+            encoding="mulaw",
+            channels=1,
+            sample_rate=8000,
+            punctuate=True,
+        ),
+        {"output": [FILE2_SMART_FORMAT1, FILE2_SMART_FORMAT2, FILE2_SMART_FORMAT3]},
+    ),
+]
+
+response = ""
+raw_json = ""
+
+
+@pytest.mark.parametrize("filename, options, expected_output", input_output)
+def test_daily_listen_websocket(filename, options, expected_output):
+    global response, raw_json
+    response = ""
+    raw_json = ""
+
+    # Hash the options and input to create a unique test ID
+    filenamestr = json.dumps(filename)
+    input_sha256sum = hashlib.sha256(filenamestr.encode()).hexdigest()
+    option_sha256sum = hashlib.sha256(options.to_json().encode()).hexdigest()
+
+    unique = f"{option_sha256sum}-{input_sha256sum}"
+
+    # filenames
+    file_cmd = f"tests/response_data/listen/websocket/{unique}.cmd"
+    file_options = f"tests/response_data/listen/websocket/{unique}-options.json"
+    file_resp = f"tests/response_data/listen/websocket/{unique}-response.json"
+    file_error = f"tests/response_data/listen/websocket/{unique}-error.json"
+
+    # clean up
+    with contextlib.suppress(FileNotFoundError):
+        os.remove(file_cmd)
+    with contextlib.suppress(FileNotFoundError):
+        os.remove(file_options)
+    with contextlib.suppress(FileNotFoundError):
+        os.remove(file_resp)
+    with contextlib.suppress(FileNotFoundError):
+        os.remove(file_error)
+
+    # Create a Deepgram client
+    config = DeepgramClientOptions(options={"keepalive": "true"})
+    deepgram: DeepgramClient = DeepgramClient("", config)
+
+    # Open a websocket connection to Deepgram
+    dg_connection = deepgram.listen.websocket.v("1")
+
+    def on_message(self, result, **kwargs):
+        global response, raw_json
+        sentence = result.channel.alternatives[0].transcript
+        if len(sentence) == 0:
+            return
+        if result.is_final:
+            raw_json = result.to_json()  # TODO: need to handle multiple results
+            if len(response) > 0:
+                response = response + " "
+            response = response + sentence
+
+    dg_connection.on(LiveTranscriptionEvents.Transcript, on_message)
+
+    # connect
+    assert dg_connection.start(options) == True
+    time.sleep(0.5)
+
+    # Read the mu-law encoded WAV file using soundfile
+    data, samplerate = sf.read(
+        f"tests/daily_test/{filename}",
+        dtype="int16",
+        channels=1,
+        format="RAW",
+        subtype="PCM_16",
+        samplerate=8000,
+    )
+
+    # Stream the audio frames in chunks
+    chunk_size = 4096  # Adjust as necessary
+    for i in range(0, len(data), chunk_size):
+        chunk = data[i : i + chunk_size].tobytes()
+        dg_connection.send(chunk)
+        time.sleep(0.25)
+
+    # each iteration is 0.5 seconds * 20 iterations = 10 second timeout
+    timeout = 0
+    exit = False
+    while dg_connection.is_connected() and timeout < 20 and not exit:
+        for key, value in expected_output.items():
+            if response in value:
+                exit = True
+                break
+        timeout = timeout + 1
+        time.sleep(0.5)
+
+    # close
+    dg_connection.finish()
+    time.sleep(0.25)
+
+    # Check the response
+    assert response != "", f"Test ID: {unique} - No response received"
+    assert (
+        timeout < 20
+    ), f"Test ID: {unique} - Timed out OR the value is not in the expected_output"
+
+    # Save all the things
+    save_metadata_string(file_cmd, filenamestr)
+    save_metadata_string(file_options, options.to_json())
+    save_metadata_string(file_resp, raw_json)
+
+    # Check the response
+    for key, value in expected_output.items():
+        actual = response
+        expected = value
+
+        try:
+            assert (
+                actual in expected
+            ), f"Test ID: {unique} - Expected: {expected}, Actual: {actual}"
+        finally:
+            # if the assertion failed, record the mismatch for triage
+            if not (actual in expected):
+                failure = {
+                    "actual": actual,
+                    "expected": expected,
+                }
+                failuresstr = json.dumps(failure)
+                save_metadata_string(file_error, failuresstr)
diff --git a/tests/daily_test/test_daily_read_rest_file.py b/tests/daily_test/test_daily_read_rest_file.py
new file mode 100644
index 00000000..0ba2a645
--- /dev/null
+++ b/tests/daily_test/test_daily_read_rest_file.py
@@ -0,0 +1,104 @@
+# Copyright 2024 Deepgram SDK contributors. All Rights Reserved.
+# Use of this source code is governed by a MIT license that can be found in the LICENSE file.
+# SPDX-License-Identifier: MIT
+
+import contextlib
+import os
+import json
+import pytest
+import hashlib
+
+from deepgram import DeepgramClient, AnalyzeOptions, FileSource
+
+from tests.utils import save_metadata_string
+
+# response constants
+FILE1 = "conversation.txt"
+FILE1_SUMMARIZE1 = "The potential for voice-based interfaces in conversational AI applications is discussed, with a focus on voice-premises and wearable devices. The success of voice-first experiences and tools, including DeepgramQuad, is highlighted, along with the potential for high-throughput and fast text-to-speech conversion for AI agents. The speakers emphasize the benefits of voice quality, including natural speech flow, and the importance of tailoring voice to specific applications. They also mention their involvement in machine learning and their plans to expand their waitlist for a speech-to-text model. They expect to release generally early next year, but if working on any real-time AI agent use cases, they can join their waitlist to jumpstart their development in production."
+FILE1_SUMMARIZE2 = "The speakers discuss the potential for voice-based interfaces to unlock conversational AI applications, including high-performance voice-to-text (TTS) and voice-to-text (TTS) API and DeepgramQuad. They also mention their involvement in machine learning and their plans to expand their waitlist for a speech-to-text model. They expect to release generally early next year, but if working on any real-time AI agent use cases, they can join their waitlist to jumpstart their development in production. They also discuss the benefits of voice quality, including the ability to swap between voices, the naturalness of the flow of conversations, and the importance of tailoring voice to specific applications. They plan to expand their waitlist for a speech-to-text model and work on real-time AI agent use cases."
+FILE1_SUMMARIZE3 = "The potential for voice-based interfaces in conversational AI applications is discussed, with a focus on voice-premised and wearable devices. The speakers emphasize the benefits of voice quality, including natural speech flow, and the potential for AI agents to be more human than humans in speech recognition. They also mention their involvement in machine learning and plans to expand their waitlist for a speech-to-text model. They expect to release generally early next year, but if working on any real-time AI agent use cases, they can join their waitlist to jumpstart their development in production."
+FILE1_SUMMARIZE4 = "The speakers discuss the potential for voice-based interfaces to unlock conversational AI applications, including the use of natural voices and audio to create voice-like experiences. They also emphasize the benefits of voice quality, including the ability to swap between voices and the natural flow of conversations. The potential for AI agents to be more human than humans in speech recognition, including the use of natural voices and audio to create voice-like experiences. They plan to expand their waitlist for a speech-to-text model and work on real-time AI agent use cases, with a general release expected early next year. They also mention their involvement in machine learning and their plans to expand their waitlist for a speech-to-text model."
+FILE1_SUMMARIZE5 = "The potential for voice-based interfaces in conversational AI applications is discussed, with a focus on voice-premises and wearable devices. The success of voice-first experiences and tools, including DeepgramQuad, is highlighted, along with the potential for high-throughput and fast text-to-speech conversion for AI agents. The speakers emphasize the benefits of voice quality, including natural speech flow, and the importance of tailoring voice to specific applications. They expect to release soon and invite customers to join their waitlist for more information on their plans."
+FILE1_SUMMARIZE6 = "The potential for voice-based interfaces in conversational AI applications is discussed, with a focus on voice-premises and wearable devices. The success of voice-first experiences and tools, including DeepgramQuad, is highlighted, along with the potential for high-throughput and fast text-to-speech conversion for AI agents. The speakers emphasize the benefits of voice quality, including natural speech flow, and the potential for AI agents to be more human than humans in the future. The company is excited about the potential for AI agents to be more human than humans in the future."
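+# Summarization output varies from run to run, so several acceptable variants
+# are recorded above; the assertion below passes when the returned summary
+# matches any one of them (`actual in expected`).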
+ +# Create a list of tuples to store the key-value pairs +input_output = [ + ( + FILE1, + AnalyzeOptions(language="en", summarize=True), + { + "results.summary.text": [ + FILE1_SUMMARIZE1, + FILE1_SUMMARIZE2, + FILE1_SUMMARIZE3, + FILE1_SUMMARIZE4, + FILE1_SUMMARIZE5, + FILE1_SUMMARIZE6, + ] + }, + ), +] + + +@pytest.mark.parametrize("filename, options, expected_output", input_output) +def test_daily_analyze_rest_file(filename, options, expected_output): + # Build a unique test ID from hashes of the options and input + filenamestr = json.dumps(filename) + input_sha256sum = hashlib.sha256(filenamestr.encode()).hexdigest() + option_sha256sum = hashlib.sha256(options.to_json().encode()).hexdigest() + + unique = f"{option_sha256sum}-{input_sha256sum}" + + # filenames + file_cmd = f"tests/response_data/read/rest/{unique}.cmd" + file_options = f"tests/response_data/read/rest/{unique}-options.json" + file_resp = f"tests/response_data/read/rest/{unique}-response.json" + file_error = f"tests/response_data/read/rest/{unique}-error.json" + + # clean up + with contextlib.suppress(FileNotFoundError): + os.remove(file_cmd) + with contextlib.suppress(FileNotFoundError): + os.remove(file_options) + with contextlib.suppress(FileNotFoundError): + os.remove(file_resp) + with contextlib.suppress(FileNotFoundError): + os.remove(file_error) + + # Create a Deepgram client + deepgram = DeepgramClient() + + # file buffer + with open(f"tests/daily_test/{filename}", "rb") as file: + buffer_data = file.read() + + payload: FileSource = { + "buffer": buffer_data, + } + + # Send the text to Deepgram for analysis + response = deepgram.read.analyze.v("1").analyze_text(payload, options) + + # Save all the things + save_metadata_string(file_cmd, filenamestr) + save_metadata_string(file_options, options.to_json()) + save_metadata_string(file_resp, response.to_json()) + + # Check the response + for key, value in expected_output.items(): + actual = response.eval(key) + expected = value + + try: + assert ( + actual in expected + ), f"Test ID: {unique} - Key: {key}, Expected: {expected}, Actual: {actual}" + finally: + # if the assertion failed + if not (actual in expected): + failure = { + "actual": actual, + "expected": expected, + } + failuresstr = json.dumps(failure) + save_metadata_string(file_error, failuresstr) diff --git a/tests/daily_test/test_daily_speak_rest.py b/tests/daily_test/test_daily_speak_rest.py new file mode 100644 index 00000000..11e56b1d --- /dev/null +++ b/tests/daily_test/test_daily_speak_rest.py @@ -0,0 +1,113 @@ +# Copyright 2024 Deepgram SDK contributors. All Rights Reserved. +# Use of this source code is governed by a MIT license that can be found in the LICENSE file. +# SPDX-License-Identifier: MIT + +import contextlib +import os +import json +import pytest +import hashlib + +from deepgram import DeepgramClient, SpeakOptions, PrerecordedOptions, FileSource + +from tests.utils import save_metadata_string + +TTS_MODEL = "aura-asteria-en" +STT_MODEL = "2-general-nova" + +# response constants
TEXT1 = "Hello, world."
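+# Round-trip check: synthesize TEXT1 with the TTS model, then transcribe the resulting audio with the STT model and expect the original text back.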
+ +# Create a list of tuples to store the key-value pairs +input_output = [ + ( + TEXT1, + SpeakOptions(model=TTS_MODEL, encoding="linear16", sample_rate=24000), + PrerecordedOptions(model="nova-2", smart_format=True), + {"results.channels.0.alternatives.0.transcript": [TEXT1]}, + ), +] + + +@pytest.mark.parametrize( + "text, tts_options, stt_options, expected_output", input_output +) +def test_daily_speak_rest(text, tts_options, stt_options, expected_output): + # Build a unique test ID from hashes of the options and input + input_sha256sum = hashlib.sha256(text.encode()).hexdigest() + option_sha256sum = hashlib.sha256(tts_options.to_json().encode()).hexdigest() + + unique = f"{option_sha256sum}-{input_sha256sum}" + + # filenames + audio_file = f"tests/response_data/speak/rest/{unique}.wav" + file_cmd = f"tests/response_data/speak/rest/{unique}.cmd" + file_options = f"tests/response_data/speak/rest/{unique}-options.json" + file_resp = f"tests/response_data/speak/rest/{unique}-response.json" + file_error = f"tests/response_data/speak/rest/{unique}-error.json" + + # clean up + with contextlib.suppress(FileNotFoundError): + os.remove(audio_file) + with contextlib.suppress(FileNotFoundError): + os.remove(file_cmd) + with contextlib.suppress(FileNotFoundError): + os.remove(file_options) + with contextlib.suppress(FileNotFoundError): + os.remove(file_resp) + with contextlib.suppress(FileNotFoundError): + os.remove(file_error) + + # Create a Deepgram client + deepgram = DeepgramClient() + + # input text + input_text = {"text": text} + + # Send the text to Deepgram to synthesize audio + response = deepgram.speak.rest.v("1").stream_memory(input_text, tts_options) + + # Save all the things + save_metadata_string(file_cmd, text) + save_metadata_string(file_options, tts_options.to_json()) + save_metadata_string(file_resp, response.to_json()) + + with open(audio_file, "wb+") as file: + file.write(response.stream_memory.getbuffer()) + file.flush() + + # Verify the synthesized audio by transcribing it back to text + # file buffer + with open(audio_file, "rb") as file: + buffer_data = file.read() + + payload: FileSource = { + "buffer": buffer_data, + } + + # Send the synthesized audio to Deepgram for transcription + response = deepgram.listen.rest.v("1").transcribe_file(payload, stt_options) + + # Check the response + for key, value in response.metadata.model_info.items(): + assert ( + value.name == STT_MODEL + ), f"Test ID: {unique} - Expected: {STT_MODEL}, Actual: {value.name}" + + for key, value in expected_output.items(): + actual = response.eval(key) + expected = value + + try: + assert ( + actual in expected + ), f"Test ID: {unique} - Key: {key}, Expected: {expected}, Actual: {actual}" + finally: + # if the assertion failed + if not (actual in expected): + failure = { + "actual": actual, + "expected": expected, + } + failuresstr = json.dumps(failure) + save_metadata_string(file_error, failuresstr) diff --git a/tests/daily_test/testing-websocket.wav b/tests/daily_test/testing-websocket.wav new file mode 100644 index 00000000..c0ebac88 Binary files /dev/null and b/tests/daily_test/testing-websocket.wav differ diff --git a/tests/response_data/listen/rest/b00dc103a62ea2ccfc752ec0f646c7528ef5e729a9d7481d2a944253a9128ce2-29e7c8100617f70da4ae9da1921cb5071a01219f4780ca70930b0a370ed2163a-options.json b/tests/response_data/listen/rest/b00dc103a62ea2ccfc752ec0f646c7528ef5e729a9d7481d2a944253a9128ce2-29e7c8100617f70da4ae9da1921cb5071a01219f4780ca70930b0a370ed2163a-options.json new file mode 100644 index 00000000..66dd9bf2 --- /dev/null +++
b/tests/response_data/listen/rest/b00dc103a62ea2ccfc752ec0f646c7528ef5e729a9d7481d2a944253a9128ce2-29e7c8100617f70da4ae9da1921cb5071a01219f4780ca70930b0a370ed2163a-options.json @@ -0,0 +1 @@ +{"model": "nova-2", "smart_format": true} \ No newline at end of file diff --git a/tests/response_data/listen/rest/b00dc103a62ea2ccfc752ec0f646c7528ef5e729a9d7481d2a944253a9128ce2-29e7c8100617f70da4ae9da1921cb5071a01219f4780ca70930b0a370ed2163a-response.json b/tests/response_data/listen/rest/b00dc103a62ea2ccfc752ec0f646c7528ef5e729a9d7481d2a944253a9128ce2-29e7c8100617f70da4ae9da1921cb5071a01219f4780ca70930b0a370ed2163a-response.json new file mode 100644 index 00000000..03dda577 --- /dev/null +++ b/tests/response_data/listen/rest/b00dc103a62ea2ccfc752ec0f646c7528ef5e729a9d7481d2a944253a9128ce2-29e7c8100617f70da4ae9da1921cb5071a01219f4780ca70930b0a370ed2163a-response.json @@ -0,0 +1 @@ +{"metadata": {"transaction_key": "deprecated", "request_id": "fe83b0a5-8aaf-462b-b441-394ac06842ec", "sha256": "5324da68ede209a16ac69a38e8cd29cee4d754434a041166cda3a1f5e0b24566", "created": "2024-07-30T13:50:15.039Z", "duration": 17.566313, "channels": 1, "models": ["30089e05-99d1-4376-b32e-c263170674af"], "model_info": {"30089e05-99d1-4376-b32e-c263170674af": {"name": "2-general-nova", "version": "2024-01-09.29447", "arch": "nova-2"}}}, "results": {"channels": [{"alternatives": [{"transcript": "Yep. I said it before and I'll say it again. Life moves pretty fast. You don't stop and look around once in a while, you could miss it.", "confidence": 0.9982221, "words": [{"word": "yep", "start": 5.52, "end": 6.02, "confidence": 0.9984743, "punctuated_word": "Yep."}, {"word": "i", "start": 7.095, "end": 7.2549996, "confidence": 0.83988285, "punctuated_word": "I"}, {"word": "said", "start": 7.2549996, "end": 7.415, "confidence": 0.93736196, "punctuated_word": "said"}, {"word": "it", "start": 7.415, "end": 7.495, "confidence": 0.99844295, "punctuated_word": "it"}, {"word": "before", "start": 7.495, "end": 7.975, "confidence": 0.9997708, "punctuated_word": "before"}, {"word": "and", "start": 7.975, "end": 8.135, "confidence": 0.5562555, "punctuated_word": "and"}, {"word": "i'll", "start": 8.135, "end": 8.295, "confidence": 0.9982221, "punctuated_word": "I'll"}, {"word": "say", "start": 8.295, "end": 8.455, "confidence": 0.99867755, "punctuated_word": "say"}, {"word": "it", "start": 8.455, "end": 8.615, "confidence": 0.99852693, "punctuated_word": "it"}, {"word": "again", "start": 8.615, "end": 9.115, "confidence": 0.8483447, "punctuated_word": "again."}, {"word": "life", "start": 9.975, "end": 10.295, "confidence": 0.995748, "punctuated_word": "Life"}, {"word": "moves", "start": 10.295, "end": 10.695, "confidence": 0.9985415, "punctuated_word": "moves"}, {"word": "pretty", "start": 10.695, "end": 11.014999, "confidence": 0.99934846, "punctuated_word": "pretty"}, {"word": "fast", "start": 11.014999, "end": 11.514999, "confidence": 0.999277, "punctuated_word": "fast."}, {"word": "you", "start": 11.975, "end": 12.215, "confidence": 0.9480036, "punctuated_word": "You"}, {"word": "don't", "start": 12.215, "end": 12.455, "confidence": 0.99980104, "punctuated_word": "don't"}, {"word": "stop", "start": 12.455, "end": 12.695, "confidence": 0.99982065, "punctuated_word": "stop"}, {"word": "and", "start": 12.695, "end": 12.855, "confidence": 0.9984894, "punctuated_word": "and"}, {"word": "look", "start": 12.855, "end": 13.094999, "confidence": 0.9997218, "punctuated_word": "look"}, {"word": "around", "start": 13.094999, "end": 13.334999, 
"confidence": 0.9994823, "punctuated_word": "around"}, {"word": "once", "start": 13.334999, "end": 13.575, "confidence": 0.9980413, "punctuated_word": "once"}, {"word": "in", "start": 13.575, "end": 13.735, "confidence": 0.9971276, "punctuated_word": "in"}, {"word": "a", "start": 13.735, "end": 13.815, "confidence": 0.9545489, "punctuated_word": "a"}, {"word": "while", "start": 13.815, "end": 14.315, "confidence": 0.9719232, "punctuated_word": "while,"}, {"word": "you", "start": 14.561313, "end": 14.7213125, "confidence": 0.98991394, "punctuated_word": "you"}, {"word": "could", "start": 14.7213125, "end": 14.961312, "confidence": 0.99663407, "punctuated_word": "could"}, {"word": "miss", "start": 14.961312, "end": 15.461312, "confidence": 0.9973659, "punctuated_word": "miss"}, {"word": "it", "start": 17.281313, "end": 17.566313, "confidence": 0.98995495, "punctuated_word": "it."}], "paragraphs": {"transcript": "\nYep. I said it before and I'll say it again. Life moves pretty fast. You don't stop and look around once in a while, you could miss it.", "paragraphs": [{"sentences": [{"text": "Yep.", "start": 5.52, "end": 6.02}, {"text": "I said it before and I'll say it again.", "start": 7.095, "end": 9.115}, {"text": "Life moves pretty fast.", "start": 9.975, "end": 11.514999}, {"text": "You don't stop and look around once in a while, you could miss it.", "start": 11.975, "end": 17.566313}], "start": 5.52, "end": 17.566313, "num_words": 28}]}}]}]}} \ No newline at end of file diff --git a/tests/response_data/listen/rest/b00dc103a62ea2ccfc752ec0f646c7528ef5e729a9d7481d2a944253a9128ce2-29e7c8100617f70da4ae9da1921cb5071a01219f4780ca70930b0a370ed2163a.cmd b/tests/response_data/listen/rest/b00dc103a62ea2ccfc752ec0f646c7528ef5e729a9d7481d2a944253a9128ce2-29e7c8100617f70da4ae9da1921cb5071a01219f4780ca70930b0a370ed2163a.cmd new file mode 100644 index 00000000..fcf4600d --- /dev/null +++ b/tests/response_data/listen/rest/b00dc103a62ea2ccfc752ec0f646c7528ef5e729a9d7481d2a944253a9128ce2-29e7c8100617f70da4ae9da1921cb5071a01219f4780ca70930b0a370ed2163a.cmd @@ -0,0 +1 @@ +{"url": "https://static.deepgram.com/examples/Bueller-Life-moves-pretty-fast.wav"} \ No newline at end of file diff --git a/tests/response_data/listen/rest/b00dc103a62ea2ccfc752ec0f646c7528ef5e729a9d7481d2a944253a9128ce2-a17f4880c5b4cf124ac54d06d77c9f0ab7f3fe1052ff1c7b090f7eaf8ede5b76-options.json b/tests/response_data/listen/rest/b00dc103a62ea2ccfc752ec0f646c7528ef5e729a9d7481d2a944253a9128ce2-a17f4880c5b4cf124ac54d06d77c9f0ab7f3fe1052ff1c7b090f7eaf8ede5b76-options.json new file mode 100644 index 00000000..66dd9bf2 --- /dev/null +++ b/tests/response_data/listen/rest/b00dc103a62ea2ccfc752ec0f646c7528ef5e729a9d7481d2a944253a9128ce2-a17f4880c5b4cf124ac54d06d77c9f0ab7f3fe1052ff1c7b090f7eaf8ede5b76-options.json @@ -0,0 +1 @@ +{"model": "nova-2", "smart_format": true} \ No newline at end of file diff --git a/tests/response_data/listen/rest/b00dc103a62ea2ccfc752ec0f646c7528ef5e729a9d7481d2a944253a9128ce2-a17f4880c5b4cf124ac54d06d77c9f0ab7f3fe1052ff1c7b090f7eaf8ede5b76-response.json b/tests/response_data/listen/rest/b00dc103a62ea2ccfc752ec0f646c7528ef5e729a9d7481d2a944253a9128ce2-a17f4880c5b4cf124ac54d06d77c9f0ab7f3fe1052ff1c7b090f7eaf8ede5b76-response.json new file mode 100644 index 00000000..296f4d0a --- /dev/null +++ b/tests/response_data/listen/rest/b00dc103a62ea2ccfc752ec0f646c7528ef5e729a9d7481d2a944253a9128ce2-a17f4880c5b4cf124ac54d06d77c9f0ab7f3fe1052ff1c7b090f7eaf8ede5b76-response.json @@ -0,0 +1 @@ +{"metadata": {"transaction_key": 
"deprecated", "request_id": "b25d6afb-5531-4c39-98e6-d2cd72a52399", "sha256": "95dc40091b6a8456a1554ddfc4f163768217afd66bee70a10c74bb52805cd0d9", "created": "2024-07-30T13:50:11.992Z", "duration": 19.097937, "channels": 1, "models": ["30089e05-99d1-4376-b32e-c263170674af"], "model_info": {"30089e05-99d1-4376-b32e-c263170674af": {"name": "2-general-nova", "version": "2024-01-09.29447", "arch": "nova-2"}}}, "results": {"channels": [{"alternatives": [{"transcript": "We, the people of the United States, in order to form a more perfect union, establish justice, ensure domestic tranquility, provide for the common defense, promote the general welfare, and secure the blessings of liberty to ourselves and our posterity to ordain and establish this constitution for the United States of America.", "confidence": 0.99925345, "words": [{"word": "we", "start": 0.32, "end": 0.64, "confidence": 0.95071673, "punctuated_word": "We,"}, {"word": "the", "start": 0.64, "end": 0.88, "confidence": 0.9970252, "punctuated_word": "the"}, {"word": "people", "start": 0.88, "end": 1.1999999, "confidence": 0.97336787, "punctuated_word": "people"}, {"word": "of", "start": 1.1999999, "end": 1.4399999, "confidence": 0.9990214, "punctuated_word": "of"}, {"word": "the", "start": 1.4399999, "end": 1.52, "confidence": 0.9991234, "punctuated_word": "the"}, {"word": "united", "start": 1.52, "end": 1.92, "confidence": 0.9996474, "punctuated_word": "United"}, {"word": "states", "start": 1.92, "end": 2.3999999, "confidence": 0.9916526, "punctuated_word": "States,"}, {"word": "in", "start": 2.3999999, "end": 2.56, "confidence": 0.9996748, "punctuated_word": "in"}, {"word": "order", "start": 2.56, "end": 2.8799999, "confidence": 0.99996233, "punctuated_word": "order"}, {"word": "to", "start": 2.8799999, "end": 3.04, "confidence": 0.99966836, "punctuated_word": "to"}, {"word": "form", "start": 3.04, "end": 3.28, "confidence": 0.99957234, "punctuated_word": "form"}, {"word": "a", "start": 3.28, "end": 3.36, "confidence": 0.99972504, "punctuated_word": "a"}, {"word": "more", "start": 3.36, "end": 3.6, "confidence": 0.9999572, "punctuated_word": "more"}, {"word": "perfect", "start": 3.6, "end": 3.9199998, "confidence": 0.99989796, "punctuated_word": "perfect"}, {"word": "union", "start": 3.9199998, "end": 4.42, "confidence": 0.9926959, "punctuated_word": "union,"}, {"word": "establish", "start": 4.72, "end": 5.22, "confidence": 0.9708791, "punctuated_word": "establish"}, {"word": "justice", "start": 5.2799997, "end": 5.7799997, "confidence": 0.9977596, "punctuated_word": "justice,"}, {"word": "ensure", "start": 6.0, "end": 6.3999996, "confidence": 0.92022455, "punctuated_word": "ensure"}, {"word": "domestic", "start": 6.3999996, "end": 6.8799996, "confidence": 0.9863414, "punctuated_word": "domestic"}, {"word": "tranquility", "start": 6.8799996, "end": 7.3799996, "confidence": 0.9984266, "punctuated_word": "tranquility,"}, {"word": "provide", "start": 7.9199996, "end": 8.24, "confidence": 0.9998066, "punctuated_word": "provide"}, {"word": "for", "start": 8.24, "end": 8.48, "confidence": 0.99993324, "punctuated_word": "for"}, {"word": "the", "start": 8.48, "end": 8.559999, "confidence": 0.9998996, "punctuated_word": "the"}, {"word": "common", "start": 8.559999, "end": 8.88, "confidence": 0.99925345, "punctuated_word": "common"}, {"word": "defense", "start": 8.88, "end": 9.355, "confidence": 0.99885684, "punctuated_word": "defense,"}, {"word": "promote", "start": 9.594999, "end": 9.915, "confidence": 0.9908229, "punctuated_word": 
"promote"}, {"word": "the", "start": 9.915, "end": 10.075, "confidence": 0.9994222, "punctuated_word": "the"}, {"word": "general", "start": 10.075, "end": 10.554999, "confidence": 0.99770135, "punctuated_word": "general"}, {"word": "welfare", "start": 10.554999, "end": 10.955, "confidence": 0.9617263, "punctuated_word": "welfare,"}, {"word": "and", "start": 10.955, "end": 11.195, "confidence": 0.99983335, "punctuated_word": "and"}, {"word": "secure", "start": 11.195, "end": 11.514999, "confidence": 0.99982953, "punctuated_word": "secure"}, {"word": "the", "start": 11.514999, "end": 11.674999, "confidence": 0.9998596, "punctuated_word": "the"}, {"word": "blessings", "start": 11.674999, "end": 11.994999, "confidence": 0.9988814, "punctuated_word": "blessings"}, {"word": "of", "start": 11.994999, "end": 12.235, "confidence": 0.99994814, "punctuated_word": "of"}, {"word": "liberty", "start": 12.235, "end": 12.714999, "confidence": 0.9485822, "punctuated_word": "liberty"}, {"word": "to", "start": 12.714999, "end": 12.875, "confidence": 0.9982722, "punctuated_word": "to"}, {"word": "ourselves", "start": 12.875, "end": 13.355, "confidence": 0.9997156, "punctuated_word": "ourselves"}, {"word": "and", "start": 13.355, "end": 13.514999, "confidence": 0.87418187, "punctuated_word": "and"}, {"word": "our", "start": 13.514999, "end": 13.674999, "confidence": 0.99951303, "punctuated_word": "our"}, {"word": "posterity", "start": 13.674999, "end": 14.174999, "confidence": 0.85497844, "punctuated_word": "posterity"}, {"word": "to", "start": 14.554999, "end": 14.795, "confidence": 0.60699797, "punctuated_word": "to"}, {"word": "ordain", "start": 14.795, "end": 15.195, "confidence": 0.9992792, "punctuated_word": "ordain"}, {"word": "and", "start": 15.195, "end": 15.434999, "confidence": 0.9992617, "punctuated_word": "and"}, {"word": "establish", "start": 15.434999, "end": 15.934999, "confidence": 0.99766684, "punctuated_word": "establish"}, {"word": "this", "start": 15.994999, "end": 16.234999, "confidence": 0.9996753, "punctuated_word": "this"}, {"word": "constitution", "start": 16.234999, "end": 16.734999, "confidence": 0.93753284, "punctuated_word": "constitution"}, {"word": "for", "start": 16.875, "end": 17.115, "confidence": 0.9990471, "punctuated_word": "for"}, {"word": "the", "start": 17.115, "end": 17.275, "confidence": 0.9999032, "punctuated_word": "the"}, {"word": "united", "start": 17.275, "end": 17.595, "confidence": 0.9995665, "punctuated_word": "United"}, {"word": "states", "start": 17.595, "end": 17.914999, "confidence": 0.99979764, "punctuated_word": "States"}, {"word": "of", "start": 17.914999, "end": 18.075, "confidence": 0.99959284, "punctuated_word": "of"}, {"word": "america", "start": 18.075, "end": 18.575, "confidence": 0.9946651, "punctuated_word": "America."}], "paragraphs": {"transcript": "\nWe, the people of the United States, in order to form a more perfect union, establish justice, ensure domestic tranquility, provide for the common defense, promote the general welfare, and secure the blessings of liberty to ourselves and our posterity to ordain and establish this constitution for the United States of America.", "paragraphs": [{"sentences": [{"text": "We, the people of the United States, in order to form a more perfect union, establish justice, ensure domestic tranquility, provide for the common defense, promote the general welfare, and secure the blessings of liberty to ourselves and our posterity to ordain and establish this constitution for the United States of America.", 
"start": 0.32, "end": 18.575}], "start": 0.32, "end": 18.575, "num_words": 52}]}}]}]}} \ No newline at end of file diff --git a/tests/response_data/listen/rest/b00dc103a62ea2ccfc752ec0f646c7528ef5e729a9d7481d2a944253a9128ce2-a17f4880c5b4cf124ac54d06d77c9f0ab7f3fe1052ff1c7b090f7eaf8ede5b76.cmd b/tests/response_data/listen/rest/b00dc103a62ea2ccfc752ec0f646c7528ef5e729a9d7481d2a944253a9128ce2-a17f4880c5b4cf124ac54d06d77c9f0ab7f3fe1052ff1c7b090f7eaf8ede5b76.cmd new file mode 100644 index 00000000..ce49b2cd --- /dev/null +++ b/tests/response_data/listen/rest/b00dc103a62ea2ccfc752ec0f646c7528ef5e729a9d7481d2a944253a9128ce2-a17f4880c5b4cf124ac54d06d77c9f0ab7f3fe1052ff1c7b090f7eaf8ede5b76.cmd @@ -0,0 +1 @@ +"preamble-rest.wav" \ No newline at end of file diff --git a/tests/response_data/listen/rest/f3b6208a662156067a41bddd295a1a0a53ea34a268e27a8f1a9d7107aa99732f-29e7c8100617f70da4ae9da1921cb5071a01219f4780ca70930b0a370ed2163a-options.json b/tests/response_data/listen/rest/f3b6208a662156067a41bddd295a1a0a53ea34a268e27a8f1a9d7107aa99732f-29e7c8100617f70da4ae9da1921cb5071a01219f4780ca70930b0a370ed2163a-options.json new file mode 100644 index 00000000..1921e812 --- /dev/null +++ b/tests/response_data/listen/rest/f3b6208a662156067a41bddd295a1a0a53ea34a268e27a8f1a9d7107aa99732f-29e7c8100617f70da4ae9da1921cb5071a01219f4780ca70930b0a370ed2163a-options.json @@ -0,0 +1 @@ +{"model": "nova-2", "smart_format": true, "summarize": "v2"} \ No newline at end of file diff --git a/tests/response_data/listen/rest/f3b6208a662156067a41bddd295a1a0a53ea34a268e27a8f1a9d7107aa99732f-29e7c8100617f70da4ae9da1921cb5071a01219f4780ca70930b0a370ed2163a-response.json b/tests/response_data/listen/rest/f3b6208a662156067a41bddd295a1a0a53ea34a268e27a8f1a9d7107aa99732f-29e7c8100617f70da4ae9da1921cb5071a01219f4780ca70930b0a370ed2163a-response.json new file mode 100644 index 00000000..3e079329 --- /dev/null +++ b/tests/response_data/listen/rest/f3b6208a662156067a41bddd295a1a0a53ea34a268e27a8f1a9d7107aa99732f-29e7c8100617f70da4ae9da1921cb5071a01219f4780ca70930b0a370ed2163a-response.json @@ -0,0 +1 @@ +{"metadata": {"transaction_key": "deprecated", "request_id": "995ae57f-de13-4fb4-b19f-e9446c850525", "sha256": "5324da68ede209a16ac69a38e8cd29cee4d754434a041166cda3a1f5e0b24566", "created": "2024-07-30T13:50:15.807Z", "duration": 17.566313, "channels": 1, "models": ["30089e05-99d1-4376-b32e-c263170674af"], "model_info": {"30089e05-99d1-4376-b32e-c263170674af": {"name": "2-general-nova", "version": "2024-01-09.29447", "arch": "nova-2"}}, "summary_info": {"input_tokens": 0, "output_tokens": 0, "model_uuid": "67875a7f-c9c4-48a0-aa55-5bdb8a91c34a"}}, "results": {"channels": [{"alternatives": [{"transcript": "Yep. I said it before and I'll say it again. Life moves pretty fast. 
You don't stop and look around once in a while, you could miss it.", "confidence": 0.9982151, "words": [{"word": "yep", "start": 5.52, "end": 6.02, "confidence": 0.99848306, "punctuated_word": "Yep."}, {"word": "i", "start": 7.095, "end": 7.2549996, "confidence": 0.84291065, "punctuated_word": "I"}, {"word": "said", "start": 7.2549996, "end": 7.415, "confidence": 0.93813825, "punctuated_word": "said"}, {"word": "it", "start": 7.415, "end": 7.495, "confidence": 0.9984339, "punctuated_word": "it"}, {"word": "before", "start": 7.495, "end": 7.975, "confidence": 0.9997731, "punctuated_word": "before"}, {"word": "and", "start": 7.975, "end": 8.135, "confidence": 0.5572641, "punctuated_word": "and"}, {"word": "i'll", "start": 8.135, "end": 8.295, "confidence": 0.9982151, "punctuated_word": "I'll"}, {"word": "say", "start": 8.295, "end": 8.455, "confidence": 0.9986897, "punctuated_word": "say"}, {"word": "it", "start": 8.455, "end": 8.615, "confidence": 0.9985261, "punctuated_word": "it"}, {"word": "again", "start": 8.615, "end": 9.115, "confidence": 0.8480083, "punctuated_word": "again."}, {"word": "life", "start": 9.975, "end": 10.295, "confidence": 0.99577385, "punctuated_word": "Life"}, {"word": "moves", "start": 10.295, "end": 10.695, "confidence": 0.99855036, "punctuated_word": "moves"}, {"word": "pretty", "start": 10.695, "end": 11.014999, "confidence": 0.99935323, "punctuated_word": "pretty"}, {"word": "fast", "start": 11.014999, "end": 11.514999, "confidence": 0.9992754, "punctuated_word": "fast."}, {"word": "you", "start": 11.975, "end": 12.215, "confidence": 0.94851214, "punctuated_word": "You"}, {"word": "don't", "start": 12.215, "end": 12.455, "confidence": 0.99980193, "punctuated_word": "don't"}, {"word": "stop", "start": 12.455, "end": 12.695, "confidence": 0.9998211, "punctuated_word": "stop"}, {"word": "and", "start": 12.695, "end": 12.855, "confidence": 0.99849033, "punctuated_word": "and"}, {"word": "look", "start": 12.855, "end": 13.094999, "confidence": 0.9997216, "punctuated_word": "look"}, {"word": "around", "start": 13.094999, "end": 13.334999, "confidence": 0.999483, "punctuated_word": "around"}, {"word": "once", "start": 13.334999, "end": 13.575, "confidence": 0.9980342, "punctuated_word": "once"}, {"word": "in", "start": 13.575, "end": 13.735, "confidence": 0.9971301, "punctuated_word": "in"}, {"word": "a", "start": 13.735, "end": 13.815, "confidence": 0.9540763, "punctuated_word": "a"}, {"word": "while", "start": 13.815, "end": 14.315, "confidence": 0.9714056, "punctuated_word": "while,"}, {"word": "you", "start": 14.561313, "end": 14.7213125, "confidence": 0.9899132, "punctuated_word": "you"}, {"word": "could", "start": 14.7213125, "end": 14.961312, "confidence": 0.99663407, "punctuated_word": "could"}, {"word": "miss", "start": 14.961312, "end": 15.461312, "confidence": 0.99736553, "punctuated_word": "miss"}, {"word": "it", "start": 17.281313, "end": 17.566313, "confidence": 0.98995394, "punctuated_word": "it."}], "paragraphs": {"transcript": "\nYep. I said it before and I'll say it again. Life moves pretty fast. 
You don't stop and look around once in a while, you could miss it.", "paragraphs": [{"sentences": [{"text": "Yep.", "start": 5.52, "end": 6.02}, {"text": "I said it before and I'll say it again.", "start": 7.095, "end": 9.115}, {"text": "Life moves pretty fast.", "start": 9.975, "end": 11.514999}, {"text": "You don't stop and look around once in a while, you could miss it.", "start": 11.975, "end": 17.566313}], "start": 5.52, "end": 17.566313, "num_words": 28}]}}]}], "summary": {"result": "success", "short": "Yep. I said it before and I'll say it again. Life moves pretty fast. You don't stop and look around once in a while, you could miss it."}}} \ No newline at end of file diff --git a/tests/response_data/listen/rest/f3b6208a662156067a41bddd295a1a0a53ea34a268e27a8f1a9d7107aa99732f-29e7c8100617f70da4ae9da1921cb5071a01219f4780ca70930b0a370ed2163a.cmd b/tests/response_data/listen/rest/f3b6208a662156067a41bddd295a1a0a53ea34a268e27a8f1a9d7107aa99732f-29e7c8100617f70da4ae9da1921cb5071a01219f4780ca70930b0a370ed2163a.cmd new file mode 100644 index 00000000..fcf4600d --- /dev/null +++ b/tests/response_data/listen/rest/f3b6208a662156067a41bddd295a1a0a53ea34a268e27a8f1a9d7107aa99732f-29e7c8100617f70da4ae9da1921cb5071a01219f4780ca70930b0a370ed2163a.cmd @@ -0,0 +1 @@ +{"url": "https://static.deepgram.com/examples/Bueller-Life-moves-pretty-fast.wav"} \ No newline at end of file diff --git a/tests/response_data/listen/rest/f3b6208a662156067a41bddd295a1a0a53ea34a268e27a8f1a9d7107aa99732f-a17f4880c5b4cf124ac54d06d77c9f0ab7f3fe1052ff1c7b090f7eaf8ede5b76-options.json b/tests/response_data/listen/rest/f3b6208a662156067a41bddd295a1a0a53ea34a268e27a8f1a9d7107aa99732f-a17f4880c5b4cf124ac54d06d77c9f0ab7f3fe1052ff1c7b090f7eaf8ede5b76-options.json new file mode 100644 index 00000000..1921e812 --- /dev/null +++ b/tests/response_data/listen/rest/f3b6208a662156067a41bddd295a1a0a53ea34a268e27a8f1a9d7107aa99732f-a17f4880c5b4cf124ac54d06d77c9f0ab7f3fe1052ff1c7b090f7eaf8ede5b76-options.json @@ -0,0 +1 @@ +{"model": "nova-2", "smart_format": true, "summarize": "v2"} \ No newline at end of file diff --git a/tests/response_data/listen/rest/f3b6208a662156067a41bddd295a1a0a53ea34a268e27a8f1a9d7107aa99732f-a17f4880c5b4cf124ac54d06d77c9f0ab7f3fe1052ff1c7b090f7eaf8ede5b76-response.json b/tests/response_data/listen/rest/f3b6208a662156067a41bddd295a1a0a53ea34a268e27a8f1a9d7107aa99732f-a17f4880c5b4cf124ac54d06d77c9f0ab7f3fe1052ff1c7b090f7eaf8ede5b76-response.json new file mode 100644 index 00000000..d70ae348 --- /dev/null +++ b/tests/response_data/listen/rest/f3b6208a662156067a41bddd295a1a0a53ea34a268e27a8f1a9d7107aa99732f-a17f4880c5b4cf124ac54d06d77c9f0ab7f3fe1052ff1c7b090f7eaf8ede5b76-response.json @@ -0,0 +1 @@ +{"metadata": {"transaction_key": "deprecated", "request_id": "d54e1e21-73b7-430c-bf67-0581a070b316", "sha256": "95dc40091b6a8456a1554ddfc4f163768217afd66bee70a10c74bb52805cd0d9", "created": "2024-07-30T13:50:12.829Z", "duration": 19.097937, "channels": 1, "models": ["30089e05-99d1-4376-b32e-c263170674af"], "model_info": {"30089e05-99d1-4376-b32e-c263170674af": {"name": "2-general-nova", "version": "2024-01-09.29447", "arch": "nova-2"}}, "summary_info": {"input_tokens": 63, "output_tokens": 53, "model_uuid": "67875a7f-c9c4-48a0-aa55-5bdb8a91c34a"}}, "results": {"channels": [{"alternatives": [{"transcript": "We, the people of the United States, in order to form a more perfect union, establish justice, ensure domestic tranquility, provide for the common defense, promote the general welfare, and secure the blessings of 
liberty to ourselves and our posterity to ordain and establish this constitution for the United States of America.", "confidence": 0.9992618, "words": [{"word": "we", "start": 0.32, "end": 0.64, "confidence": 0.95111674, "punctuated_word": "We,"}, {"word": "the", "start": 0.64, "end": 0.88, "confidence": 0.99713194, "punctuated_word": "the"}, {"word": "people", "start": 0.88, "end": 1.1999999, "confidence": 0.9731963, "punctuated_word": "people"}, {"word": "of", "start": 1.1999999, "end": 1.4399999, "confidence": 0.99905545, "punctuated_word": "of"}, {"word": "the", "start": 1.4399999, "end": 1.52, "confidence": 0.9991559, "punctuated_word": "the"}, {"word": "united", "start": 1.52, "end": 1.92, "confidence": 0.9996617, "punctuated_word": "United"}, {"word": "states", "start": 1.92, "end": 2.3999999, "confidence": 0.992371, "punctuated_word": "States,"}, {"word": "in", "start": 2.3999999, "end": 2.56, "confidence": 0.99968016, "punctuated_word": "in"}, {"word": "order", "start": 2.56, "end": 2.8799999, "confidence": 0.99996495, "punctuated_word": "order"}, {"word": "to", "start": 2.8799999, "end": 3.04, "confidence": 0.999676, "punctuated_word": "to"}, {"word": "form", "start": 3.04, "end": 3.28, "confidence": 0.999577, "punctuated_word": "form"}, {"word": "a", "start": 3.28, "end": 3.36, "confidence": 0.99972373, "punctuated_word": "a"}, {"word": "more", "start": 3.36, "end": 3.6, "confidence": 0.9999578, "punctuated_word": "more"}, {"word": "perfect", "start": 3.6, "end": 3.9199998, "confidence": 0.9999012, "punctuated_word": "perfect"}, {"word": "union", "start": 3.9199998, "end": 4.42, "confidence": 0.9927752, "punctuated_word": "union,"}, {"word": "establish", "start": 4.72, "end": 5.22, "confidence": 0.9711629, "punctuated_word": "establish"}, {"word": "justice", "start": 5.2799997, "end": 5.7799997, "confidence": 0.9978107, "punctuated_word": "justice,"}, {"word": "ensure", "start": 6.0, "end": 6.3999996, "confidence": 0.92067, "punctuated_word": "ensure"}, {"word": "domestic", "start": 6.3999996, "end": 6.8799996, "confidence": 0.9865976, "punctuated_word": "domestic"}, {"word": "tranquility", "start": 6.8799996, "end": 7.3799996, "confidence": 0.9985071, "punctuated_word": "tranquility,"}, {"word": "provide", "start": 7.9199996, "end": 8.24, "confidence": 0.9998047, "punctuated_word": "provide"}, {"word": "for", "start": 8.24, "end": 8.48, "confidence": 0.9999318, "punctuated_word": "for"}, {"word": "the", "start": 8.48, "end": 8.559999, "confidence": 0.99989927, "punctuated_word": "the"}, {"word": "common", "start": 8.559999, "end": 8.88, "confidence": 0.99929667, "punctuated_word": "common"}, {"word": "defense", "start": 8.88, "end": 9.355, "confidence": 0.99885136, "punctuated_word": "defense,"}, {"word": "promote", "start": 9.594999, "end": 9.915, "confidence": 0.9908227, "punctuated_word": "promote"}, {"word": "the", "start": 9.915, "end": 10.075, "confidence": 0.99942195, "punctuated_word": "the"}, {"word": "general", "start": 10.075, "end": 10.554999, "confidence": 0.9977016, "punctuated_word": "general"}, {"word": "welfare", "start": 10.554999, "end": 10.955, "confidence": 0.9617264, "punctuated_word": "welfare,"}, {"word": "and", "start": 10.955, "end": 11.195, "confidence": 0.9998332, "punctuated_word": "and"}, {"word": "secure", "start": 11.195, "end": 11.514999, "confidence": 0.99982965, "punctuated_word": "secure"}, {"word": "the", "start": 11.514999, "end": 11.674999, "confidence": 0.9998592, "punctuated_word": "the"}, {"word": "blessings", "start": 11.674999, "end": 
11.994999, "confidence": 0.9988814, "punctuated_word": "blessings"}, {"word": "of", "start": 11.994999, "end": 12.235, "confidence": 0.9999485, "punctuated_word": "of"}, {"word": "liberty", "start": 12.235, "end": 12.714999, "confidence": 0.94858193, "punctuated_word": "liberty"}, {"word": "to", "start": 12.714999, "end": 12.875, "confidence": 0.9982723, "punctuated_word": "to"}, {"word": "ourselves", "start": 12.875, "end": 13.355, "confidence": 0.99971586, "punctuated_word": "ourselves"}, {"word": "and", "start": 13.355, "end": 13.514999, "confidence": 0.8741829, "punctuated_word": "and"}, {"word": "our", "start": 13.514999, "end": 13.674999, "confidence": 0.99951303, "punctuated_word": "our"}, {"word": "posterity", "start": 13.674999, "end": 14.174999, "confidence": 0.8549783, "punctuated_word": "posterity"}, {"word": "to", "start": 14.554999, "end": 14.795, "confidence": 0.606997, "punctuated_word": "to"}, {"word": "ordain", "start": 14.795, "end": 15.195, "confidence": 0.99927926, "punctuated_word": "ordain"}, {"word": "and", "start": 15.195, "end": 15.434999, "confidence": 0.9992618, "punctuated_word": "and"}, {"word": "establish", "start": 15.434999, "end": 15.934999, "confidence": 0.9976667, "punctuated_word": "establish"}, {"word": "this", "start": 15.994999, "end": 16.234999, "confidence": 0.999676, "punctuated_word": "this"}, {"word": "constitution", "start": 16.234999, "end": 16.734999, "confidence": 0.93753326, "punctuated_word": "constitution"}, {"word": "for", "start": 16.875, "end": 17.115, "confidence": 0.99904674, "punctuated_word": "for"}, {"word": "the", "start": 17.115, "end": 17.275, "confidence": 0.9999026, "punctuated_word": "the"}, {"word": "united", "start": 17.275, "end": 17.595, "confidence": 0.99956614, "punctuated_word": "United"}, {"word": "states", "start": 17.595, "end": 17.914999, "confidence": 0.9997981, "punctuated_word": "States"}, {"word": "of", "start": 17.914999, "end": 18.075, "confidence": 0.9995933, "punctuated_word": "of"}, {"word": "america", "start": 18.075, "end": 18.575, "confidence": 0.9946653, "punctuated_word": "America."}], "paragraphs": {"transcript": "\nWe, the people of the United States, in order to form a more perfect union, establish justice, ensure domestic tranquility, provide for the common defense, promote the general welfare, and secure the blessings of liberty to ourselves and our posterity to ordain and establish this constitution for the United States of America.", "paragraphs": [{"sentences": [{"text": "We, the people of the United States, in order to form a more perfect union, establish justice, ensure domestic tranquility, provide for the common defense, promote the general welfare, and secure the blessings of liberty to ourselves and our posterity to ordain and establish this constitution for the United States of America.", "start": 0.32, "end": 18.575}], "start": 0.32, "end": 18.575, "num_words": 52}]}}]}], "summary": {"result": "success", "short": "Speaker 1 discusses the goal of establishing a more perfect union, justice, and the common defense for the United States, as part of the Better Union movement. 
They emphasize the importance of these factors in securing the benefits of liberty for the United States and the world."}}} \ No newline at end of file diff --git a/tests/response_data/listen/rest/f3b6208a662156067a41bddd295a1a0a53ea34a268e27a8f1a9d7107aa99732f-a17f4880c5b4cf124ac54d06d77c9f0ab7f3fe1052ff1c7b090f7eaf8ede5b76.cmd b/tests/response_data/listen/rest/f3b6208a662156067a41bddd295a1a0a53ea34a268e27a8f1a9d7107aa99732f-a17f4880c5b4cf124ac54d06d77c9f0ab7f3fe1052ff1c7b090f7eaf8ede5b76.cmd new file mode 100644 index 00000000..ce49b2cd --- /dev/null +++ b/tests/response_data/listen/rest/f3b6208a662156067a41bddd295a1a0a53ea34a268e27a8f1a9d7107aa99732f-a17f4880c5b4cf124ac54d06d77c9f0ab7f3fe1052ff1c7b090f7eaf8ede5b76.cmd @@ -0,0 +1 @@ +"preamble-rest.wav" \ No newline at end of file diff --git a/tests/response_data/listen/websocket/a6d1b12d5ce73a51a7b69ab156f0c98c72cdc1cfcf4a25f7b634c328cce4d760-42fc5ed98cabc1fa1a2f276301c27c46dd15f6f5187cd93d944cc94fa81c8469-options.json b/tests/response_data/listen/websocket/a6d1b12d5ce73a51a7b69ab156f0c98c72cdc1cfcf4a25f7b634c328cce4d760-42fc5ed98cabc1fa1a2f276301c27c46dd15f6f5187cd93d944cc94fa81c8469-options.json new file mode 100644 index 00000000..fc8db31e --- /dev/null +++ b/tests/response_data/listen/websocket/a6d1b12d5ce73a51a7b69ab156f0c98c72cdc1cfcf4a25f7b634c328cce4d760-42fc5ed98cabc1fa1a2f276301c27c46dd15f6f5187cd93d944cc94fa81c8469-options.json @@ -0,0 +1 @@ +{"channels": 1, "encoding": "mulaw", "language": "en-US", "model": "nova-2", "punctuate": true, "sample_rate": 8000, "smart_format": true} \ No newline at end of file diff --git a/tests/response_data/listen/websocket/a6d1b12d5ce73a51a7b69ab156f0c98c72cdc1cfcf4a25f7b634c328cce4d760-42fc5ed98cabc1fa1a2f276301c27c46dd15f6f5187cd93d944cc94fa81c8469-response.json b/tests/response_data/listen/websocket/a6d1b12d5ce73a51a7b69ab156f0c98c72cdc1cfcf4a25f7b634c328cce4d760-42fc5ed98cabc1fa1a2f276301c27c46dd15f6f5187cd93d944cc94fa81c8469-response.json new file mode 100644 index 00000000..3aafe0a2 --- /dev/null +++ b/tests/response_data/listen/websocket/a6d1b12d5ce73a51a7b69ab156f0c98c72cdc1cfcf4a25f7b634c328cce4d760-42fc5ed98cabc1fa1a2f276301c27c46dd15f6f5187cd93d944cc94fa81c8469-response.json @@ -0,0 +1 @@ +{"channel": {"alternatives": [{"transcript": "for the United States of America.", "confidence": 0.99902344, "words": [{"word": "for", "start": 17.56, "end": 17.72, "confidence": 0.98828125, "punctuated_word": "for"}, {"word": "the", "start": 17.72, "end": 17.88, "confidence": 1.0, "punctuated_word": "the"}, {"word": "united", "start": 17.88, "end": 17.96, "confidence": 1.0, "punctuated_word": "United"}, {"word": "states", "start": 17.96, "end": 18.12, "confidence": 0.99902344, "punctuated_word": "States"}, {"word": "of", "start": 18.12, "end": 18.2, "confidence": 0.99902344, "punctuated_word": "of"}, {"word": "america", "start": 18.2, "end": 18.66, "confidence": 0.9609375, "punctuated_word": "America."}]}]}, "metadata": {"model_info": {"name": "2-general-nova", "version": "2024-01-18.26916", "arch": "nova-2"}, "request_id": "13c351e8-d149-42bd-b579-1fc1e008452e", "model_uuid": "c0d1a568-ce81-4fea-97e7-bd45cb1fdf3c"}, "type": "Results", "channel_index": [0, 1], "duration": 1.6599998, "start": 17.0, "is_final": true, "from_finalize": false, "speech_final": true} \ No newline at end of file diff --git a/tests/response_data/listen/websocket/a6d1b12d5ce73a51a7b69ab156f0c98c72cdc1cfcf4a25f7b634c328cce4d760-42fc5ed98cabc1fa1a2f276301c27c46dd15f6f5187cd93d944cc94fa81c8469.cmd 
b/tests/response_data/listen/websocket/a6d1b12d5ce73a51a7b69ab156f0c98c72cdc1cfcf4a25f7b634c328cce4d760-42fc5ed98cabc1fa1a2f276301c27c46dd15f6f5187cd93d944cc94fa81c8469.cmd new file mode 100644 index 00000000..5ea03fe2 --- /dev/null +++ b/tests/response_data/listen/websocket/a6d1b12d5ce73a51a7b69ab156f0c98c72cdc1cfcf4a25f7b634c328cce4d760-42fc5ed98cabc1fa1a2f276301c27c46dd15f6f5187cd93d944cc94fa81c8469.cmd @@ -0,0 +1 @@ +"preamble-websocket.wav" \ No newline at end of file diff --git a/tests/response_data/listen/websocket/a6d1b12d5ce73a51a7b69ab156f0c98c72cdc1cfcf4a25f7b634c328cce4d760-d7334c26cf6468c191e05ff5e8151da9b67985c66ab177e9446fd14bbafd70df-options.json b/tests/response_data/listen/websocket/a6d1b12d5ce73a51a7b69ab156f0c98c72cdc1cfcf4a25f7b634c328cce4d760-d7334c26cf6468c191e05ff5e8151da9b67985c66ab177e9446fd14bbafd70df-options.json new file mode 100644 index 00000000..fc8db31e --- /dev/null +++ b/tests/response_data/listen/websocket/a6d1b12d5ce73a51a7b69ab156f0c98c72cdc1cfcf4a25f7b634c328cce4d760-d7334c26cf6468c191e05ff5e8151da9b67985c66ab177e9446fd14bbafd70df-options.json @@ -0,0 +1 @@ +{"channels": 1, "encoding": "mulaw", "language": "en-US", "model": "nova-2", "punctuate": true, "sample_rate": 8000, "smart_format": true} \ No newline at end of file diff --git a/tests/response_data/listen/websocket/a6d1b12d5ce73a51a7b69ab156f0c98c72cdc1cfcf4a25f7b634c328cce4d760-d7334c26cf6468c191e05ff5e8151da9b67985c66ab177e9446fd14bbafd70df-response.json b/tests/response_data/listen/websocket/a6d1b12d5ce73a51a7b69ab156f0c98c72cdc1cfcf4a25f7b634c328cce4d760-d7334c26cf6468c191e05ff5e8151da9b67985c66ab177e9446fd14bbafd70df-response.json new file mode 100644 index 00000000..c2aa696a --- /dev/null +++ b/tests/response_data/listen/websocket/a6d1b12d5ce73a51a7b69ab156f0c98c72cdc1cfcf4a25f7b634c328cce4d760-d7334c26cf6468c191e05ff5e8151da9b67985c66ab177e9446fd14bbafd70df-response.json @@ -0,0 +1 @@ +{"channel": {"alternatives": [{"transcript": "Testing. 123. Testing. 
123.", "confidence": 0.9885254, "words": [{"word": "testing", "start": 1.22, "end": 1.62, "confidence": 0.48156738, "punctuated_word": "Testing."}, {"word": "123", "start": 1.62, "end": 2.12, "confidence": 0.9367676, "punctuated_word": "123."}, {"word": "testing", "start": 2.1799998, "end": 2.6799998, "confidence": 0.9885254, "punctuated_word": "Testing."}, {"word": "123", "start": 3.1399999, "end": 3.6399999, "confidence": 0.9941406, "punctuated_word": "123."}]}]}, "metadata": {"model_info": {"name": "2-general-nova", "version": "2024-01-18.26916", "arch": "nova-2"}, "request_id": "74abc0e7-79db-45e5-80fc-3f0bc3c152a5", "model_uuid": "c0d1a568-ce81-4fea-97e7-bd45cb1fdf3c"}, "type": "Results", "channel_index": [0, 1], "duration": 3.08, "start": 0.74, "is_final": true, "from_finalize": false, "speech_final": true} \ No newline at end of file diff --git a/tests/response_data/listen/websocket/a6d1b12d5ce73a51a7b69ab156f0c98c72cdc1cfcf4a25f7b634c328cce4d760-d7334c26cf6468c191e05ff5e8151da9b67985c66ab177e9446fd14bbafd70df.cmd b/tests/response_data/listen/websocket/a6d1b12d5ce73a51a7b69ab156f0c98c72cdc1cfcf4a25f7b634c328cce4d760-d7334c26cf6468c191e05ff5e8151da9b67985c66ab177e9446fd14bbafd70df.cmd new file mode 100644 index 00000000..b4a04182 --- /dev/null +++ b/tests/response_data/listen/websocket/a6d1b12d5ce73a51a7b69ab156f0c98c72cdc1cfcf4a25f7b634c328cce4d760-d7334c26cf6468c191e05ff5e8151da9b67985c66ab177e9446fd14bbafd70df.cmd @@ -0,0 +1 @@ +"testing-websocket.wav" \ No newline at end of file diff --git a/tests/response_data/read/rest/3917a1c81c08e360c0d4bba0ff9ebd645e610e4149483e5f2888a2c5df388b37-23e873efdfd4d680286fda14ff8f10864218311e79efc92ecc82bce3e574c366-options.json b/tests/response_data/read/rest/3917a1c81c08e360c0d4bba0ff9ebd645e610e4149483e5f2888a2c5df388b37-23e873efdfd4d680286fda14ff8f10864218311e79efc92ecc82bce3e574c366-options.json new file mode 100644 index 00000000..b382e675 --- /dev/null +++ b/tests/response_data/read/rest/3917a1c81c08e360c0d4bba0ff9ebd645e610e4149483e5f2888a2c5df388b37-23e873efdfd4d680286fda14ff8f10864218311e79efc92ecc82bce3e574c366-options.json @@ -0,0 +1 @@ +{"language": "en", "summarize": true} \ No newline at end of file diff --git a/tests/response_data/read/rest/3917a1c81c08e360c0d4bba0ff9ebd645e610e4149483e5f2888a2c5df388b37-23e873efdfd4d680286fda14ff8f10864218311e79efc92ecc82bce3e574c366-response.json b/tests/response_data/read/rest/3917a1c81c08e360c0d4bba0ff9ebd645e610e4149483e5f2888a2c5df388b37-23e873efdfd4d680286fda14ff8f10864218311e79efc92ecc82bce3e574c366-response.json new file mode 100644 index 00000000..d641a919 --- /dev/null +++ b/tests/response_data/read/rest/3917a1c81c08e360c0d4bba0ff9ebd645e610e4149483e5f2888a2c5df388b37-23e873efdfd4d680286fda14ff8f10864218311e79efc92ecc82bce3e574c366-response.json @@ -0,0 +1 @@ +{"metadata": {"request_id": "34d422d4-43d7-43f1-a8a7-3fd3977045ee", "created": "2024-07-30T13:50:40.770Z", "language": "en", "summary_info": {"model_uuid": "67875a7f-c9c4-48a0-aa55-5bdb8a91c34a", "input_tokens": 1855, "output_tokens": 145}}, "results": {"summary": {"text": "The potential for voice-based interfaces in conversational AI applications is discussed, with a focus on voice-premises and wearable devices. The success of voice-first experiences and tools, including DeepgramQuad, is highlighted, along with the potential for high-throughput and fast text-to-speech conversion for AI agents. 
The speakers emphasize the benefits of voice quality, including natural speech flow, and the importance of tailoring voice to specific applications. They also mention their involvement in machine learning and their plans to expand their waitlist for a speech-to-text model. They expect to release generally early next year, but if working on any real-time AI agent use cases, they can join their waitlist to jumpstart their development in production."}}} \ No newline at end of file diff --git a/tests/response_data/read/rest/3917a1c81c08e360c0d4bba0ff9ebd645e610e4149483e5f2888a2c5df388b37-23e873efdfd4d680286fda14ff8f10864218311e79efc92ecc82bce3e574c366.cmd b/tests/response_data/read/rest/3917a1c81c08e360c0d4bba0ff9ebd645e610e4149483e5f2888a2c5df388b37-23e873efdfd4d680286fda14ff8f10864218311e79efc92ecc82bce3e574c366.cmd new file mode 100644 index 00000000..ddd42715 --- /dev/null +++ b/tests/response_data/read/rest/3917a1c81c08e360c0d4bba0ff9ebd645e610e4149483e5f2888a2c5df388b37-23e873efdfd4d680286fda14ff8f10864218311e79efc92ecc82bce3e574c366.cmd @@ -0,0 +1 @@ +"conversation.txt" \ No newline at end of file diff --git a/tests/response_data/speak/rest/18144fa7f4709bc9972c24d0addc8faa360dca933e7e0027b062e57b7c41f426-f8c3bf62a9aa3e6fc1619c250e48abe7519373d3edf41be62eb5dc45199af2ef-options.json b/tests/response_data/speak/rest/18144fa7f4709bc9972c24d0addc8faa360dca933e7e0027b062e57b7c41f426-f8c3bf62a9aa3e6fc1619c250e48abe7519373d3edf41be62eb5dc45199af2ef-options.json new file mode 100644 index 00000000..4b01a5d3 --- /dev/null +++ b/tests/response_data/speak/rest/18144fa7f4709bc9972c24d0addc8faa360dca933e7e0027b062e57b7c41f426-f8c3bf62a9aa3e6fc1619c250e48abe7519373d3edf41be62eb5dc45199af2ef-options.json @@ -0,0 +1 @@ +{"model": "aura-asteria-en", "encoding": "linear16", "sample_rate": 24000} \ No newline at end of file diff --git a/tests/response_data/speak/rest/18144fa7f4709bc9972c24d0addc8faa360dca933e7e0027b062e57b7c41f426-f8c3bf62a9aa3e6fc1619c250e48abe7519373d3edf41be62eb5dc45199af2ef-response.json b/tests/response_data/speak/rest/18144fa7f4709bc9972c24d0addc8faa360dca933e7e0027b062e57b7c41f426-f8c3bf62a9aa3e6fc1619c250e48abe7519373d3edf41be62eb5dc45199af2ef-response.json new file mode 100644 index 00000000..a20f5fc0 --- /dev/null +++ b/tests/response_data/speak/rest/18144fa7f4709bc9972c24d0addc8faa360dca933e7e0027b062e57b7c41f426-f8c3bf62a9aa3e6fc1619c250e48abe7519373d3edf41be62eb5dc45199af2ef-response.json @@ -0,0 +1 @@ +{"content_type": "audio/wav", "request_id": "b84facd1-44e0-4b9a-88ca-77d77b7d9de3", "model_uuid": "d6db4f9d-0321-467c-a57c-3daca7d5b68e", "model_name": "aura-asteria-en", "characters": 13, "transfer_encoding": "chunked", "date": "Tue, 30 Jul 2024 13:50:40 GMT"} \ No newline at end of file diff --git a/tests/response_data/speak/rest/18144fa7f4709bc9972c24d0addc8faa360dca933e7e0027b062e57b7c41f426-f8c3bf62a9aa3e6fc1619c250e48abe7519373d3edf41be62eb5dc45199af2ef.cmd b/tests/response_data/speak/rest/18144fa7f4709bc9972c24d0addc8faa360dca933e7e0027b062e57b7c41f426-f8c3bf62a9aa3e6fc1619c250e48abe7519373d3edf41be62eb5dc45199af2ef.cmd new file mode 100644 index 00000000..c872090c --- /dev/null +++ b/tests/response_data/speak/rest/18144fa7f4709bc9972c24d0addc8faa360dca933e7e0027b062e57b7c41f426-f8c3bf62a9aa3e6fc1619c250e48abe7519373d3edf41be62eb5dc45199af2ef.cmd @@ -0,0 +1 @@ +Hello, world. 
\ No newline at end of file diff --git a/tests/response_data/speak/rest/18144fa7f4709bc9972c24d0addc8faa360dca933e7e0027b062e57b7c41f426-f8c3bf62a9aa3e6fc1619c250e48abe7519373d3edf41be62eb5dc45199af2ef.wav b/tests/response_data/speak/rest/18144fa7f4709bc9972c24d0addc8faa360dca933e7e0027b062e57b7c41f426-f8c3bf62a9aa3e6fc1619c250e48abe7519373d3edf41be62eb5dc45199af2ef.wav new file mode 100644 index 00000000..7762ef9c Binary files /dev/null and b/tests/response_data/speak/rest/18144fa7f4709bc9972c24d0addc8faa360dca933e7e0027b062e57b7c41f426-f8c3bf62a9aa3e6fc1619c250e48abe7519373d3edf41be62eb5dc45199af2ef.wav differ diff --git a/tests/unit_test/conversation.txt b/tests/unit_test/conversation.txt new file mode 100644 index 00000000..2af89e08 --- /dev/null +++ b/tests/unit_test/conversation.txt @@ -0,0 +1,71 @@ +Meet Deepgram Aura: real-time text-to-speech for real-time AI agents +---------- +It’s been a year since large language models (LLMs) seemingly went mainstream overnight (Happy Birthday, ChatGPT!!!), and the world has witnessed both rapid development of these technologies and immense interest in their potential. We believe that we have reached an inflection point where voice-based interfaces will be the primary means to accessing LLMs and the experiences they unlock. Here are a few recent signals in support of our thesis: + +- Good old fashioned voice notes are enjoying a healthy resurgence. + +- According to a recent survey, a majority of respondents stated phone calls are still their preferred communication channel for resolving customer service issues. + +- An emerging boom in wearable devices equipped with continuous listening and speech AI technology is gaining steam. + +- OpenAI recently enabled voice interactions in ChatGPT. + +- A wave of interest in voice-first experiences and tools is sweeping across brands, investors, and tech companies. + +Thanks to ChatGPT and the advent of the LLM era, the conversational AI tech stack has advanced sufficiently to support productive (not frustrating) voice-powered AI assistants and agents that can interact with humans in a natural manner. We have already observed this from our most innovative customers who are actively turning to these technologies to build a diverse range of AI agents for voice ordering systems, interview bots, personal AI assistants, automated drive-thru tellers, and autonomous sales and customer service agents. + +While these AI agents hold immense potential, many customers have expressed their dissatisfaction with the current crop of voice AI vendors, citing roadblocks related to speed, cost, reliability, and conversational quality. That’s why we’re excited to introduce our own text-to-speech (TTS) API, Deepgram Aura, built for real-time, conversational voice AI agents. + +Whether used on its own or in conjunction with our industry-leading Nova-2 speech-to-text API, we’ll soon provide developers with a complete speech AI platform, giving them the essential building blocks they need to build high throughput, real-time AI agents of the future. + +We are thrilled about the progress our initial group of developers has made using Aura, so much so that we are extending limited access to a select few partners who will be free to begin integrating with Aura immediately. With their feedback, we’ll continue to enhance our suite of voices and API features, as well as ensure a smooth launch of their production-grade applications. 
+ + +What Customers Want +---------- +I feel the need, the need for speed +What we’ve heard from many of our customers and partners is that voice AI technology today caters to two main areas: high production or high throughput. + +High Production is all about crafting the perfect voice. It's used in projects where every tone and inflection matters, like in video games or audiobooks, to really bring a scene or story to life. Here, voice quality is king, with creators investing hours to fine-tune every detail for a powerful emotional impact. The primary benefit is the ability to swap out a high-paid voice actor with AI where you have more dynamic control over what’s being said while also achieving some cost savings. But these use cases are more specialized and represent just a sliver of the overall voice AI opportunity. + +On the flip side, High Throughput is about handling many quick, one-off interactions for real-time conversations at scale. Think fast food ordering, booking appointments, or inquiring about the latest deals at a car dealership. These tasks are relevant to just about everyone on the planet, and they require fast, efficient text-to-speech conversion for an AI agent to fulfill them. While voice quality is still important to keep users engaged, quality here is more about the naturalness of the flow of conversation and less about sounding like Morgan Freeman. But the primary focus for most customers in this category is on improving customer outcomes, meaning speed and efficiency are must-haves for ensuring these everyday exchanges are smooth and reliable at high volume. + +"Deepgram showed me less than 200ms latency today. That's the fastest text-to-speech I’ve ever seen. And our customers would be more than satisfied with the conversation quality." + +Jordan Dearsley, Co-founder at Vapi + +Although high production use cases seem to be well-served with UI-centric production tools, high throughput, real-time use cases still mostly rely on APIs provided by the major cloud providers. And our customers have been telling us that they’ve been falling short, with insufficient quality for a good user experience, too much latency to make real-time use cases work, and costs too expensive to operate at scale. + + +More human than human +---------- +With Aura, we’ll give realistic voices to AI agents. Our goal is to craft text-to-speech capabilities that mirror natural human conversations, including timely responses, the incorporation of natural speech fillers like 'um' and 'uh' during contemplation, and the modulation of tone and emotion according to the conversational context. We aim to incorporate laughter and other speech nuances as well. Furthermore, we are dedicated to tailoring these voices to their specific applications, ensuring they remain composed and articulate, particularly in enunciating account numbers and business names with precision. + +"I don’t really consider Azure and the other guys anymore because the voices sound so robotic." +Jordan Dearsley, Co-founder at Vapi + +In blind evaluation trials conducted for benchmarking, early versions of Aura have consistently been rated as sounding more human than prominent alternatives, even outranking human speakers for various audio clips more often than not on average. We were pleasantly surprised by these results (stay tuned for a future post containing comprehensive benchmarks for speed and quality soon!), so much so that we’re accelerating our development timeline and publicly announcing today’s waitlist expansion. 
+ +Here are some sample clips generated by one of the earliest iterations of Aura. The quality and overall performance will continue to improve with additional model training and refinement. We encourage you to give them a listen and note the naturalness of their cadence, rhythm, and tone in the flow of conversation with another human. + + +Our Approach +---------- +For nearly a decade, we’ve worked tirelessly to advance the art of the possible in speech recognition and spoken language understanding. Along the way, we’ve transcribed trillions of spoken words into highly accurate transcriptions. Our model research team has developed novel transformer architectures equipped to deal with the nuances of conversational audio–across different languages, accents, and dialects, while handling disfluencies and the changing rhythms, tones, cadences, and inflections that occur in natural, back-and-forth conversations. + +And all the while, we’ve purposefully built our models under limited constraints to optimize their speed and efficiency. With support for dozens of languages and custom model training, our technical team has trained and deployed thousands of speech AI models (more than anybody else) which we operate and manage for our customers each day using our own computing infrastructure. + +We also have our own in-house data labeling and data ops team with years of experience building bespoke workflows to record, store, and transfer vast amounts of audio in order to label it and continuously grow our bank of high-quality data (millions of hours and counting) used in our model training. + +These combined experiences have made us experts in processing and modeling speech audio, especially in support of streaming use cases with our real-time STT models. Our customers have been asking if we could apply the same approach for TTS, and we can. + +So what can you expect from Aura? Delivering the same market-leading value and performance as Nova-2 does for STT. Aura is built to be the panacea for speed, quality, and efficiency–the fastest of the high-quality options, and the best quality of the fast ones. And that’s really what end users need and what our customers have been asking us to build. + +"Deepgram is a valued partner, providing our customers with high throughput speech-to-text that delivers unrivaled performance without tradeoffs between quality, speed, and cost. We're excited to see Deepgram extend their speech AI platform and bring this approach to the text-to-speech market." - Richard Dumas, VP AI Product Strategy at Five9 + + +What's Next +---------- +As we’ve discussed, scaled voice agents are a high throughput use case, and we believe their success will ultimately depend on a unified approach to audio, one that strikes the right balance between natural voice quality, responsiveness, and cost-efficiency. And with Aura, we’re just getting started. We’re looking forward to continuing to work with customers like Asurion and partners like Five9 across speech-to-text AND text-to-speech as we help them define the future of AI agents, and we invite you to join us on this journey. + +We expect to release generally early next year, but if you’re working on any real-time AI agent use cases, join our waitlist today to jumpstart your development in production as we continue to refine our model and API features with your direct feedback. 
\ No newline at end of file diff --git a/tests/unit_test/preamble-rest.wav b/tests/unit_test/preamble-rest.wav new file mode 100644 index 00000000..1049d0d2 Binary files /dev/null and b/tests/unit_test/preamble-rest.wav differ diff --git a/tests/unit_test/preamble-websocket.wav b/tests/unit_test/preamble-websocket.wav new file mode 100644 index 00000000..f901de75 Binary files /dev/null and b/tests/unit_test/preamble-websocket.wav differ diff --git a/tests/unit_test/test_unit_listen_rest_file.py b/tests/unit_test/test_unit_listen_rest_file.py new file mode 100644 index 00000000..9bb52c58 --- /dev/null +++ b/tests/unit_test/test_unit_listen_rest_file.py @@ -0,0 +1,109 @@ +# Copyright 2024 Deepgram SDK contributors. All Rights Reserved. +# Use of this source code is governed by a MIT license that can be found in the LICENSE file. +# SPDX-License-Identifier: MIT + +import contextlib +import os +import json +import pytest +import hashlib +from http import HTTPStatus + +import httpx + +from deepgram import DeepgramClient, PrerecordedOptions, FileSource +from tests.utils import read_metadata_string, save_metadata_string + +MODEL = "2-general-nova" + +# response constants +FILE1 = "preamble-rest.wav" +FILE1_SMART_FORMAT = "We, the people of the United States, in order to form a more perfect union, establish justice, ensure domestic tranquility, provide for the common defense, promote the general welfare, and secure the blessings of liberty to ourselves and our posterity to ordain and establish this constitution for the United States of America." +FILE1_SUMMARIZE1 = "Speaker 1 discusses the goal of establishing a more perfect union, justice, and the common defense for the United States, as part of the Better Union movement. They emphasize the importance of these factors in securing the benefits of liberty for the United States and the world." +FILE1_SUMMARIZE2 = "Speaker 1 discusses the goal of establishing a more perfect union, justice, and the common defense for the United States, as part of the Better Union movement. They emphasize the importance of these goals in securing the blessings of liberty for the United States and the nation." +FILE1_SUMMARIZE3 = "Speaker 1 discusses the importance of protecting the common defense and securing the benefits of liberty to establish...hey also mention the importance of providing for the general welfare and securing the bounty of liberty to themselves." 
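+
+# NOTE: summarization output varies between model runs, so each expected field
+# below lists every recorded variant; the test passes when the actual value
+# matches any one of them (see the `actual in expected` assertion).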
+
+# Create a list of tuples to store the key-value pairs
+input_output = [
+    (
+        FILE1,
+        PrerecordedOptions(model="nova-2", smart_format=True),
+        {"results.channels.0.alternatives.0.transcript": [FILE1_SMART_FORMAT]},
+    ),
+    (
+        FILE1,
+        PrerecordedOptions(model="nova-2", smart_format=True, summarize="v2"),
+        {
+            "results.channels.0.alternatives.0.transcript": [FILE1_SMART_FORMAT],
+            "results.summary.short": [
+                FILE1_SUMMARIZE1,
+                FILE1_SUMMARIZE2,
+                FILE1_SUMMARIZE3,
+            ],
+        },
+    ),
+]
+
+
+@pytest.mark.parametrize("filename, options, expected_output", input_output)
+def test_unit_listen_rest_file(filename, options, expected_output):
+    # compute a unique test ID from the input filename and options
+    filenamestr = json.dumps(filename)
+    input_sha256sum = hashlib.sha256(filenamestr.encode()).hexdigest()
+    option_sha256sum = hashlib.sha256(options.to_json().encode()).hexdigest()
+
+    unique = f"{option_sha256sum}-{input_sha256sum}"
+
+    # filenames
+    file_resp = f"tests/response_data/listen/rest/{unique}-response.json"
+    file_error = f"tests/response_data/listen/rest/{unique}-error.json"
+
+    # clean up
+    with contextlib.suppress(FileNotFoundError):
+        os.remove(file_error)
+
+    # read metadata
+    response_data = read_metadata_string(file_resp)
+
+    # Create a Deepgram client
+    deepgram = DeepgramClient()
+
+    # file buffer
+    with open(f"tests/daily_test/{filename}", "rb") as file:
+        buffer_data = file.read()
+
+    payload: FileSource = {
+        "buffer": buffer_data,
+    }
+
+    # make request
+    transport = httpx.MockTransport(
+        lambda request: httpx.Response(HTTPStatus.OK, content=response_data)
+    )
+    response = deepgram.listen.rest.v("1").transcribe_file(
+        payload, options, transport=transport
+    )
+
+    # Check the response
+    for key, value in response.metadata.model_info.items():
+        assert (
+            value.name == MODEL
+        ), f"Test ID: {unique} - Expected: {MODEL}, Actual: {value.name}"
+
+    for key, value in expected_output.items():
+        actual = response.eval(key)
+        expected = value
+
+        try:
+            assert (
+                actual in expected
+            ), f"Test ID: {unique} - Key: {key}, Expected: {expected}, Actual: {actual}"
+        finally:
+            # assertion failed: save the mismatch so it can be inspected
+            if not (actual in expected):
+                failure = {
+                    "actual": actual,
+                    "expected": expected,
+                }
+                failuresstr = json.dumps(failure)
+                save_metadata_string(file_error, failuresstr)
diff --git a/tests/unit_test/test_unit_listen_rest_url.py b/tests/unit_test/test_unit_listen_rest_url.py
new file mode 100644
index 00000000..56568649
--- /dev/null
+++ b/tests/unit_test/test_unit_listen_rest_url.py
@@ -0,0 +1,97 @@
+# Copyright 2024 Deepgram SDK contributors. All Rights Reserved.
+# Use of this source code is governed by a MIT license that can be found in the LICENSE file.
+# SPDX-License-Identifier: MIT
+
+import contextlib
+import os
+import json
+import pytest
+import hashlib
+from http import HTTPStatus
+
+import httpx
+
+from deepgram import DeepgramClient, PrerecordedOptions
+from tests.utils import read_metadata_string, save_metadata_string
+
+MODEL = "2-general-nova"
+
+# response constants
+URL1 = {
+    "url": "https://static.deepgram.com/examples/Bueller-Life-moves-pretty-fast.wav"
+}
+URL1_SMART_FORMAT1 = "Yep. I said it before and I'll say it again. Life moves pretty fast. You don't stop and look around once in a while, you could miss it."
+URL1_SUMMARIZE1 = "Yep. I said it before and I'll say it again. Life moves pretty fast. You don't stop and look around once in a while, you could miss it."
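+
+# NOTE: no network traffic occurs in these unit tests; a previously recorded
+# response body is replayed through httpx.MockTransport, which keeps the
+# assertions deterministic and independent of the live API.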
+ +# Create a list of tuples to store the key-value pairs +input_output = [ + ( + URL1, + PrerecordedOptions(model="nova-2", smart_format=True), + {"results.channels.0.alternatives.0.transcript": [URL1_SMART_FORMAT1]}, + ), + ( + URL1, + PrerecordedOptions(model="nova-2", smart_format=True, summarize="v2"), + { + "results.channels.0.alternatives.0.transcript": [URL1_SMART_FORMAT1], + "results.summary.short": [URL1_SUMMARIZE1], + }, + ), +] + + +@pytest.mark.parametrize("url, options, expected_output", input_output) +def test_unit_listen_rest_url(url, options, expected_output): + # options + urlstr = json.dumps(url) + input_sha256sum = hashlib.sha256(urlstr.encode()).hexdigest() + option_sha256sum = hashlib.sha256(options.to_json().encode()).hexdigest() + + unique = f"{option_sha256sum}-{input_sha256sum}" + + # filenames + file_resp = f"tests/response_data/listen/rest/{unique}-response.json" + file_error = f"tests/response_data/listen/rest/{unique}-error.json" + + # clean up + with contextlib.suppress(FileNotFoundError): + os.remove(file_error) + + # read metadata + response_data = read_metadata_string(file_resp) + + # Create a Deepgram client + deepgram = DeepgramClient() + + # make request + transport = httpx.MockTransport( + lambda request: httpx.Response(HTTPStatus.OK, content=response_data) + ) + response = deepgram.listen.rest.v("1").transcribe_url( + url, options, transport=transport + ) + + # Check the response + for key, value in response.metadata.model_info.items(): + assert ( + value.name == MODEL + ), f"Test ID: {unique} - Expected: {MODEL}, Actual: {value.name}" + + for key, value in expected_output.items(): + actual = response.eval(key) + expected = value + + try: + assert ( + actual in expected + ), f"Test ID: {unique} - Key: {key}, Expected: {expected}, Actual: {actual}" + finally: + # if asserted + if not (actual in expected): + failure = { + "actual": actual, + "expected": expected, + } + failuresstr = json.dumps(failure) + save_metadata_string(file_error, failuresstr) diff --git a/tests/unit_test/test_unit_listen_websocket.py b/tests/unit_test/test_unit_listen_websocket.py new file mode 100644 index 00000000..071f67dd --- /dev/null +++ b/tests/unit_test/test_unit_listen_websocket.py @@ -0,0 +1,152 @@ +# Copyright 2024 Deepgram SDK contributors. All Rights Reserved. +# Use of this source code is governed by a MIT license that can be found in the LICENSE file. +# SPDX-License-Identifier: MIT + +import contextlib +import os +import json +import pytest +import hashlib +import time + +from websocket_server import WebsocketServer, WebsocketServerThread + +from deepgram import ( + DeepgramClient, + DeepgramClientOptions, + LiveOptions, + LiveTranscriptionEvents, +) + +from tests.utils import save_metadata_string + +MODEL = "2-general-nova" + +# response constants +INPUT1 = '{"channel": {"alternatives": [{"transcript": "Testing 123. 
Testing 123.", "confidence": 0.97866726, "words": [{"word": "testing", "start": 1.12, "end": 1.62, "confidence": 0.97866726, "punctuated_word": "Testing"}, {"word": "123", "start": 1.76, "end": 1.8399999, "confidence": 0.73616695, "punctuated_word": "123."}, {"word": "testing", "start": 1.8399999, "end": 2.34, "confidence": 0.99529773, "punctuated_word": "Testing"}, {"word": "123", "start": 2.8799999, "end": 3.3799999, "confidence": 0.9773819, "punctuated_word": "123."}]}]}, "metadata": {"model_info": {"name": "2-general-nova", "version": "2024-01-18.26916", "arch": "nova-2"}, "request_id": "0d2f1ddf-b9aa-40c9-a761-abcd8cf5734f", "model_uuid": "c0d1a568-ce81-4fea-97e7-bd45cb1fdf3c"}, "type": "Results", "channel_index": [0, 1], "duration": 3.69, "start": 0.0, "is_final": true, "from_finalize": false, "speech_final": true}' +OUTPUT1 = "Testing 123. Testing 123." + +# Create a list of tuples to store the key-value pairs +input_output = [ + ( + LiveOptions( + language="en-US", + smart_format=True, + encoding="mulaw", + channels=1, + sample_rate=8000, + punctuate=True, + ), + INPUT1, + OUTPUT1, + ), +] + +response = "" + + +@pytest.mark.parametrize("options, input, output", input_output) +def test_unit_listen_websocket(options, input, output): + # Save the options + input_sha256sum = hashlib.sha256(input.encode()).hexdigest() + option_sha256sum = hashlib.sha256(options.to_json().encode()).hexdigest() + + unique = f"{option_sha256sum}-{input_sha256sum}" + + # filenames + file_options = f"tests/response_data/listen/websocket/{unique}-options.json" + file_input = f"tests/response_data/listen/websocket/{unique}-input.cmd" + file_resp = f"tests/response_data/listen/websocket/{unique}-response.json" + file_error = f"tests/response_data/listen/websocket/{unique}-error.json" + + # clean up + with contextlib.suppress(FileNotFoundError): + os.remove(file_input) + with contextlib.suppress(FileNotFoundError): + os.remove(file_options) + with contextlib.suppress(FileNotFoundError): + os.remove(file_resp) + with contextlib.suppress(FileNotFoundError): + os.remove(file_error) + + # server + def new_client(client, server): + server.send_message_to_all(input) + + # start websocket server + server = WebsocketServer(host="127.0.0.1", port=13254) + server.set_fn_new_client(new_client) + + server.daemon = True + server.thread = WebsocketServerThread( + target=server.serve_forever, daemon=True, logger=None + ) + server.thread.start() + + # Create a Deepgram client + config = DeepgramClientOptions( + url="ws://127.0.0.1:13254", options={"keepalive": "true"} + ) + deepgram: DeepgramClient = DeepgramClient("", config) + + # Send the URL to Deepgram + dg_connection = deepgram.listen.websocket.v("1") + + def on_message(self, result, **kwargs): + global response + sentence = result.channel.alternatives[0].transcript + if len(sentence) == 0: + return + if result.is_final: + if len(response) > 0: + response = response + " " + response = response + sentence + + dg_connection.on(LiveTranscriptionEvents.Transcript, on_message) + + # connect + assert dg_connection.start(options) == True + time.sleep(0.5) + + # each iteration is 0.5 seconds * 20 iterations = 10 second timeout + timeout = 0 + exit = False + while dg_connection.is_connected() and timeout < 20 and not exit: + if response == output: + exit = True + break + timeout = timeout + 1 + time.sleep(0.5) + + # close client + dg_connection.finish() + time.sleep(0.25) + + # close server + server.shutdown_gracefully() + + # Check the response + if response == "": + assert 
response != "", f"Test ID: {unique} - No response received" + elif response == "" and timeout > 20: + assert ( + timeout < 20 + ), f"Test ID: {unique} - Timed out OR the value is not in the expected_output" + + # Check the response + actual = response + expected = output + + try: + assert ( + actual in expected + ), f"Test ID: {unique} - Expected: {expected}, Actual: {actual}" + finally: + # if asserted + if not (actual in expected): + failure = { + "actual": actual, + "expected": expected, + } + failuresstr = json.dumps(failure) + save_metadata_string(file_error, failuresstr) diff --git a/tests/unit_test/test_unit_read_rest_file.py b/tests/unit_test/test_unit_read_rest_file.py new file mode 100644 index 00000000..1233c910 --- /dev/null +++ b/tests/unit_test/test_unit_read_rest_file.py @@ -0,0 +1,99 @@ +# Copyright 2024 Deepgram SDK contributors. All Rights Reserved. +# Use of this source code is governed by a MIT license that can be found in the LICENSE file. +# SPDX-License-Identifier: MIT + +import contextlib +import os +import json +import pytest +import hashlib +from http import HTTPStatus + +import httpx + +from deepgram import DeepgramClient, AnalyzeOptions, FileSource +from tests.utils import read_metadata_string, save_metadata_string + +MODEL = "2-general-nova" + +# response constants +FILE1 = "conversation.txt" +FILE1_SUMMARIZE1 = "The potential for voice-based interfaces in conversational AI applications is discussed, with a focus on voice-premises and wearable devices. The success of voice-first experiences and tools, including DeepgramQuad, is highlighted, along with the potential for high-throughput and fast text-to-speech conversion for AI agents. The speakers emphasize the benefits of voice quality, including natural speech flow, and the importance of tailoring voice to specific applications. They also mention their involvement in machine learning and their plans to expand their waitlist for a speech-to-text model. They expect to release generally early next year, but if working on any real-time AI agent use cases, they can join their waitlist to jumpstart their development in production." +FILE1_SUMMARIZE2 = "The speakers discuss the potential for voice-based interfaces to unlock conversational AI applications, including high-performance voice-to-text (TTS) and voice-to-text (TTS) API and DeepgramQuad. They also mention their involvement in machine learning and their plans to expand their waitlist for a speech-to-text model. They expect to release generally early next year, but if working on any real-time AI agent use cases, they can join their waitlist to jumpstart their development in production. They also discuss the benefits of voice quality, including the ability to swap between voices, the naturalness of the flow of conversations, and the importance of tailoring voice to specific applications. They plan to expand their waitlist for a speech-to-text model and work on real-time AI agent use cases." +FILE1_SUMMARIZE3 = "The potential for voice-based interfaces in conversational AI applications is discussed, with a focus on voice-premised and wearable devices. The speakers emphasize the benefits of voice quality, including natural speech flow, and the potential for AI agents to be more human than humans in speech recognition. They also mention their involvement in machine learning and plans to expand their waitlist for a speech-to-text model. 
They expect to release generally early next year, but if working on any real-time AI agent use cases, they can join their waitlist to jumpstart their development in production." +FILE1_SUMMARIZE4 = "The speakers discuss the potential for voice-based interfaces to unlock conversational AI applications, including the use of natural voices and audio to create voice-like experiences. They also emphasize the benefits of voice quality, including the ability to swap between voices and the natural flow of conversations. The potential for AI agents to be more human than humans in speech recognition, including the use of natural voices and audio to create voice-like experiences. They plan to expand their waitlist for a speech-to-text model and work on real-time AI agent use cases, with a general release expected early next year. They also mention their involvement in machine learning and their plans to expand their waitlist for a speech-to-text model." + +# Create a list of tuples to store the key-value pairs +input_output = [ + ( + FILE1, + AnalyzeOptions(language="en", summarize=True), + { + "results.summary.text": [ + FILE1_SUMMARIZE1, + FILE1_SUMMARIZE2, + FILE1_SUMMARIZE3, + FILE1_SUMMARIZE4, + ] + }, + ), +] + + +@pytest.mark.parametrize("filename, options, expected_output", input_output) +def test_unit_read_rest_file(filename, options, expected_output): + # options + filenamestr = json.dumps(filename) + input_sha256sum = hashlib.sha256(filenamestr.encode()).hexdigest() + option_sha256sum = hashlib.sha256(options.to_json().encode()).hexdigest() + + unique = f"{option_sha256sum}-{input_sha256sum}" + + # filenames + file_resp = f"tests/response_data/read/rest/{unique}-response.json" + file_error = f"tests/response_data/read/rest/{unique}-error.json" + + # clean up + with contextlib.suppress(FileNotFoundError): + os.remove(file_error) + + # read metadata + response_data = read_metadata_string(file_resp) + + # Create a Deepgram client + deepgram = DeepgramClient() + + # file buffer + with open(f"tests/daily_test/{filename}", "rb") as file: + buffer_data = file.read() + + payload: FileSource = { + "buffer": buffer_data, + } + + # make request + transport = httpx.MockTransport( + lambda request: httpx.Response(HTTPStatus.OK, content=response_data) + ) + response = deepgram.read.analyze.v("1").analyze_text( + payload, options, transport=transport + ) + + # Check the response + for key, value in expected_output.items(): + actual = response.eval(key) + expected = value + + try: + assert ( + actual in expected + ), f"Test ID: {unique} - Key: {key}, Expected: {expected}, Actual: {actual}" + finally: + # if asserted + if not (actual in expected): + failure = { + "actual": actual, + "expected": expected, + } + failuresstr = json.dumps(failure) + save_metadata_string(file_error, failuresstr) diff --git a/tests/unit_test/test_unit_speak_rest.py b/tests/unit_test/test_unit_speak_rest.py new file mode 100644 index 00000000..15206795 --- /dev/null +++ b/tests/unit_test/test_unit_speak_rest.py @@ -0,0 +1,96 @@ +# Copyright 2024 Deepgram SDK contributors. All Rights Reserved. +# Use of this source code is governed by a MIT license that can be found in the LICENSE file. 
+# SPDX-License-Identifier: MIT
+
+import contextlib
+import os
+import json
+import pytest
+import hashlib
+from http import HTTPStatus
+
+import httpx
+
+from deepgram import DeepgramClient, SpeakOptions
+
+from tests.utils import read_metadata_string, save_metadata_string
+
+MODEL = "aura-asteria-en"
+
+# response constants
+TEXT1 = "Hello, world."
+
+# Create a list of tuples to store the key-value pairs
+input_output = [
+    (
+        TEXT1,
+        SpeakOptions(model=MODEL, encoding="linear16", sample_rate=24000),
+        {
+            "content_type": ["audio/wav"],
+            "model_uuid": ["d6db4f9d-0321-467c-a57c-3daca7d5b68e"],
+            "model_name": ["aura-asteria-en"],
+            "characters": ["13"],
+        },
+    ),
+]
+
+
+@pytest.mark.parametrize("text, options, expected_output", input_output)
+def test_unit_speak_rest(text, options, expected_output):
+    # compute a unique test ID from the input text and options
+    input_sha256sum = hashlib.sha256(text.encode()).hexdigest()
+    option_sha256sum = hashlib.sha256(options.to_json().encode()).hexdigest()
+
+    unique = f"{option_sha256sum}-{input_sha256sum}"
+
+    # filenames
+    file_resp = f"tests/response_data/speak/rest/{unique}-response.json"
+    file_error = f"tests/response_data/speak/rest/{unique}-error.json"
+
+    # clean up
+    with contextlib.suppress(FileNotFoundError):
+        os.remove(file_error)
+
+    # read the recorded metadata; header names use dashes rather than underscores
+    response_data = read_metadata_string(file_resp)
+    response_data = response_data.replace("_", "-")
+    response_data = response_data.replace("characters", "char-count")
+
+    # parse to a dict and coerce char-count to a string (header values must be strings)
+    headers = json.loads(response_data)
+    headers["char-count"] = str(headers.get("char-count"))
+
+    # Create a Deepgram client
+    deepgram = DeepgramClient()
+
+    # input text
+    input_text = {"text": text}
+
+    # make the request against a mock transport that replays the recorded headers
+    transport = httpx.MockTransport(
+        lambda request: httpx.Response(HTTPStatus.OK, headers=headers)
+    )
+    response = deepgram.speak.rest.v("1").stream_memory(
+        input_text, options, transport=transport
+    )
+    # normalize the character count to a string for comparison
+    response["characters"] = str(response["characters"])
+
+    # Check the response
+    for key, value in expected_output.items():
+        actual = response.eval(key)
+        expected = value
+
+        try:
+            assert (
+                actual in expected
+            ), f"Test ID: {unique} - Key: {key}, Expected: {expected}, Actual: {actual}"
+        finally:
+            # assertion failed: save the mismatch so it can be inspected
+            if not (actual in expected):
+                failure = {
+                    "actual": actual,
+                    "expected": expected,
+                }
+                failuresstr = json.dumps(failure)
+                save_metadata_string(file_error, failuresstr)
diff --git a/tests/utils/__init__.py b/tests/utils/__init__.py
new file mode 100644
index 00000000..59827132
--- /dev/null
+++ b/tests/utils/__init__.py
@@ -0,0 +1,13 @@
+# Copyright 2023-2024 Deepgram SDK contributors. All Rights Reserved.
+# Use of this source code is governed by a MIT license that can be found in the LICENSE file.
+# SPDX-License-Identifier: MIT
+
+from .utils import (
+    get_query_params,
+    create_dirs,
+    save_metadata_bytes,
+    save_metadata_string,
+    read_metadata_string,
+    read_metadata_bytes,
+    string_match_failure,
+)
diff --git a/tests/utils/test_utils.py b/tests/utils/test_utils.py
new file mode 100644
index 00000000..a06eeabf
--- /dev/null
+++ b/tests/utils/test_utils.py
@@ -0,0 +1,48 @@
+# Copyright 2023-2024 Deepgram SDK contributors. All Rights Reserved.
+# Use of this source code is governed by a MIT license that can be found in the LICENSE file.
+# SPDX-License-Identifier: MIT
+
+import pytest
+
+from .utils import (
+    get_query_params,
+    create_dirs,
+    save_metadata_bytes,
+    save_metadata_string,
+    read_metadata_string,
+    read_metadata_bytes,
+    string_match_failure,
+)
+
+
+def test_get_query_params():
+    assert get_query_params("http://example.com/path?name=test") == "name=test"
+    assert get_query_params("http://example.com/path") == ""
+
+
+def test_create_dirs(tmp_path):
+    test_dir = tmp_path / "test_dir"
+    test_file = test_dir / "test_file.txt"
+    create_dirs(test_file)
+    assert test_dir.exists()
+
+
+def test_save_and_read_metadata_string(tmp_path):
+    test_file = tmp_path / "test_file.txt"
+    test_data = "test_data"
+    save_metadata_string(test_file, test_data)
+    assert read_metadata_string(test_file) == test_data
+
+
+def test_save_and_read_metadata_bytes(tmp_path):
+    test_file = tmp_path / "test_file.txt"
+    test_data = b"test_data"
+    save_metadata_bytes(test_file, test_data)
+    assert read_metadata_bytes(test_file) == test_data
+
+
+def test_string_match_failure():
+    expected = "expected"
+    actual = "exzected"
+    with pytest.raises(ValueError):
+        string_match_failure(expected, actual)
diff --git a/tests/utils/utils.py b/tests/utils/utils.py
new file mode 100644
index 00000000..cff046bc
--- /dev/null
+++ b/tests/utils/utils.py
@@ -0,0 +1,70 @@
+# Copyright 2023-2024 Deepgram SDK contributors. All Rights Reserved.
+# Use of this source code is governed by a MIT license that can be found in the LICENSE file.
+# SPDX-License-Identifier: MIT
+
+import os
+
+
+def get_query_params(url: str) -> str:
+    # return everything after the first "?", or an empty string if there is none
+    pos = url.find("?")
+    if pos == -1:
+        return ""
+    return url[pos + 1 :]
+
+
+def create_dirs(fullpath: str) -> None:
+    basedir = os.path.dirname(fullpath)
+    # a bare filename has no directory component, so there is nothing to create
+    if not basedir:
+        return
+    os.makedirs(basedir, mode=0o700, exist_ok=True)
+
+
+def save_metadata_bytes(filename: str, data: bytes) -> None:
+    save_metadata_string(filename, data.decode())
+
+
+def save_metadata_string(filename: str, data: str) -> None:
+    # create directory
+    create_dirs(filename)
+
+    # save metadata
+    with open(filename, "w", encoding="utf-8") as data_file:
+        data_file.write(data)
+
+
+def read_metadata_string(filename: str) -> str:
+    with open(filename, "r", encoding="utf-8") as data_file:
+        return data_file.read()
+
+
+def read_metadata_bytes(filename: str) -> bytes:
+    with open(filename, "rb") as data_file:
+        return data_file.read()
+
+
+def string_match_failure(expected: str, actual: str) -> None:
+    if len(expected) != len(actual):
+        raise ValueError("string lengths don't match")
+
+    # find the first position at which the two strings differ
+    found = -1
+    for i in range(len(expected)):
+        if expected[i] != actual[i]:
+            found = i
+            break
+
+    # the strings match; nothing to report
+    if found == -1:
+        return
+
+    # print both strings, highlighting the first mismatched character in red
+    for text in (expected, actual):
+        for i, char in enumerate(text):
+            if i == found:
+                print(f"\033[0;31m{char}\033[0m", end="")
+            else:
+                print(char, end="")
+        print()
+
+    raise ValueError(f"string mismatch at position {found}")
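+
+
+# Illustrative sketch only (not used by the test suite): running this module
+# directly shows the character-level diff that string_match_failure prints
+# before it raises.
+if __name__ == "__main__":
+    try:
+        string_match_failure("expected", "exzected")
+    except ValueError as err:
+        print(err)  # -> string mismatch at position 2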