From 301053769e43171409db555129b1f847ee5cddec Mon Sep 17 00:00:00 2001 From: jiltseb Date: Tue, 8 Oct 2024 19:29:27 +0000 Subject: [PATCH 1/7] handle case when audio is empty, e.g. due to VAD filtering all segments --- faster_whisper/transcribe.py | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/faster_whisper/transcribe.py b/faster_whisper/transcribe.py index 8652e82b..df4039a0 100644 --- a/faster_whisper/transcribe.py +++ b/faster_whisper/transcribe.py @@ -944,13 +944,19 @@ def transcribe( ) seek += segment.shape[-1] else: - # If no language detected for all segments, the majority vote of the highest - # projected languages for all segments is used to determine the language. - language = max( - detected_language_info, - key=lambda lang: len(detected_language_info[lang]), - ) - language_probability = max(detected_language_info[language]) + if detected_language_info: + # If no language detected for all segments, the majority vote of the highest + # projected languages for all segments is used to determine the language. 
+ language = max( + detected_language_info, + key=lambda lang: len(detected_language_info[lang]), + ) + language_probability = max(detected_language_info[language]) + else: + # It's possible VAD removes all segments due to no voice, + # then it doesn't matter which language + language = 'en' + language_probability = 0 self.logger.info( "Detected language '%s' with probability %.2f", From c9390166ed758e8986c33ab69d8a029fb5539316 Mon Sep 17 00:00:00 2001 From: jiltseb Date: Tue, 8 Oct 2024 19:31:28 +0000 Subject: [PATCH 2/7] add turbo model --- faster_whisper/utils.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/faster_whisper/utils.py b/faster_whisper/utils.py index 481bd748..f31e42fd 100644 --- a/faster_whisper/utils.py +++ b/faster_whisper/utils.py @@ -26,6 +26,8 @@ "distil-medium.en": "Systran/faster-distil-whisper-medium.en", "distil-small.en": "Systran/faster-distil-whisper-small.en", "distil-large-v3": "Systran/faster-distil-whisper-large-v3", + "large-v3-turbo": "mobiuslabsgmbh/faster-whisper-large-v3-turbo", + "turbo": "mobiuslabsgmbh/faster-whisper-large-v3-turbo", } From e0c61340a9212432931529231f0dd1673cf9dd3b Mon Sep 17 00:00:00 2001 From: jiltseb Date: Tue, 8 Oct 2024 19:32:31 +0000 Subject: [PATCH 3/7] limit pytorch version to cudnn8 for pip install --- requirements.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index e0a3afba..a684d4be 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,6 +3,6 @@ huggingface_hub>=0.13 tokenizers>=0.13,<1 onnxruntime>=1.14,<2 pyannote-audio>=3.1.1 -torch>=2.1.1 -torchaudio>=2.1.2 +torch>=2.1.1,<2.4.0 +torchaudio>=2.1.2,<2.4.0 tqdm \ No newline at end of file From 0453cccc3a26056827f13c0ab8d66aeaefbc0b2c Mon Sep 17 00:00:00 2001 From: jiltseb Date: Tue, 8 Oct 2024 19:37:40 +0000 Subject: [PATCH 4/7] formatting checks --- faster_whisper/transcribe.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git 
a/faster_whisper/transcribe.py b/faster_whisper/transcribe.py index df4039a0..08585d1d 100644 --- a/faster_whisper/transcribe.py +++ b/faster_whisper/transcribe.py @@ -953,9 +953,9 @@ def transcribe( ) language_probability = max(detected_language_info[language]) else: - # It's possible VAD removes all segments due to no voice, + # It's possible VAD removes all segments due to no voice, # then it doesn't matter which language - language = 'en' + language = "en" language_probability = 0 self.logger.info( From 8af76429080ae934326b304923dc2b20694c0655 Mon Sep 17 00:00:00 2001 From: jiltseb Date: Wed, 9 Oct 2024 14:57:07 +0000 Subject: [PATCH 5/7] fixes no_active_speech vad error --- faster_whisper/transcribe.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/faster_whisper/transcribe.py b/faster_whisper/transcribe.py index 08585d1d..8bda56dd 100644 --- a/faster_whisper/transcribe.py +++ b/faster_whisper/transcribe.py @@ -520,13 +520,17 @@ def transcribe( audio_segments = torch.nested.nested_tensor(audio_segments).to_padded_tensor( padding=0 ) - features = torch.stack( - [ - self.model.feature_extractor(audio_segment, to_cpu=to_cpu)[ - ..., : self.model.feature_extractor.nb_max_frames + features = ( + torch.stack( + [ + self.model.feature_extractor(audio_segment, to_cpu=to_cpu)[ + ..., : self.model.feature_extractor.nb_max_frames + ] + for audio_segment in audio_segments ] - for audio_segment in audio_segments - ] + ) + if duration_after_vad + else [] ) segments = self._batched_segments_generator( From b9f9c883d73a1cc7c1a76ac4d48a3be821000078 Mon Sep 17 00:00:00 2001 From: Hossein Rashidi Date: Mon, 14 Oct 2024 12:24:29 +0200 Subject: [PATCH 6/7] add publish action --- .github/workflows/publish.yml | 53 +++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) create mode 100644 .github/workflows/publish.yml diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml new file mode 100644 index 
00000000..7f142ff4 --- /dev/null +++ b/.github/workflows/publish.yml @@ -0,0 +1,53 @@ +name: Publish Python Package + +on: + release: + types: [published] + workflow_dispatch: + inputs: + publish_target: + description: 'Select the target PyPI repository' + required: true + default: 'testpypi' + type: choice + options: + - pypi + - testpypi + +jobs: + publish: + runs-on: ubuntu-latest + + steps: + - name: Checkout code + uses: actions/checkout@v3 + + - name: Set up Python 3.10 + uses: actions/setup-python@v5 + with: + python-version: "3.10" + + - name: Install publish requirements + run: pip3 install setuptools wheel twine + + - name: Install build dependencies + run: pip3 install -r requirements.txt + + - name: Build the package + run: python3 -m build + + - name: Publish to PyPI + if: github.event_name == 'release' || (github.event_name == 'workflow_dispatch' && github.event.inputs.publish_target == 'pypi') + env: + TWINE_USERNAME: "__token__" + TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }} + run: | + twine upload dist/* + + - name: Publish to Test PyPI + if: github.event_name == 'workflow_dispatch' && github.event.inputs.publish_target == 'testpypi' + env: + TWINE_USERNAME: "__token__" + TWINE_PASSWORD: ${{ secrets.TEST_PYPI_API_TOKEN }} + run: | + twine upload -r testpypi dist/* From 010300c8a9d88cb5f5daf2c03f7c1c01e26b10ec Mon Sep 17 00:00:00 2001 From: Hossein Rashidi Date: Mon, 14 Oct 2024 12:28:46 +0200 Subject: [PATCH 7/7] fix build --- .github/workflows/publish.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 7f142ff4..b38040dc 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -31,10 +31,10 @@ jobs: run: pip3 install setuptools wheel twine - name: Install build dependencies - run: pip3 install -r requirements.txt + run: pip3 install wheel - name: Build the package - run: python3 -m build + run: python3 setup.py sdist bdist_wheel - 
name: Publish to PyPI if: github.event_name == 'release' || (github.event_name == 'workflow_dispatch' && github.event.inputs.publish_target == 'pypi')