avans06 commited on
Commit
77b92a2
1 Parent(s): 8c8a39e

If the user does not choose a language for Whisper,

Browse files

the language detected by Whisper will automatically be used as the source language for the NLLB model, avoiding errors when NLLB attempts to determine the source language on its own.

Files changed (3) hide show
  1. app.py +16 -12
  2. src/vad.py +2 -2
  3. webui.bat +1 -1
app.py CHANGED
@@ -20,9 +20,7 @@ from src.diarization.diarizationContainer import DiarizationContainer
20
  from src.hooks.progressListener import ProgressListener
21
  from src.hooks.subTaskProgressListener import SubTaskProgressListener
22
  from src.hooks.whisperProgressHook import create_progress_listener_handle
23
- from src.languages import _TO_LANGUAGE_CODE
24
- from src.languages import get_language_names
25
- from src.languages import get_language_from_name
26
  from src.modelCache import ModelCache
27
  from src.prompts.jsonPromptStrategy import JsonPromptStrategy
28
  from src.prompts.prependPromptStrategy import PrependPromptStrategy
@@ -269,6 +267,10 @@ class WhisperTranscriber:
269
 
270
  # Transcribe
271
  result = self.transcribe_file(model, source.source_path, selectedLanguage, task, vadOptions, scaled_progress_listener, **decodeOptions)
 
 
 
 
272
  short_name, suffix = source.get_short_name_suffix(max_length=self.app_config.input_max_file_name_length)
273
  filePrefix = slugify(source_prefix + short_name, allow_unicode=True)
274
 
@@ -700,8 +702,8 @@ def create_ui(app_config: ApplicationConfig):
700
 
701
  common_output = lambda : [
702
  gr.File(label="Download"),
703
- gr.Text(label="Transcription"),
704
- gr.Text(label="Segments"),
705
  ]
706
 
707
  is_queue_mode = app_config.queue_concurrency_count is not None and app_config.queue_concurrency_count > 0
@@ -863,13 +865,15 @@ if __name__ == '__main__':
863
 
864
  updated_config = default_app_config.update(**args)
865
 
866
- #updated_config.whisper_implementation = "faster-whisper"
867
- #updated_config.input_audio_max_duration = -1
868
- #updated_config.default_model_name = "large-v2"
869
- #updated_config.output_dir = "output"
870
- #updated_config.vad_max_merge_size = 90
871
- #updated_config.merge_subtitle_with_sources = True
872
- #updated_config.autolaunch = True
 
 
873
 
874
  if (threads := args.pop("threads")) > 0:
875
  torch.set_num_threads(threads)
 
20
  from src.hooks.progressListener import ProgressListener
21
  from src.hooks.subTaskProgressListener import SubTaskProgressListener
22
  from src.hooks.whisperProgressHook import create_progress_listener_handle
23
+ from src.languages import _TO_LANGUAGE_CODE, get_language_names, get_language_from_name, get_language_from_code
 
 
24
  from src.modelCache import ModelCache
25
  from src.prompts.jsonPromptStrategy import JsonPromptStrategy
26
  from src.prompts.prependPromptStrategy import PrependPromptStrategy
 
267
 
268
  # Transcribe
269
  result = self.transcribe_file(model, source.source_path, selectedLanguage, task, vadOptions, scaled_progress_listener, **decodeOptions)
270
+ if whisper_lang is None and result["language"] is not None and len(result["language"]) > 0:
271
+ whisper_lang = get_language_from_code(result["language"])
272
+ nllb_model.whisper_lang = whisper_lang
273
+
274
  short_name, suffix = source.get_short_name_suffix(max_length=self.app_config.input_max_file_name_length)
275
  filePrefix = slugify(source_prefix + short_name, allow_unicode=True)
276
 
 
702
 
703
  common_output = lambda : [
704
  gr.File(label="Download"),
705
+ gr.Text(label="Transcription", autoscroll=False),
706
+ gr.Text(label="Segments", autoscroll=False),
707
  ]
708
 
709
  is_queue_mode = app_config.queue_concurrency_count is not None and app_config.queue_concurrency_count > 0
 
865
 
866
  updated_config = default_app_config.update(**args)
867
 
868
+ # updated_config.whisper_implementation = "faster-whisper"
869
+ # updated_config.input_audio_max_duration = -1
870
+ # updated_config.default_model_name = "large-v2"
871
+ # updated_config.output_dir = "output"
872
+ # updated_config.vad_max_merge_size = 90
873
+ # updated_config.merge_subtitle_with_sources = False
874
+ # updated_config.autolaunch = True
875
+ # updated_config.auto_parallel = False
876
+ # updated_config.save_downloaded_files = True
877
 
878
  if (threads := args.pop("threads")) > 0:
879
  torch.set_num_threads(threads)
src/vad.py CHANGED
@@ -205,7 +205,7 @@ class AbstractTranscription(ABC):
205
  # Detected language
206
  detected_language = languageCounter.most_common(1)[0][0] if len(languageCounter) > 0 else None
207
 
208
- print("Running whisper from ", format_timestamp(segment_start), " to ", format_timestamp(segment_end), ", duration: ",
209
  segment_duration, "expanded: ", segment_expand_amount, ", prompt: ", segment_prompt, ", detected language: ", detected_language)
210
 
211
  perf_start_time = time.perf_counter()
@@ -217,7 +217,7 @@ class AbstractTranscription(ABC):
217
  segment_result = whisperCallable.invoke(segment_audio, segment_index, segment_prompt, detected_language, progress_listener=scaled_progress_listener)
218
 
219
  perf_end_time = time.perf_counter()
220
- print("Whisper took {} seconds".format(perf_end_time - perf_start_time))
221
 
222
  adjusted_segments = self.adjust_timestamp(segment_result["segments"], adjust_seconds=segment_start, max_source_time=segment_duration)
223
 
 
205
  # Detected language
206
  detected_language = languageCounter.most_common(1)[0][0] if len(languageCounter) > 0 else None
207
 
208
+ print(f"Running whisper {idx}: from ", format_timestamp(segment_start), " to ", format_timestamp(segment_end), ", duration: ",
209
  segment_duration, "expanded: ", segment_expand_amount, ", prompt: ", segment_prompt, ", detected language: ", detected_language)
210
 
211
  perf_start_time = time.perf_counter()
 
217
  segment_result = whisperCallable.invoke(segment_audio, segment_index, segment_prompt, detected_language, progress_listener=scaled_progress_listener)
218
 
219
  perf_end_time = time.perf_counter()
220
+ print("\tWhisper took {} seconds".format(perf_end_time - perf_start_time))
221
 
222
  adjusted_segments = self.adjust_timestamp(segment_result["segments"], adjust_seconds=segment_start, max_source_time=segment_duration)
223
 
webui.bat CHANGED
@@ -1,7 +1,7 @@
1
  @echo off
2
 
3
  :: The source of the webui.bat file is stable-diffusion-webui
4
- set COMMANDLINE_ARGS=--whisper_implementation faster-whisper --input_audio_max_duration -1 --default_model_name large-v2 --auto_parallel True --output_dir output --vad_max_merge_size 90 --merge_subtitle_with_sources --autolaunch
5
 
6
  if not defined PYTHON (set PYTHON=python)
7
  if not defined VENV_DIR (set "VENV_DIR=%~dp0%venv")
 
1
  @echo off
2
 
3
  :: The source of the webui.bat file is stable-diffusion-webui
4
+ set COMMANDLINE_ARGS=--whisper_implementation faster-whisper --input_audio_max_duration -1 --default_model_name large-v2 --auto_parallel True --output_dir output --vad_max_merge_size 90 --save_downloaded_files --autolaunch
5
 
6
  if not defined PYTHON (set PYTHON=python)
7
  if not defined VENV_DIR (set "VENV_DIR=%~dp0%venv")