pustozerov committed on
Commit
bf9a369
1 Parent(s): 5f36b24

Implemented punctuation and capitalization into the Streamlit interface.

Browse files
Files changed (2) hide show
  1. app.py +7 -4
  2. modules/nlp/nemo_punct_cap.py +8 -0
app.py CHANGED
@@ -9,6 +9,7 @@ from scipy.io.wavfile import write
9
 
10
  from modules.diarization.nemo_diarization import diarization
11
  from modules.nlp.nemo_ner import detect_ner
 
12
 
13
  FOLDER_WAV_DB = "data/database/"
14
  FOLDER_USER_DATA = "data/user_data/"
@@ -19,7 +20,7 @@ dataset = load_dataset("pustozerov/crema_d_diarization", split='validation')
19
  st.title('Call Transcription demo')
20
  st.subheader('This simple demo shows the possibilities of the ASR and NLP in the task of '
21
  'automatic speech recognition and diarization. It works with mp3, ogg and wav files. You can randomly '
22
- 'pickup a set of images from the built-in database or try uploading your own files.')
23
  if st.button('Try a random sample from the database'):
24
  os.makedirs(FOLDER_WAV_DB, exist_ok=True)
25
  shuffled_dataset = dataset.shuffle(seed=random.randint(0, 100))
@@ -32,13 +33,15 @@ if st.button('Try a random sample from the database'):
32
  st.audio(audio_file.read())
33
  st.write("Starting transcription. Estimated processing time: %0.1f seconds" % (f.frames / (f.samplerate * 5)))
34
  result = diarization(os.path.join(FOLDER_WAV_DB, file_name + '.wav'))
 
 
 
35
  sentences = result[file_name]["sentences"]
36
  all_strings = ""
37
  for sentence in sentences:
38
  all_strings = all_strings + sentence["sentence"] + "\n"
39
- with open("info/transcripts/pred_rttms/" + file_name + ".txt") as f:
40
- transcript = f.read()
41
- st.write("Transcription completed. Starting named entity recognition.")
42
  tagged_string, tags_summary = detect_ner(all_strings)
43
  transcript = transcript + '\n' + tagged_string
44
  st.write("Number of speakers: %s" % result[file_name]["speaker_count"])
 
9
 
10
  from modules.diarization.nemo_diarization import diarization
11
  from modules.nlp.nemo_ner import detect_ner
12
+ from modules.nlp.nemo_punct_cap import punctuation_capitalization
13
 
14
  FOLDER_WAV_DB = "data/database/"
15
  FOLDER_USER_DATA = "data/user_data/"
 
20
  st.title('Call Transcription demo')
21
  st.subheader('This simple demo shows the possibilities of the ASR and NLP in the task of '
22
  'automatic speech recognition and diarization. It works with mp3, ogg and wav files. You can randomly '
23
+ 'pickup an audio file with the dialogue from the built-in database or try uploading your own files.')
24
  if st.button('Try a random sample from the database'):
25
  os.makedirs(FOLDER_WAV_DB, exist_ok=True)
26
  shuffled_dataset = dataset.shuffle(seed=random.randint(0, 100))
 
33
  st.audio(audio_file.read())
34
  st.write("Starting transcription. Estimated processing time: %0.1f seconds" % (f.frames / (f.samplerate * 5)))
35
  result = diarization(os.path.join(FOLDER_WAV_DB, file_name + '.wav'))
36
+ with open("info/transcripts/pred_rttms/" + file_name + ".txt") as f:
37
+ transcript = f.read()
38
+ st.write("Transcription completed. Starting assigning punctuation and capitalization.")
39
  sentences = result[file_name]["sentences"]
40
  all_strings = ""
41
  for sentence in sentences:
42
  all_strings = all_strings + sentence["sentence"] + "\n"
43
+ all_strings = punctuation_capitalization([all_strings])[0]
44
+ st.write("Punctuation and capitalization are ready. Starting named entity recognition.")
 
45
  tagged_string, tags_summary = detect_ner(all_strings)
46
  transcript = transcript + '\n' + tagged_string
47
  st.write("Number of speakers: %s" % result[file_name]["speaker_count"])
modules/nlp/nemo_punct_cap.py ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ from nemo.collections.nlp.models import PunctuationCapitalizationModel
2
+
3
+
4
+ punctuation_capitalization_model = PunctuationCapitalizationModel.from_pretrained("punctuation_en_distilbert")
5
+
6
+
7
+ def punctuation_capitalization(text):
8
+ return punctuation_capitalization_model.add_punctuation_capitalization(text)