pustozerov committed on
Commit
ece7272
1 Parent(s): 4652f5c

Added auto-creation of manifests folder. Updated packages.txt.

Browse files
Files changed (2) hide show
  1. app.py +12 -1
  2. packages.txt +1 -6
app.py CHANGED
@@ -14,8 +14,10 @@ from modules.nlp.nemo_punct_cap import punctuation_capitalization
14
  FOLDER_WAV_DB = "data/database/"
15
  FOLDER_USER_DATA = "data/user_data/"
16
  FOLDER_USER_DATA_WAV = "data/user_data_wav/"
 
17
  SAMPLE_RATE = 16000
18
  dataset = load_dataset("pustozerov/crema_d_diarization", split='validation')
 
19
 
20
  st.title('Call Transcription demo')
21
  st.subheader('This simple demo shows the possibilities of the ASR and NLP in the task of '
@@ -79,10 +81,19 @@ if uploaded_file is not None:
79
  result = diarization(save_path)
80
  with open("info/transcripts/pred_rttms/" + file_name + ".txt") as f:
81
  transcript = f.read()
82
- st.write("Transcription completed.")
 
 
 
 
 
 
 
 
83
  st.write("Number of speakers: %s" % result[file_name]["speaker_count"])
84
  st.write("Sentences: %s" % len(result[file_name]["sentences"]))
85
  st.write("Words: %s" % len(result[file_name]["words"]))
 
86
  st.download_button(
87
  label="Download audio transcript",
88
  data=transcript,
 
14
  FOLDER_WAV_DB = "data/database/"
15
  FOLDER_USER_DATA = "data/user_data/"
16
  FOLDER_USER_DATA_WAV = "data/user_data_wav/"
17
+ FOLDER_MANIFESTS = "info/configs/manifests/"
18
  SAMPLE_RATE = 16000
19
  dataset = load_dataset("pustozerov/crema_d_diarization", split='validation')
20
+ os.makedirs(FOLDER_WAV_DB, exist_ok=True)
21
 
22
  st.title('Call Transcription demo')
23
  st.subheader('This simple demo shows the possibilities of the ASR and NLP in the task of '
 
81
  result = diarization(save_path)
82
  with open("info/transcripts/pred_rttms/" + file_name + ".txt") as f:
83
  transcript = f.read()
84
+ st.write("Transcription completed. Starting assigning punctuation and capitalization.")
85
+ sentences = result[file_name]["sentences"]
86
+ all_strings = ""
87
+ for sentence in sentences:
88
+ all_strings = all_strings + sentence["sentence"] + "\n"
89
+ all_strings = punctuation_capitalization([all_strings])[0]
90
+ st.write("Punctuation and capitalization are ready. Starting named entity recognition.")
91
+ tagged_string, tags_summary = detect_ner(all_strings)
92
+ transcript = transcript + '\n' + tagged_string
93
  st.write("Number of speakers: %s" % result[file_name]["speaker_count"])
94
  st.write("Sentences: %s" % len(result[file_name]["sentences"]))
95
  st.write("Words: %s" % len(result[file_name]["words"]))
96
+ st.write("Found named entities: %s" % tags_summary)
97
  st.download_button(
98
  label="Download audio transcript",
99
  data=transcript,
packages.txt CHANGED
@@ -6,9 +6,4 @@ python3-opencv
6
  unzip
7
  libc6
8
  libsm6
9
- libxext6
10
- libxcb-xinerama0
11
- wget
12
- libglfw3-dev
13
- libgles2-mesa-dev
14
- xvfb
 
6
  unzip
7
  libc6
8
  libsm6
9
+ libxext6