Spaces:

pourmand1376
/

Seamlessm4t_diarization_VAD

Running

App Files Files Community

a.pourmand commited on Sep 13, 2023

Commit

d147382

•

1 Parent(s): 16a3aec

add update

Browse files

Files changed (4) hide show

.gitignore +160 -0
app.py +63 -9
lang_list.py +258 -0
style.css +3 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,160 @@

+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+# C extensions
+*.so
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+# Translations
+*.mo
+*.pot
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+# Flask stuff:
+instance/
+.webassets-cache
+# Scrapy stuff:
+.scrapy
+# Sphinx documentation
+docs/_build/
+# PyBuilder
+.pybuilder/
+target/
+# Jupyter Notebook
+.ipynb_checkpoints
+# IPython
+profile_default/
+ipython_config.py
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+# .python-version
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+# poetry
+#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+# pdm
+#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+#   in version control.
+#   https://pdm.fming.dev/#use-with-ide
+.pdm.toml
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+# SageMath parsed files
+*.sage.py
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+# Spyder project settings
+.spyderproject
+.spyproject
+# Rope project settings
+.ropeproject
+# mkdocs documentation
+/site
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+# Pyre type checker
+.pyre/
+# pytype static type analyzer
+.pytype/
+# Cython debug symbols
+cython_debug/
+# PyCharm
+#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+#  and can be added to the global gitignore or merged into this file.  For a more nuclear
+#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/

app.py CHANGED Viewed

@@ -1,28 +1,82 @@
 import gradio as gr
 import os
 HF_API = os.getenv("HF_API")
 API_URL = os.getenv("API_URL")  # path to Seamlessm4t API endpoint
 DESCRIPTION = """
 # Seamlessm4t + Speaker Diarization + Voice Activity Detection
-Here we use seamlessm4t to generate captions for full audios. Audio can be of arbitrary length. """
 DUPLICATE = """
 To duplicate this repo, you have to give permission from three reopsitories and accept all user conditions:
 1- https://huggingface.co/pyannote/voice-activity-detection
-2- hf.co/pyannote/segmentation
-3- hf.co/pyannote/speaker-diarization
 """
-with gr.Blocks() as demo:
     gr.Markdown(DESCRIPTION)
-    target_language = gr.Dropdown(choices=["French", "Spanish", "German"])
-    audio_source = gr.Radio(choices=["microphone", "file"])
-    mic_input = gr.Audio(source="microphone", visible=False)
-    file_input = gr.Audio(source="upload", visible=False)
-    output = gr.Textbox()
     gr.Markdown(DUPLICATE)
 demo.queue(max_size=50).launch()

+from cProfile import label
+from email.policy import default
+from altair import value
 import gradio as gr
 import os
+from lang_list import TEXT_SOURCE_LANGUAGE_NAMES
 HF_API = os.getenv("HF_API")
 API_URL = os.getenv("API_URL")  # path to Seamlessm4t API endpoint
+DEFAULT_TARGET_LANGUAGE = "Western Persian"
 DESCRIPTION = """
 # Seamlessm4t + Speaker Diarization + Voice Activity Detection
+Here we use seamlessm4t to generate captions for full audios. Audio can be of arbitrary length.
+"""
 DUPLICATE = """
 To duplicate this repo, you have to give permission from three reopsitories and accept all user conditions:
 1- https://huggingface.co/pyannote/voice-activity-detection
+2- https://hf.co/pyannote/segmentation
+3- https://hf.co/pyannote/speaker-diarization
 """
+def update_audio_ui(audio_source: str) -> tuple[dict, dict]:
+    mic = audio_source == "microphone"
+    return (
+        gr.update(visible=mic, value=None),  # input_audio_mic
+        gr.update(visible=not mic, value=None),  # input_audio_file
+    )
+with gr.Blocks(css="style.css") as demo:
     gr.Markdown(DESCRIPTION)
+    with gr.Group():
+        target_language = gr.Dropdown(
+            choices=TEXT_SOURCE_LANGUAGE_NAMES,
+            label="Output Language",
+            value=DEFAULT_TARGET_LANGUAGE,
+            interactive=True,
+        )
+        target_language.update(value=DEFAULT_TARGET_LANGUAGE)
+        with gr.Row() as audio_box:
+            audio_source = gr.Radio(
+                choices=["file", "microphone"], value="file", interactive=True
+            )
+            input_audio_mic = gr.Audio(
+                label="Input speech",
+                type="filepath",
+                source="microphone",
+                visible=False,
+            )
+            input_audio_file = gr.Audio(
+                label="Input speech",
+                type="filepath",
+                source="upload",
+                visible=True,
+            )
+            output = gr.Audio(label="Output", visible=False)
+        audio_source.change(
+            fn=update_audio_ui,
+            inputs=audio_source,
+            outputs=[input_audio_mic, input_audio_file],
+            queue=False,
+            api_name=False,
+        )
+        input_audio_mic.change(lambda x: x, input_audio_mic, output)
+        input_audio_file.change(lambda x: x, input_audio_file, output)
+        submit = gr.Button("Submit")
+        text_output = gr.Textbox(label="Transcribed Text", value="", interactive=False)
     gr.Markdown(DUPLICATE)
 demo.queue(max_size=50).launch()

lang_list.py ADDED Viewed

	@@ -0,0 +1,258 @@

+# Language dict
+language_code_to_name = {
+    "afr": "Afrikaans",
+    "amh": "Amharic",
+    "arb": "Modern Standard Arabic",
+    "ary": "Moroccan Arabic",
+    "arz": "Egyptian Arabic",
+    "asm": "Assamese",
+    "ast": "Asturian",
+    "azj": "North Azerbaijani",
+    "bel": "Belarusian",
+    "ben": "Bengali",
+    "bos": "Bosnian",
+    "bul": "Bulgarian",
+    "cat": "Catalan",
+    "ceb": "Cebuano",
+    "ces": "Czech",
+    "ckb": "Central Kurdish",
+    "cmn": "Mandarin Chinese",
+    "cym": "Welsh",
+    "dan": "Danish",
+    "deu": "German",
+    "ell": "Greek",
+    "eng": "English",
+    "est": "Estonian",
+    "eus": "Basque",
+    "fin": "Finnish",
+    "fra": "French",
+    "gaz": "West Central Oromo",
+    "gle": "Irish",
+    "glg": "Galician",
+    "guj": "Gujarati",
+    "heb": "Hebrew",
+    "hin": "Hindi",
+    "hrv": "Croatian",
+    "hun": "Hungarian",
+    "hye": "Armenian",
+    "ibo": "Igbo",
+    "ind": "Indonesian",
+    "isl": "Icelandic",
+    "ita": "Italian",
+    "jav": "Javanese",
+    "jpn": "Japanese",
+    "kam": "Kamba",
+    "kan": "Kannada",
+    "kat": "Georgian",
+    "kaz": "Kazakh",
+    "kea": "Kabuverdianu",
+    "khk": "Halh Mongolian",
+    "khm": "Khmer",
+    "kir": "Kyrgyz",
+    "kor": "Korean",
+    "lao": "Lao",
+    "lit": "Lithuanian",
+    "ltz": "Luxembourgish",
+    "lug": "Ganda",
+    "luo": "Luo",
+    "lvs": "Standard Latvian",
+    "mai": "Maithili",
+    "mal": "Malayalam",
+    "mar": "Marathi",
+    "mkd": "Macedonian",
+    "mlt": "Maltese",
+    "mni": "Meitei",
+    "mya": "Burmese",
+    "nld": "Dutch",
+    "nno": "Norwegian Nynorsk",
+    "nob": "Norwegian Bokm\u00e5l",
+    "npi": "Nepali",
+    "nya": "Nyanja",
+    "oci": "Occitan",
+    "ory": "Odia",
+    "pan": "Punjabi",
+    "pbt": "Southern Pashto",
+    "pes": "Western Persian",
+    "pol": "Polish",
+    "por": "Portuguese",
+    "ron": "Romanian",
+    "rus": "Russian",
+    "slk": "Slovak",
+    "slv": "Slovenian",
+    "sna": "Shona",
+    "snd": "Sindhi",
+    "som": "Somali",
+    "spa": "Spanish",
+    "srp": "Serbian",
+    "swe": "Swedish",
+    "swh": "Swahili",
+    "tam": "Tamil",
+    "tel": "Telugu",
+    "tgk": "Tajik",
+    "tgl": "Tagalog",
+    "tha": "Thai",
+    "tur": "Turkish",
+    "ukr": "Ukrainian",
+    "urd": "Urdu",
+    "uzn": "Northern Uzbek",
+    "vie": "Vietnamese",
+    "xho": "Xhosa",
+    "yor": "Yoruba",
+    "yue": "Cantonese",
+    "zlm": "Colloquial Malay",
+    "zsm": "Standard Malay",
+    "zul": "Zulu",
+}
+LANGUAGE_NAME_TO_CODE = {v: k for k, v in language_code_to_name.items()}
+# Source langs: S2ST / S2TT / ASR don't need source lang
+# T2TT / T2ST use this
+text_source_language_codes = [
+    "afr",
+    "amh",
+    "arb",
+    "ary",
+    "arz",
+    "asm",
+    "azj",
+    "bel",
+    "ben",
+    "bos",
+    "bul",
+    "cat",
+    "ceb",
+    "ces",
+    "ckb",
+    "cmn",
+    "cym",
+    "dan",
+    "deu",
+    "ell",
+    "eng",
+    "est",
+    "eus",
+    "fin",
+    "fra",
+    "gaz",
+    "gle",
+    "glg",
+    "guj",
+    "heb",
+    "hin",
+    "hrv",
+    "hun",
+    "hye",
+    "ibo",
+    "ind",
+    "isl",
+    "ita",
+    "jav",
+    "jpn",
+    "kan",
+    "kat",
+    "kaz",
+    "khk",
+    "khm",
+    "kir",
+    "kor",
+    "lao",
+    "lit",
+    "lug",
+    "luo",
+    "lvs",
+    "mai",
+    "mal",
+    "mar",
+    "mkd",
+    "mlt",
+    "mni",
+    "mya",
+    "nld",
+    "nno",
+    "nob",
+    "npi",
+    "nya",
+    "ory",
+    "pan",
+    "pbt",
+    "pes",
+    "pol",
+    "por",
+    "ron",
+    "rus",
+    "slk",
+    "slv",
+    "sna",
+    "snd",
+    "som",
+    "spa",
+    "srp",
+    "swe",
+    "swh",
+    "tam",
+    "tel",
+    "tgk",
+    "tgl",
+    "tha",
+    "tur",
+    "ukr",
+    "urd",
+    "uzn",
+    "vie",
+    "yor",
+    "yue",
+    "zsm",
+    "zul",
+]
+TEXT_SOURCE_LANGUAGE_NAMES = sorted(
+    [language_code_to_name[code] for code in text_source_language_codes]
+)
+# Target langs:
+# S2ST / T2ST
+s2st_target_language_codes = [
+    "eng",
+    "arb",
+    "ben",
+    "cat",
+    "ces",
+    "cmn",
+    "cym",
+    "dan",
+    "deu",
+    "est",
+    "fin",
+    "fra",
+    "hin",
+    "ind",
+    "ita",
+    "jpn",
+    "kor",
+    "mlt",
+    "nld",
+    "pes",
+    "pol",
+    "por",
+    "ron",
+    "rus",
+    "slk",
+    "spa",
+    "swe",
+    "swh",
+    "tel",
+    "tgl",
+    "tha",
+    "tur",
+    "ukr",
+    "urd",
+    "uzn",
+    "vie",
+]
+S2ST_TARGET_LANGUAGE_NAMES = sorted(
+    [language_code_to_name[code] for code in s2st_target_language_codes]
+)
+# S2TT / ASR
+S2TT_TARGET_LANGUAGE_NAMES = TEXT_SOURCE_LANGUAGE_NAMES
+# T2TT
+T2TT_TARGET_LANGUAGE_NAMES = TEXT_SOURCE_LANGUAGE_NAMES

style.css ADDED Viewed

	@@ -0,0 +1,3 @@

+h1 {
+  text-align: center;
+}