Skip to content

Commit cf8ff9d

Browse files
author
İLKER BORAHAN ARSLAN
committed
Fix select language error
1 parent 1a63918 commit cf8ff9d

File tree

3 files changed

+116
-13
lines changed

3 files changed

+116
-13
lines changed

app.py

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,12 @@
33
import gradio as gr
44
from gradio_i18n import Translate, gettext as _
55
import yaml
6-
6+
import sys
77
from modules.utils.paths import (FASTER_WHISPER_MODELS_DIR, DIARIZATION_MODELS_DIR, OUTPUT_DIR, WHISPER_MODELS_DIR,
88
INSANELY_FAST_WHISPER_MODELS_DIR, NLLB_MODELS_DIR, DEFAULT_PARAMETERS_CONFIG_PATH,
99
UVR_MODELS_DIR, I18N_YAML_PATH)
1010
from modules.utils.files_manager import load_yaml
11+
from modules.utils.language import LANGUAGE_CODES
1112
from modules.whisper.whisper_factory import WhisperFactory
1213
from modules.translation.nllb_inference import NLLBInference
1314
from modules.ui.htmls import *
@@ -46,13 +47,15 @@ def create_pipeline_inputs(self):
4647
vad_params = self.default_params["vad"]
4748
diarization_params = self.default_params["diarization"]
4849
uvr_params = self.default_params["bgm_separation"]
49-
50+
choices = sorted([(language, code) for language, code in LANGUAGE_CODES.items()])
5051
with gr.Row():
5152
dd_model = gr.Dropdown(choices=self.whisper_inf.available_models, value=whisper_params["model_size"],
5253
label=_("Model"))
53-
dd_lang = gr.Dropdown(choices=self.whisper_inf.available_langs + [AUTOMATIC_DETECTION],
54-
value=AUTOMATIC_DETECTION if whisper_params["lang"] == AUTOMATIC_DETECTION.unwrap()
55-
else whisper_params["lang"], label=_("Language"))
54+
"""dd_lang = gr.Dropdown(choices=[lang.title() for lang in self.whisper_inf.available_langs] + [AUTOMATIC_DETECTION.title()],
55+
value=AUTOMATIC_DETECTION if whisper_params.get("lang") == AUTOMATIC_DETECTION else whisper_params.get("lang", AUTOMATIC_DETECTION),
56+
label=_("Language"))"""
57+
dd_lang = gr.Dropdown(choices=choices + [("Automatic Detection", "auto")],value="auto",label=_("Language")
58+
)
5659
dd_file_format = gr.Dropdown(choices=["SRT", "WebVTT", "txt", "LRC"], value="SRT", label=_("File Format"))
5760
with gr.Row():
5861
cb_translate = gr.Checkbox(value=whisper_params["is_translate"], label=_("Translate to English?"),

modules/utils/language.py

Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
# Mapping of human-readable language names to the short language codes
# used for transcription.
#
# Keys are the display labels (the UI builds its language dropdown from
# ``LANGUAGE_CODES.items()`` as ``(label, value)`` pairs); values are the
# codes stored on the transcription parameters. Every key must therefore be
# a readable name, not a raw code.
LANGUAGE_CODES = {
    "English": "en",
    "Chinese": "zh",
    "German": "de",
    "Spanish": "es",
    "Russian": "ru",
    "Korean": "ko",
    "French": "fr",
    "Japanese": "ja",
    "Portuguese": "pt",
    "Turkish": "tr",
    "Polish": "pl",
    "Catalan": "ca",
    "Dutch": "nl",
    "Arabic": "ar",
    "Swedish": "sv",
    "Italian": "it",
    "Indonesian": "id",
    "Hindi": "hi",
    "Finnish": "fi",
    "Vietnamese": "vi",
    "Hebrew": "he",
    "Ukrainian": "uk",
    "Greek": "el",
    "Malay": "ms",
    "Czech": "cs",
    "Romanian": "ro",
    "Danish": "da",
    "Hungarian": "hu",
    "Tamil": "ta",
    "Norwegian": "no",
    "Thai": "th",
    "Urdu": "ur",
    "Croatian": "hr",
    "Bulgarian": "bg",
    "Lithuanian": "lt",
    "Latin": "la",
    "Māori": "mi",
    "Malayalam": "ml",
    "Welsh": "cy",
    "Slovak": "sk",
    "Telugu": "te",
    "Persian": "fa",
    "Latvian": "lv",
    "Bengali": "bn",
    "Serbian": "sr",
    "Azerbaijani": "az",
    "Slovenian": "sl",
    "Kannada": "kn",
    "Estonian": "et",
    "Macedonian": "mk",
    "Breton": "br",
    "Basque": "eu",
    "Icelandic": "is",
    "Armenian": "hy",
    "Nepali": "ne",
    "Mongolian": "mn",
    "Bosnian": "bs",
    "Kazakh": "kk",
    "Albanian": "sq",
    "Swahili": "sw",
    "Galician": "gl",
    "Marathi": "mr",
    "Panjabi": "pa",
    "Sinhala": "si",
    "Khmer": "km",
    "Shona": "sn",
    "Yoruba": "yo",
    "Somali": "so",
    "Afrikaans": "af",
    "Occitan": "oc",
    "Georgian": "ka",
    "Belarusian": "be",
    "Tajik": "tg",
    "Sindhi": "sd",
    "Gujarati": "gu",
    "Amharic": "am",
    "Yiddish": "yi",
    "Lao": "lo",
    "Uzbek": "uz",
    "Faroese": "fo",
    "Haitian": "ht",
    "Pashto": "ps",
    "Turkmen": "tk",
    "Norwegian Nynorsk": "nn",
    "Maltese": "mt",
    "Sanskrit": "sa",
    "Luxembourgish": "lb",
    "Burmese": "my",
    "Tibetan": "bo",
    "Tagalog": "tl",
    "Malagasy": "mg",
    "Assamese": "as",
    "Tatar": "tt",
    "Hawaiian": "haw",
    "Lingala": "ln",
    "Hausa": "ha",
    "Bashkir": "ba",
    # "jw" is the code Whisper uses for Javanese; the label was previously
    # the raw code itself, which surfaced as "jw" in the dropdown.
    "Javanese": "jw",
    "Sundanese": "su",
}

modules/whisper/base_transcription_pipeline.py

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import os
2+
import sys
23
import whisper
34
import ctranslate2
45
import gradio as gr
@@ -19,7 +20,7 @@
1920
from modules.whisper.data_classes import *
2021
from modules.diarize.diarizer import Diarizer
2122
from modules.vad.silero_vad import SileroVAD
22-
23+
from modules.utils.language import LANGUAGE_CODES
2324

2425
class BaseTranscriptionPipeline(ABC):
2526
def __init__(self,
@@ -486,13 +487,11 @@ def validate_gradio_values(params: TranscriptionPipelineParams):
486487
Validate gradio specific values that can't be displayed as None in the UI.
487488
Related issue : https://github.com/gradio-app/gradio/issues/8723
488489
"""
489-
if params.whisper.lang is None:
490-
pass
491-
elif params.whisper.lang == AUTOMATIC_DETECTION:
492-
params.whisper.lang = None
493-
else:
494-
language_code_dict = {value: key for key, value in whisper.tokenizer.LANGUAGES.items()}
495-
params.whisper.lang = language_code_dict[params.lang]
490+
491+
if params.whisper.lang in LANGUAGE_CODES:
492+
params.whisper.lang = LANGUAGE_CODES[params.whisper.lang]
493+
elif params.whisper.lang not in LANGUAGE_CODES.values():
494+
params.whisper.lang = None # or you could default to 'tr' here instead
496495

497496
if params.whisper.initial_prompt == GRADIO_NONE_STR:
498497
params.whisper.initial_prompt = None

0 commit comments

Comments
 (0)