Skip to content

Commit 7807c8f

Browse files
authored
Merge pull request #620 from rmusser01/lang-config
Lang config
2 parents ef17219 + 05c6230 commit 7807c8f

17 files changed

+832
-26
lines changed

.DS_Store

8 KB
Binary file not shown.

App_Function_Libraries/Audio/Audio_Files.py

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
sanitize_filename, generate_unique_id, temp_files, logging
3737
from App_Function_Libraries.Video_DL_Ingestion_Lib import extract_metadata
3838
from App_Function_Libraries.Audio.Audio_Transcription_Lib import speech_to_text
39+
from App_Function_Libraries.Utils.Whisper_Languages import get_language_code
3940
from App_Function_Libraries.Chunk_Lib import improved_chunking_process
4041
#
4142
#######################################################################################################################
@@ -104,7 +105,7 @@ def download_audio_file(url, current_whisper_model="", use_cookies=False, cookie
104105
logging.error(f"Unexpected error downloading audio file: {str(e)}")
105106
raise
106107

107-
def process_audio_files(audio_urls, audio_files, whisper_model, api_name, api_key, use_cookies, cookies, keep_original,
108+
def process_audio_files(audio_urls, audio_files, whisper_model, transcription_language, api_name, api_key, use_cookies, cookies, keep_original,
108109
custom_keywords, custom_prompt_input, chunk_method, max_chunk_size, chunk_overlap,
109110
use_adaptive_chunking, use_multi_level_chunking, chunk_language, diarize,
110111
keep_timestamps, custom_title, record_system_audio, recording_duration,
@@ -117,6 +118,9 @@ def process_audio_files(audio_urls, audio_files, whisper_model, api_name, api_ke
117118
if not system_audio_device:
118119
raise ValueError("Please select an audio output device to record from")
119120

121+
# Convert language name to code
122+
lang_code = get_language_code(transcription_language) if transcription_language else "auto"
123+
120124
# Add recording logic before processing files
121125
recorded_files = []
122126
start_time = time.time() # Start time for processing
@@ -260,7 +264,7 @@ def convert_mp3_to_wav(mp3_file_path):
260264
temp_files.append(wav_file_path)
261265

262266
# Transcribe audio
263-
segments = speech_to_text(wav_file_path, whisper_model=whisper_model, diarize=diarize)
267+
segments = speech_to_text(wav_file_path, whisper_model=whisper_model, selected_source_lang=lang_code, diarize=diarize)
264268

265269
# Handle segments format
266270
if isinstance(segments, dict) and 'segments' in segments:
@@ -341,7 +345,7 @@ def convert_mp3_to_wav(mp3_file_path):
341345
temp_files.append(wav_file_path)
342346

343347
# Transcribe audio
344-
segments = speech_to_text(wav_file_path, whisper_model=whisper_model, diarize=diarize)
348+
segments = speech_to_text(wav_file_path, whisper_model=whisper_model, selected_source_lang=lang_code, diarize=diarize)
345349

346350
if isinstance(segments, dict) and 'segments' in segments:
347351
segments = segments['segments']
@@ -528,7 +532,7 @@ def download_youtube_audio(url):
528532

529533

530534
def process_podcast(url, title, author, keywords, custom_prompt, api_name, api_key, whisper_model,
531-
keep_original=False, enable_diarization=False, use_cookies=False, cookies=None,
535+
transcription_language=None, keep_original=False, enable_diarization=False, use_cookies=False, cookies=None,
532536
chunk_method=None, max_chunk_size=300, chunk_overlap=0, use_adaptive_chunking=False,
533537
use_multi_level_chunking=False, chunk_language='english', keep_timestamps=True):
534538
"""
@@ -595,6 +599,9 @@ def cleanup_files():
595599
progress = [] # Initialize progress messages
596600

597601
try:
602+
# Convert language name to code
603+
lang_code = get_language_code(transcription_language) if transcription_language else "auto"
604+
598605
# Handle cookies if required
599606
if use_cookies:
600607
cookies = json.loads(cookies)
@@ -639,9 +646,9 @@ def cleanup_files():
639646
# Transcribe the podcast audio
640647
try:
641648
if enable_diarization:
642-
segments = speech_to_text(audio_file, whisper_model=whisper_model, diarize=True)
649+
segments = speech_to_text(audio_file, whisper_model=whisper_model, selected_source_lang=lang_code, diarize=True)
643650
else:
644-
segments = speech_to_text(audio_file, whisper_model=whisper_model)
651+
segments = speech_to_text(audio_file, whisper_model=whisper_model, selected_source_lang=lang_code)
645652
# Seems like this could be optimized... FIXME
646653
def format_segment(segment):
647654
start = segment.get('start', 0)

App_Function_Libraries/Audio/Audio_Transcription_Lib.py

Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -297,15 +297,19 @@ def parse_device_id(selected_device_text: str):
297297

298298
##########################################################
299299
# Transcription Sink Function
300-
def transcribe_audio(audio_data: np.ndarray, transcription_provider, sample_rate: int = 16000, speaker_lang=None, whisper_model="distil-large-v3") -> str:
300+
def transcribe_audio(audio_data: np.ndarray, transcription_provider=None, sample_rate: int = 16000, speaker_lang=None, whisper_model=None) -> str:
301301
"""
302302
Unified transcribe entry point.
303303
Chooses faster-whisper or Qwen2Audio based on config.
304304
"""
305305
loaded_config_data = load_and_log_configs()
306306
if not transcription_provider:
307307
# Load default transcription provider via config file
308-
transcription_provider = loaded_config_data['STT-Settings']['default_transcriber']
308+
transcription_provider = loaded_config_data['STT_Settings']['default_stt_provider']
309+
if whisper_model is None:
310+
whisper_model = loaded_config_data['STT_Settings'].get('default_whisper_model', 'distil-large-v3')
311+
if speaker_lang is None:
312+
speaker_lang = loaded_config_data['STT_Settings'].get('default_stt_language', 'en')
309313

310314
if transcription_provider.lower() == 'qwen2audio':
311315
logging.info("Transcribing using Qwen2Audio")
@@ -650,15 +654,25 @@ def format_time(total_seconds: float) -> str:
650654

651655
def speech_to_text(
652656
audio_file_path: str,
653-
whisper_model: str = 'distil-large-v3',
654-
selected_source_lang: str = 'en', # Changed order of parameters
655-
vad_filter: bool = False,
657+
whisper_model: str = None,
658+
selected_source_lang: str = None, # Changed order of parameters
659+
vad_filter: bool = None,
656660
diarize: bool = False
657661
):
658662
"""
659663
Transcribe audio to text using a Whisper model and optionally handle diarization.
660664
Saves JSON output to {filename}-whisper_model-{model}.segments.json in the same directory.
661665
"""
666+
667+
# Load defaults from config if not provided
668+
loaded_config_data = load_and_log_configs()
669+
if whisper_model is None:
670+
whisper_model = loaded_config_data['STT_Settings'].get('default_whisper_model', 'distil-large-v3')
671+
if selected_source_lang is None:
672+
selected_source_lang = loaded_config_data['STT_Settings'].get('default_stt_language', 'en')
673+
if vad_filter is None:
674+
vad_filter_str = loaded_config_data['STT_Settings'].get('default_vad_filter', 'false')
675+
vad_filter = vad_filter_str.lower() == 'true'
662676

663677
log_counter("speech_to_text_attempt", labels={"file_path": audio_file_path, "model": whisper_model})
664678
time_start = time.time()

App_Function_Libraries/Gradio_Related.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -403,7 +403,7 @@ def launch_ui(share_public=None, server_mode=False):
403403
#migrate_media_db_to_rag_chat_db(media_db_path, rag_chat_db_path)
404404

405405

406-
with gr.Blocks(theme='bethecloud/storj_theme',css=css) as iface:
406+
with gr.Blocks(theme='default',css=css) as iface:
407407
gr.HTML(
408408
"""
409409
<script>
Lines changed: 200 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,200 @@
1+
# Gradio_Related_Fixed.py
2+
# Enhanced version with better error handling and fixes
3+
4+
import os
5+
import sys
6+
import webbrowser
7+
import traceback
8+
import gradio as gr
9+
from pathlib import Path
10+
11+
# Import with error handling
12+
try:
13+
from App_Function_Libraries.DB.DB_Manager import get_db_config
14+
from App_Function_Libraries.DB.RAG_QA_Chat_DB import create_tables
15+
from App_Function_Libraries.Utils.Utils import load_and_log_configs, logging
16+
except ImportError as e:
17+
print(f"Import error: {e}")
18+
print("Make sure all dependencies are installed and paths are correct.")
19+
sys.exit(1)
20+
21+
# Ensure directories exist
22+
def ensure_directories():
    """Make sure the working directories used by the app are present.

    Creates ``Databases``, ``Logs`` and ``Config_Files`` relative to the
    current working directory; directories that already exist are left alone.
    """
    for folder_name in ('Databases', 'Logs', 'Config_Files'):
        Path(folder_name).mkdir(exist_ok=True)
31+
32+
def _make_error_tab(func_name, error_message):
    """Build a stand-in tab factory that renders a load error for ``func_name``."""
    def error_tab(*args, **kwargs):
        # Accept and ignore any arguments so the placeholder can be called the
        # same way as the real tab factory it replaces.
        return gr.Markdown(f"Error loading {func_name}: {error_message}")

    # Keep the intended name visible in logs and tracebacks.
    error_tab.__name__ = func_name
    return error_tab


def safe_import_tabs():
    """Import the Gradio tab factory functions, substituting placeholders on failure.

    Each entry of ``tab_modules`` names a module under
    ``App_Function_Libraries.Gradio_UI`` and the factory functions expected in
    it. If importing the module or looking up any of its functions raises, the
    error is logged and every function listed for that module is mapped to a
    placeholder that renders a ``gr.Markdown`` error message.

    Returns:
        dict: Function name -> imported callable, or its error placeholder.
    """
    import importlib

    tab_imports = {}

    # List of all tab modules to import
    tab_modules = [
        ('Anki_tab', ['create_anki_validation_tab', 'create_anki_generator_tab']),
        ('Arxiv_tab', ['create_arxiv_tab']),
        ('Audio_ingestion_tab', ['create_audio_processing_tab']),
        ('Video_transcription_tab', ['create_video_transcription_tab']),
        # Add more as needed
    ]

    for module_name, functions in tab_modules:
        try:
            module = importlib.import_module(f'App_Function_Libraries.Gradio_UI.{module_name}')
            for func_name in functions:
                tab_imports[func_name] = getattr(module, func_name)
        except Exception as e:
            logging.error(f"Failed to import {module_name}: {e}")
            # The exception text must be captured now: Python unbinds `e` when
            # this except clause ends, and a closure over the loop variable
            # would only ever see the last function name.
            error_message = str(e)
            for func_name in functions:
                tab_imports[func_name] = _make_error_tab(func_name, error_message)

    return tab_imports
57+
58+
def launch_ui_safe(share_public=None, server_mode=False, demo_mode=False):
    """Build and launch a minimal Gradio UI, falling back to an error page on failure.

    Args:
        share_public: Forwarded to Gradio's ``launch`` as the ``share`` argument.
        server_mode: When true, the server is bound to ``0.0.0.0``.
        demo_mode: When true, no browser tab is opened.

    The port comes from the ``GRADIO_SERVER_PORT`` environment variable
    (default 7860). If launching on that port raises, one retry is made on
    port 7861. Any other failure is logged and a small error-only interface
    is launched instead.
    """
    # Ensure directories exist
    ensure_directories()

    # Resolve the port once so the browser URL and the server agree on it.
    try:
        server_port = int(os.getenv('GRADIO_SERVER_PORT', 7860))
    except ValueError:
        logging.warning("Invalid GRADIO_SERVER_PORT value; falling back to 7860")
        server_port = 7860

    # Don't open browser in demo mode
    if not demo_mode:
        try:
            webbrowser.open_new_tab(f'http://127.0.0.1:{server_port}/?__theme=dark')
        except Exception as e:
            logging.warning(f"Could not open browser: {e}")

    share = share_public

    # CSS styling
    css = """
    .result-box {
        margin-bottom: 20px;
        border: 1px solid #ddd;
        padding: 10px;
    }
    .result-box.error {
        border-color: #ff0000;
        background-color: #ffeeee;
    }
    .transcription, .summary {
        max-height: 800px;
        overflow-y: auto;
        border: 1px solid #eee;
        padding: 10px;
        margin-top: 10px;
    }
    #scrollable-textbox textarea {
        max-height: 500px !important;
        overflow-y: auto !important;
    }
    """

    try:
        # Load configuration with error handling
        config = load_and_log_configs()
        if not config:
            logging.error("Failed to load configuration")
            config = {'db_config': {'sqlite_path': './Databases/media_db.db', 'type': 'sqlite'}}

        # Get database paths
        db_config = config.get('db_config', {})
        media_db_path = db_config.get('sqlite_path', './Databases/media_db.db')

        # Ensure database directory exists. A bare filename has an empty
        # dirname, and os.makedirs('') raises, so fall back to the cwd.
        db_dir = os.path.dirname(media_db_path) or '.'
        if not os.path.exists(db_dir):
            os.makedirs(db_dir, exist_ok=True)
            logging.info(f"Created database directory: {db_dir}")

        # Initialize databases with error handling
        try:
            create_tables()
            logging.info("Database tables created successfully")
        except Exception as e:
            # Best effort: the UI is still built when table creation fails.
            logging.error(f"Error creating database tables: {e}")

        # Import all tab functions. The result is not used by this minimal
        # interface; the call is kept so import failures are still logged.
        safe_import_tabs()

        # Disable analytics.
        # NOTE(review): set before the Blocks object is built because Gradio
        # appears to read this flag at construction time — confirm.
        os.environ['GRADIO_ANALYTICS_ENABLED'] = 'False'

        # Create Gradio interface
        with gr.Blocks(theme='default', css=css) as iface:
            # Add dark mode script
            gr.HTML("""
            <script>
            document.addEventListener('DOMContentLoaded', (event) => {
                document.body.classList.add('dark');
                document.querySelector('gradio-app').style.backgroundColor = 'var(--color-background-primary)';
            });
            </script>
            """)

            # Get database type
            db_type = db_config.get('type', 'sqlite')

            # Header
            gr.Markdown("# tl/dw: Your LLM-powered Research Multi-tool")
            gr.Markdown(f"(Using {db_type.capitalize()} Database)")

            # Create minimal interface for testing
            with gr.Tabs():
                with gr.TabItem("Status", id="status"):
                    gr.Markdown("## System Status")
                    gr.Markdown("✅ Application loaded successfully")
                    gr.Markdown(f"📁 Database path: {media_db_path}")
                    gr.Markdown(f"🗄️ Database type: {db_type}")

                with gr.TabItem("Test", id="test"):
                    gr.Markdown("## Test Tab")
                    test_input = gr.Textbox(label="Test Input")
                    test_output = gr.Textbox(label="Test Output")
                    test_button = gr.Button("Test")

                    def test_function(text):
                        return f"Echo: {text}"

                    test_button.click(test_function, inputs=test_input, outputs=test_output)

        # Launch the interface
        launch_kwargs = {
            'share': share,
            'server_port': server_port,
            'show_error': True
        }

        if server_mode:
            launch_kwargs['server_name'] = "0.0.0.0"

        try:
            iface.launch(**launch_kwargs)
        except Exception as e:
            logging.error(f"Error launching Gradio interface: {e}")
            # Try alternative port
            logging.info("Trying alternative port 7861...")
            launch_kwargs['server_port'] = 7861
            iface.launch(**launch_kwargs)

    except Exception as e:
        logging.error(f"Critical error in launch_ui: {e}")
        logging.error(traceback.format_exc())

        # Create minimal error interface
        with gr.Blocks() as error_iface:
            gr.Markdown("# Error Loading Application")
            gr.Markdown(f"An error occurred: {str(e)}")
            gr.Markdown("Please check the logs for more information.")

        # No explicit port: a hard-coded 7860 ignores GRADIO_SERVER_PORT and
        # fails again if that port was the original problem.
        # NOTE(review): relies on Gradio choosing the port itself when
        # server_port is omitted — confirm against the installed version.
        error_iface.launch(share=False)

App_Function_Libraries/Gradio_UI/Audio_ingestion_tab.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
from App_Function_Libraries.DB.DB_Manager import list_prompts
1919
from App_Function_Libraries.Gradio_UI.Chat_ui import update_user_prompt
2020
from App_Function_Libraries.Gradio_UI.Gradio_Shared import whisper_models
21+
from App_Function_Libraries.Utils.Whisper_Languages import get_whisper_language_list, get_language_code
2122
from App_Function_Libraries.Utils.Utils import cleanup_temp_files, default_api_endpoint, global_api_endpoints, \
2223
format_api_name, logging
2324
# Import metrics logging
@@ -407,6 +408,21 @@ def toggle_cookies_box(x):
407408
value="distil-large-v3",
408409
label="Whisper Model"
409410
)
411+
412+
# Add language selection dropdown
413+
from App_Function_Libraries.Utils.Utils import load_and_log_configs
414+
loaded_config_data = load_and_log_configs()
415+
default_lang = loaded_config_data['STT_Settings'].get('default_stt_language', 'en')
416+
language_choices = get_whisper_language_list()
417+
default_lang_name = next((name for code, name in language_choices if code == default_lang), "English")
418+
419+
transcription_language = gr.Dropdown(
420+
choices=[name for code, name in language_choices],
421+
value=default_lang_name,
422+
label="Transcription Language",
423+
info="Select the language of the audio, or use Auto-detect"
424+
)
425+
410426
keep_timestamps_input = gr.Checkbox(label="Keep Timestamps", value=True)
411427

412428
with gr.Row():
@@ -619,6 +635,7 @@ def toggle_chunking(x):
619635
audio_url_input,
620636
audio_file_input,
621637
whisper_model_input,
638+
transcription_language,
622639
api_name_input,
623640
api_key_input,
624641
use_cookies_input,

0 commit comments

Comments
 (0)