Lang config #620

Merged · 5 commits · Jun 19, 2025

Binary file added .DS_Store
Binary file not shown.
19 changes: 13 additions & 6 deletions App_Function_Libraries/Audio/Audio_Files.py
@@ -36,6 +36,7 @@
sanitize_filename, generate_unique_id, temp_files, logging
from App_Function_Libraries.Video_DL_Ingestion_Lib import extract_metadata
from App_Function_Libraries.Audio.Audio_Transcription_Lib import speech_to_text
+from App_Function_Libraries.Utils.Whisper_Languages import get_language_code
from App_Function_Libraries.Chunk_Lib import improved_chunking_process
#
#######################################################################################################################
@@ -104,7 +105,7 @@ def download_audio_file(url, current_whisper_model="", use_cookies=False, cookie
logging.error(f"Unexpected error downloading audio file: {str(e)}")
raise

-def process_audio_files(audio_urls, audio_files, whisper_model, api_name, api_key, use_cookies, cookies, keep_original,
+def process_audio_files(audio_urls, audio_files, whisper_model, transcription_language, api_name, api_key, use_cookies, cookies, keep_original,
custom_keywords, custom_prompt_input, chunk_method, max_chunk_size, chunk_overlap,
use_adaptive_chunking, use_multi_level_chunking, chunk_language, diarize,
keep_timestamps, custom_title, record_system_audio, recording_duration,
@@ -117,6 +118,9 @@ def process_audio_files(audio_urls, audio_files, whisper_model, api_name, api_ke
if not system_audio_device:
raise ValueError("Please select an audio output device to record from")

+# Convert language name to code
+lang_code = get_language_code(transcription_language) if transcription_language else "auto"

# Add recording logic before processing files
recorded_files = []
start_time = time.time() # Start time for processing
@@ -260,7 +264,7 @@ def convert_mp3_to_wav(mp3_file_path):
temp_files.append(wav_file_path)

# Transcribe audio
-segments = speech_to_text(wav_file_path, whisper_model=whisper_model, diarize=diarize)
+segments = speech_to_text(wav_file_path, whisper_model=whisper_model, selected_source_lang=lang_code, diarize=diarize)

# Handle segments format
if isinstance(segments, dict) and 'segments' in segments:
@@ -341,7 +345,7 @@ def convert_mp3_to_wav(mp3_file_path):
temp_files.append(wav_file_path)

# Transcribe audio
-segments = speech_to_text(wav_file_path, whisper_model=whisper_model, diarize=diarize)
+segments = speech_to_text(wav_file_path, whisper_model=whisper_model, selected_source_lang=lang_code, diarize=diarize)

if isinstance(segments, dict) and 'segments' in segments:
segments = segments['segments']
@@ -528,7 +532,7 @@ def download_youtube_audio(url):


def process_podcast(url, title, author, keywords, custom_prompt, api_name, api_key, whisper_model,
-keep_original=False, enable_diarization=False, use_cookies=False, cookies=None,
+transcription_language=None, keep_original=False, enable_diarization=False, use_cookies=False, cookies=None,
chunk_method=None, max_chunk_size=300, chunk_overlap=0, use_adaptive_chunking=False,
use_multi_level_chunking=False, chunk_language='english', keep_timestamps=True):
"""
@@ -595,6 +599,9 @@ def cleanup_files():
progress = [] # Initialize progress messages

try:
+# Convert language name to code
+lang_code = get_language_code(transcription_language) if transcription_language else "auto"

# Handle cookies if required
if use_cookies:
cookies = json.loads(cookies)
@@ -639,9 +646,9 @@ def cleanup_files():
# Transcribe the podcast audio
try:
if enable_diarization:
-segments = speech_to_text(audio_file, whisper_model=whisper_model, diarize=True)
+segments = speech_to_text(audio_file, whisper_model=whisper_model, selected_source_lang=lang_code, diarize=True)
else:
-segments = speech_to_text(audio_file, whisper_model=whisper_model)
+segments = speech_to_text(audio_file, whisper_model=whisper_model, selected_source_lang=lang_code)
# Seems like this could be optimized... FIXME
def format_segment(segment):
start = segment.get('start', 0)
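
The `Whisper_Languages` helper imported above is added elsewhere in this PR, so its body is not shown in this diff. A minimal sketch of what such a module might look like, assuming a plain display-name-to-code mapping with `auto` as the detection sentinel (the table is an illustrative subset, not the real language list):

```python
# Hypothetical sketch of App_Function_Libraries/Utils/Whisper_Languages.py;
# only the two functions this PR imports are shown.

WHISPER_LANGUAGES = {
    "Auto-detect": "auto",
    "English": "en",
    "Spanish": "es",
    "French": "fr",
    "German": "de",
    "Japanese": "ja",
}

def get_language_code(language_name):
    """Map a display name (e.g. "English") to a Whisper language code."""
    return WHISPER_LANGUAGES.get(language_name, "auto")

def get_whisper_language_list():
    """Return (code, name) pairs for building UI dropdowns."""
    return [(code, name) for name, code in WHISPER_LANGUAGES.items()]
```
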
24 changes: 19 additions & 5 deletions App_Function_Libraries/Audio/Audio_Transcription_Lib.py
@@ -297,15 +297,19 @@ def parse_device_id(selected_device_text: str):

##########################################################
# Transcription Sink Function
-def transcribe_audio(audio_data: np.ndarray, transcription_provider, sample_rate: int = 16000, speaker_lang=None, whisper_model="distil-large-v3") -> str:
+def transcribe_audio(audio_data: np.ndarray, transcription_provider=None, sample_rate: int = 16000, speaker_lang=None, whisper_model=None) -> str:
"""
Unified transcribe entry point.
Chooses faster-whisper or Qwen2Audio based on config.
"""
loaded_config_data = load_and_log_configs()
if not transcription_provider:
# Load default transcription provider via config file
-transcription_provider = loaded_config_data['STT-Settings']['default_transcriber']
+transcription_provider = loaded_config_data['STT_Settings']['default_stt_provider']
+if whisper_model is None:
+whisper_model = loaded_config_data['STT_Settings'].get('default_whisper_model', 'distil-large-v3')
+if speaker_lang is None:
+speaker_lang = loaded_config_data['STT_Settings'].get('default_stt_language', 'en')

if transcription_provider.lower() == 'qwen2audio':
logging.info("Transcribing using Qwen2Audio")
@@ -650,15 +654,25 @@ def format_time(total_seconds: float) -> str:

def speech_to_text(
audio_file_path: str,
-whisper_model: str = 'distil-large-v3',
-selected_source_lang: str = 'en', # Changed order of parameters
-vad_filter: bool = False,
+whisper_model: str = None,
+selected_source_lang: str = None, # Changed order of parameters
+vad_filter: bool = None,
diarize: bool = False
):
"""
Transcribe audio to text using a Whisper model and optionally handle diarization.
Saves JSON output to {filename}-whisper_model-{model}.segments.json in the same directory.
"""

+# Load defaults from config if not provided
+loaded_config_data = load_and_log_configs()
+if whisper_model is None:
+whisper_model = loaded_config_data['STT_Settings'].get('default_whisper_model', 'distil-large-v3')
+if selected_source_lang is None:
+selected_source_lang = loaded_config_data['STT_Settings'].get('default_stt_language', 'en')
+if vad_filter is None:
+vad_filter_str = loaded_config_data['STT_Settings'].get('default_vad_filter', 'false')
+vad_filter = vad_filter_str.lower() == 'true'

log_counter("speech_to_text_attempt", labels={"file_path": audio_file_path, "model": whisper_model})
time_start = time.time()
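
The defaults added above pull four keys from an `STT_Settings` section of the project config. A short sketch of that fallback chain using `configparser`, where the key names come from this diff but the file format and values are assumptions:

```python
import configparser

# Hypothetical config contents; key names match those read in the diff,
# values are illustrative defaults.
config = configparser.ConfigParser()
config.read_string("""
[STT_Settings]
default_stt_provider = faster_whisper
default_whisper_model = distil-large-v3
default_stt_language = en
default_vad_filter = false
""")

stt = config["STT_Settings"]
whisper_model = stt.get("default_whisper_model", "distil-large-v3")
speaker_lang = stt.get("default_stt_language", "en")
# Mirrors the string-to-bool parsing added to speech_to_text() above:
vad_filter = stt.get("default_vad_filter", "false").lower() == "true"
print(whisper_model, speaker_lang, vad_filter)  # distil-large-v3 en False
```
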
2 changes: 1 addition & 1 deletion App_Function_Libraries/Gradio_Related.py
@@ -403,7 +403,7 @@ def launch_ui(share_public=None, server_mode=False):
#migrate_media_db_to_rag_chat_db(media_db_path, rag_chat_db_path)


-with gr.Blocks(theme='bethecloud/storj_theme',css=css) as iface:
+with gr.Blocks(theme='default',css=css) as iface:
gr.HTML(
"""
<script>
200 changes: 200 additions & 0 deletions App_Function_Libraries/Gradio_Related_Fixed.py
@@ -0,0 +1,200 @@
# Gradio_Related_Fixed.py
# Enhanced version with better error handling and fixes

import os
import sys
import webbrowser
import traceback
import gradio as gr
from pathlib import Path

# Import with error handling
try:
from App_Function_Libraries.DB.DB_Manager import get_db_config
from App_Function_Libraries.DB.RAG_QA_Chat_DB import create_tables
from App_Function_Libraries.Utils.Utils import load_and_log_configs, logging
except ImportError as e:
print(f"Import error: {e}")
print("Make sure all dependencies are installed and paths are correct.")
sys.exit(1)

# Ensure directories exist
def ensure_directories():
"""Create necessary directories if they don't exist"""
dirs = [
'Databases',
'Logs',
'Config_Files'
]
for dir_path in dirs:
Path(dir_path).mkdir(exist_ok=True)

def safe_import_tabs():
"""Import all tab creation functions with error handling"""
tab_imports = {}

# List of all tab modules to import
tab_modules = [
('Anki_tab', ['create_anki_validation_tab', 'create_anki_generator_tab']),
('Arxiv_tab', ['create_arxiv_tab']),
('Audio_ingestion_tab', ['create_audio_processing_tab']),
('Video_transcription_tab', ['create_video_transcription_tab']),
# Add more as needed
]

for module_name, functions in tab_modules:
try:
module = __import__(f'App_Function_Libraries.Gradio_UI.{module_name}', fromlist=functions)
for func_name in functions:
tab_imports[func_name] = getattr(module, func_name)
except Exception as e:
logging.error(f"Failed to import {module_name}: {e}")
# Create a dummy function that shows an error tab. Bind the message with a
# default argument: a bare closure would capture func_name late (every tab
# would show the last name) and `e` is unbound after the except block exits.
for func_name in functions:
error_msg = f"Error loading {func_name}: {str(e)}"
tab_imports[func_name] = lambda msg=error_msg: gr.Markdown(msg)

return tab_imports

def launch_ui_safe(share_public=None, server_mode=False, demo_mode=False):
"""Enhanced launch_ui with better error handling"""

# Ensure directories exist
ensure_directories()

# Don't open browser in demo mode
if not demo_mode:
try:
webbrowser.open_new_tab('http://127.0.0.1:7860/?__theme=dark')
except Exception as e:
logging.warning(f"Could not open browser: {e}")

share = share_public

# CSS styling
css = """
.result-box {
margin-bottom: 20px;
border: 1px solid #ddd;
padding: 10px;
}
.result-box.error {
border-color: #ff0000;
background-color: #ffeeee;
}
.transcription, .summary {
max-height: 800px;
overflow-y: auto;
border: 1px solid #eee;
padding: 10px;
margin-top: 10px;
}
#scrollable-textbox textarea {
max-height: 500px !important;
overflow-y: auto !important;
}
"""

try:
# Load configuration with error handling
config = load_and_log_configs()
if not config:
logging.error("Failed to load configuration")
config = {'db_config': {'sqlite_path': './Databases/media_db.db', 'type': 'sqlite'}}

# Get database paths
db_config = config.get('db_config', {})
media_db_path = db_config.get('sqlite_path', './Databases/media_db.db')

# Ensure database directory exists
db_dir = os.path.dirname(media_db_path)
if not os.path.exists(db_dir):
os.makedirs(db_dir, exist_ok=True)
logging.info(f"Created database directory: {db_dir}")

character_chat_db_path = os.path.join(db_dir, "chatDB.db")
rag_chat_db_path = os.path.join(db_dir, "rag_qa.db")

# Initialize databases with error handling
try:
create_tables()
logging.info("Database tables created successfully")
except Exception as e:
logging.error(f"Error creating database tables: {e}")

# Import all tab functions
tabs = safe_import_tabs()

# Create Gradio interface
with gr.Blocks(theme='default', css=css) as iface:
# Add dark mode script
gr.HTML("""
<script>
document.addEventListener('DOMContentLoaded', (event) => {
document.body.classList.add('dark');
document.querySelector('gradio-app').style.backgroundColor = 'var(--color-background-primary)';
});
</script>
""")

# Get database type
db_type = db_config.get('type', 'sqlite')

# Header
gr.Markdown("# tl/dw: Your LLM-powered Research Multi-tool")
gr.Markdown(f"(Using {db_type.capitalize()} Database)")

# Create minimal interface for testing
with gr.Tabs():
with gr.TabItem("Status", id="status"):
gr.Markdown("## System Status")
gr.Markdown(f"✅ Application loaded successfully")
gr.Markdown(f"📁 Database path: {media_db_path}")
gr.Markdown(f"🗄️ Database type: {db_type}")

with gr.TabItem("Test", id="test"):
gr.Markdown("## Test Tab")
test_input = gr.Textbox(label="Test Input")
test_output = gr.Textbox(label="Test Output")
test_button = gr.Button("Test")

def test_function(text):
return f"Echo: {text}"

test_button.click(test_function, inputs=test_input, outputs=test_output)

# Launch settings
server_port = int(os.getenv('GRADIO_SERVER_PORT', 7860))

# Disable analytics
os.environ['GRADIO_ANALYTICS_ENABLED'] = 'False'

# Launch the interface
launch_kwargs = {
'share': share,
'server_port': server_port,
'show_error': True
}

if server_mode:
launch_kwargs['server_name'] = "0.0.0.0"

try:
iface.launch(**launch_kwargs)
except Exception as e:
logging.error(f"Error launching Gradio interface: {e}")
# Try alternative port
logging.info("Trying alternative port 7861...")
launch_kwargs['server_port'] = 7861
iface.launch(**launch_kwargs)

except Exception as e:
logging.error(f"Critical error in launch_ui: {e}")
logging.error(traceback.format_exc())

# Create minimal error interface
with gr.Blocks() as error_iface:
gr.Markdown("# Error Loading Application")
gr.Markdown(f"An error occurred: {str(e)}")
gr.Markdown("Please check the logs for more information.")

error_iface.launch(share=False, server_port=7860)
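
A possible entry point for the hardened launcher, assuming it is invoked directly rather than through the existing `launch_ui` path (the `__main__` guard and argument values are illustrative):

```python
# Hypothetical wiring; launch_ui_safe is the function defined above.
from App_Function_Libraries.Gradio_Related_Fixed import launch_ui_safe

if __name__ == "__main__":
    # server_mode=True binds 0.0.0.0; demo_mode=True skips opening a browser tab.
    launch_ui_safe(share_public=None, server_mode=True, demo_mode=True)
```
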
17 changes: 17 additions & 0 deletions App_Function_Libraries/Gradio_UI/Audio_ingestion_tab.py
@@ -18,6 +18,7 @@
from App_Function_Libraries.DB.DB_Manager import list_prompts
from App_Function_Libraries.Gradio_UI.Chat_ui import update_user_prompt
from App_Function_Libraries.Gradio_UI.Gradio_Shared import whisper_models
+from App_Function_Libraries.Utils.Whisper_Languages import get_whisper_language_list, get_language_code
from App_Function_Libraries.Utils.Utils import cleanup_temp_files, default_api_endpoint, global_api_endpoints, \
format_api_name, logging
# Import metrics logging
@@ -407,6 +408,21 @@ def toggle_cookies_box(x):
value="distil-large-v3",
label="Whisper Model"
)

+# Add language selection dropdown
+from App_Function_Libraries.Utils.Utils import load_and_log_configs
+loaded_config_data = load_and_log_configs()
+default_lang = loaded_config_data['STT_Settings'].get('default_stt_language', 'en')
+language_choices = get_whisper_language_list()
+default_lang_name = next((name for code, name in language_choices if code == default_lang), "English")
+
+transcription_language = gr.Dropdown(
+choices=[name for code, name in language_choices],
+value=default_lang_name,
+label="Transcription Language",
+info="Select the language of the audio, or use Auto-detect"
+)

keep_timestamps_input = gr.Checkbox(label="Keep Timestamps", value=True)

with gr.Row():
@@ -619,6 +635,7 @@ def toggle_chunking(x):
audio_url_input,
audio_file_input,
whisper_model_input,
+transcription_language,
api_name_input,
api_key_input,
use_cookies_input,
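
Since the dropdown holds display names rather than language codes, handlers translate the selection back before transcription. A small round-trip sketch under that assumption:

```python
from App_Function_Libraries.Utils.Whisper_Languages import (
    get_language_code,
    get_whisper_language_list,
)

# Build choices the way the tab does: names are shown in the UI.
language_choices = get_whisper_language_list()
dropdown_choices = [name for code, name in language_choices]

# On submit, the selected name is mapped back to a code, mirroring
# process_audio_files() in Audio_Files.py ("auto" when nothing is selected).
selected_name = "English"  # illustrative value from the transcription_language dropdown
lang_code = get_language_code(selected_name) if selected_name else "auto"
```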