diff --git a/Tests/UI/test_ingest_window.py b/Tests/UI/test_ingest_window.py new file mode 100644 index 00000000..9726a927 --- /dev/null +++ b/Tests/UI/test_ingest_window.py @@ -0,0 +1,225 @@ +# test_ingest_window.py +# +# Imports +import pytest +from pytest_mock import MockerFixture # For mocking +from pathlib import Path +# +# Third-party Libraries +from textual.app import App, ComposeResult +from textual.widgets import Button, Input, Select, Checkbox, TextArea, RadioSet, RadioButton, Collapsible, ListView, \ + ListItem, Markdown, LoadingIndicator, Label, Static +from textual.containers import Container, VerticalScroll, Horizontal, Vertical +from textual.pilot import Pilot +from textual.css.query import QueryError +# +# Local Imports +from tldw_chatbook.app import TldwCli # The main app +from tldw_chatbook.UI.Ingest_Window import IngestWindow, MEDIA_TYPES # Import MEDIA_TYPES +from tldw_chatbook.tldw_api.schemas import ProcessVideoRequest, ProcessAudioRequest, ProcessPDFRequest, \ + ProcessEbookRequest, ProcessDocumentRequest, ProcessXMLRequest, ProcessMediaWikiRequest +# +# +######################################################################################################################## +# +# Fixtures and Helper Functions + +# Helper to get the IngestWindow instance from the app +async def get_ingest_window(pilot: Pilot) -> IngestWindow: + ingest_window_query = pilot.app.query(IngestWindow) + assert ingest_window_query.is_empty is False, "IngestWindow not found" + return ingest_window_query.first() + + +@pytest.fixture +async def app_pilot() -> Pilot: + app = TldwCli() + async with app.run_test() as pilot: + # Ensure the Ingest tab is active. Default is Chat. + # Switching tabs is handled by app.py's on_button_pressed for tab buttons. + # We need to find the Ingest tab button and click it. 
+ # Assuming tab IDs are like "tab-ingest" + try: + await pilot.click("#tab-ingest") + except QueryError: + # Fallback if direct ID click isn't working as expected in test setup + # This might indicate an issue with tab IDs or pilot interaction timing + all_buttons = pilot.app.query(Button) + ingest_tab_button = None + for btn in all_buttons: + if btn.id == "tab-ingest": + ingest_tab_button = btn + break + assert ingest_tab_button is not None, "Ingest tab button not found" + await pilot.click(ingest_tab_button) + + # Verify IngestWindow is present and active + ingest_window = await get_ingest_window(pilot) + assert ingest_window is not None + assert ingest_window.display is True, "IngestWindow is not visible after switching to Ingest tab" + # Also check the app's current_tab reactive variable + assert pilot.app.current_tab == "ingest", "App's current_tab is not set to 'ingest'" + yield pilot + + +# Test Class +class TestIngestWindowTLDWAPI: + + async def test_initial_tldw_api_nav_buttons_and_views(self, app_pilot: Pilot): + ingest_window = await get_ingest_window(app_pilot) + # The IngestWindow itself is a container, nav buttons are direct children of its "ingest-nav-pane" + nav_pane = ingest_window.query_one("#ingest-nav-pane") + + for mt in MEDIA_TYPES: + nav_button_id = f"ingest-nav-tldw-api-{mt.replace('_', '-')}" # IDs don't have # + view_id = f"ingest-view-tldw-api-{mt.replace('_', '-')}" + + # Check navigation button exists + nav_button = nav_pane.query_one(f"#{nav_button_id}", Button) + assert nav_button is not None, f"Navigation button {nav_button_id} not found" + expected_label_part = mt.replace('_', ' ').title() + if mt == "mediawiki_dump": + expected_label_part = "MediaWiki Dump" + assert expected_label_part in str(nav_button.label), f"Label for {nav_button_id} incorrect" + + # Check view area exists + view_area = ingest_window.query_one(f"#{view_id}", Container) + assert view_area is not None, f"View area {view_id} not found" + + # Check initial 
visibility based on app's active ingest view + # This assumes that after switching to Ingest tab, a default sub-view *within* Ingest is activated. + # If `ingest_active_view` is set (e.g. to "ingest-view-prompts" by default), then + # all tldw-api views should be hidden. + active_ingest_view_on_app = app_pilot.app.ingest_active_view + if view_id != active_ingest_view_on_app: + assert view_area.display is False, f"{view_id} should be hidden if not the active ingest view ('{active_ingest_view_on_app}')" + else: + assert view_area.display is True, f"{view_id} should be visible as it's the active ingest view ('{active_ingest_view_on_app}')" + + @pytest.mark.parametrize("media_type", MEDIA_TYPES) + async def test_tldw_api_navigation_and_view_display(self, app_pilot: Pilot, media_type: str): + ingest_window = await get_ingest_window(app_pilot) + nav_button_id = f"ingest-nav-tldw-api-{media_type.replace('_', '-')}" + target_view_id = f"ingest-view-tldw-api-{media_type.replace('_', '-')}" + + await app_pilot.click(f"#{nav_button_id}") + await app_pilot.pause() # Allow watchers to update display properties + + # Verify target view is visible + target_view_area = ingest_window.query_one(f"#{target_view_id}", Container) + assert target_view_area.display is True, f"{target_view_id} should be visible after clicking {nav_button_id}" + assert app_pilot.app.ingest_active_view == target_view_id, f"App's active ingest view should be {target_view_id}" + + # Verify other TLDW API views are hidden + for other_mt in MEDIA_TYPES: + if other_mt != media_type: + other_view_id = f"ingest-view-tldw-api-{other_mt.replace('_', '-')}" + other_view_area = ingest_window.query_one(f"#{other_view_id}", Container) + assert other_view_area.display is False, f"{other_view_id} should be hidden when {target_view_id} is active" + + # Verify common form elements exist with dynamic IDs + common_endpoint_input = target_view_area.query_one(f"#tldw-api-endpoint-url-{media_type}", Input) + assert 
common_endpoint_input is not None + + common_submit_button = target_view_area.query_one(f"#tldw-api-submit-{media_type}", Button) + assert common_submit_button is not None + + # Verify media-specific options container and its widgets + if media_type == "video": + opts_container = target_view_area.query_one("#tldw-api-video-options", Container) + assert opts_container.display is True + widget = opts_container.query_one(f"#tldw-api-video-transcription-model-{media_type}", Input) + assert widget is not None + elif media_type == "audio": + opts_container = target_view_area.query_one("#tldw-api-audio-options", Container) + assert opts_container.display is True + widget = opts_container.query_one(f"#tldw-api-audio-transcription-model-{media_type}", Input) + assert widget is not None + elif media_type == "pdf": + opts_container = target_view_area.query_one("#tldw-api-pdf-options", Container) + assert opts_container.display is True + widget = opts_container.query_one(f"#tldw-api-pdf-engine-{media_type}", Select) + assert widget is not None + elif media_type == "ebook": + opts_container = target_view_area.query_one("#tldw-api-ebook-options", Container) + assert opts_container.display is True + widget = opts_container.query_one(f"#tldw-api-ebook-extraction-method-{media_type}", Select) + assert widget is not None + elif media_type == "document": # Has minimal specific options currently + opts_container = target_view_area.query_one("#tldw-api-document-options", Container) + assert opts_container.display is True + # Example: find the label if one exists + try: + label = opts_container.query_one(Label) # Assuming there's at least one label + assert label is not None + except QueryError: # If no labels, this is fine for doc + pass + elif media_type == "xml": + opts_container = target_view_area.query_one("#tldw-api-xml-options", Container) + assert opts_container.display is True + widget = opts_container.query_one(f"#tldw-api-xml-auto-summarize-{media_type}", Checkbox) + assert 
widget is not None + elif media_type == "mediawiki_dump": + opts_container = target_view_area.query_one("#tldw-api-mediawiki-options", Container) + assert opts_container.display is True + widget = opts_container.query_one(f"#tldw-api-mediawiki-wiki-name-{media_type}", Input) + assert widget is not None + + async def test_tldw_api_video_submission_data_collection(self, app_pilot: Pilot, mocker: MockerFixture): + media_type = "video" + ingest_window = await get_ingest_window(app_pilot) + + # Navigate to video tab by clicking its nav button + nav_button_id = f"ingest-nav-tldw-api-{media_type}" + await app_pilot.click(f"#{nav_button_id}") + await app_pilot.pause() # Allow UI to update + + target_view_id = f"ingest-view-tldw-api-{media_type}" + target_view_area = ingest_window.query_one(f"#{target_view_id}", Container) + assert target_view_area.display is True, "Video view area not displayed after click" + + # Mock the API client and its methods + mock_api_client_instance = mocker.MagicMock() + # Make process_video an async mock + mock_process_video = mocker.AsyncMock(return_value=mocker.MagicMock()) + mock_api_client_instance.process_video = mock_process_video + mock_api_client_instance.close = mocker.AsyncMock() + + mocker.patch("tldw_chatbook.Event_Handlers.ingest_events.TLDWAPIClient", return_value=mock_api_client_instance) + + # Set form values + endpoint_url_input = target_view_area.query_one(f"#tldw-api-endpoint-url-{media_type}", Input) + urls_textarea = target_view_area.query_one(f"#tldw-api-urls-{media_type}", TextArea) + video_trans_model_input = target_view_area.query_one(f"#tldw-api-video-transcription-model-{media_type}", Input) + auth_method_select = target_view_area.query_one(f"#tldw-api-auth-method-{media_type}", Select) + + endpoint_url_input.value = "http://fakeapi.com" + urls_textarea.text = "http://example.com/video.mp4" + video_trans_model_input.value = "test_video_model" + auth_method_select.value = "config_token" + + app_pilot.app.app_config = 
{"tldw_api": {"auth_token_config": "fake_token"}} + + submit_button_id = f"tldw-api-submit-{media_type}" + await app_pilot.click(f"#{submit_button_id}") + await app_pilot.pause(delay=0.5) + + mock_process_video.assert_called_once() + call_args = mock_process_video.call_args[0] + + assert len(call_args) >= 1, "process_video not called with request_model" + request_model_arg = call_args[0] + + assert isinstance(request_model_arg, ProcessVideoRequest) + assert request_model_arg.urls == ["http://example.com/video.mp4"] + assert request_model_arg.transcription_model == "test_video_model" + assert request_model_arg.api_key == "fake_token" + + # Example for local_file_paths if it's the second argument + if len(call_args) > 1: + local_files_arg = call_args[1] + assert local_files_arg == [], "local_files_arg was not empty" + else: + # This case implies process_video might not have received local_file_paths, + # which could be an issue if it's expected. For now, let's assume it's optional. + pass diff --git a/pyproject.toml b/pyproject.toml index cfb88bc0..6aa9b042 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,15 +4,15 @@ requires = ["setuptools>=61.0"] build-backend = "setuptools.build_meta" [project] -name = "tldw_chatbook" # This is the distribution name (what you pip install) +name = "tldw_chatbook" version = "0.1.0" authors = [ { name="Robert Musser", email="contact@rmusser.net" }, ] description = "A Textual TUI for chatting with LLMs, and interacting with the tldw server." 
-readme = "README.md" # Assumes README.md is in the project root +readme = "README.md" requires-python = ">=3.11" -license = { file="LICENSE" } # Assumes LICENSE file is in the project root +license = { file="LICENSE" } keywords = ["tui", "cli", "llm", "textual", "ai", "chat"] classifiers = [ "Development Status :: 3 - Alpha", @@ -20,11 +20,10 @@ classifiers = [ "Intended Audience :: End Users/Desktop", "Topic :: Utilities", "Topic :: Terminals", - "License :: OSI Approved :: GNU Affero General Public License v3 or later (AGPLv3+)", # Corrected to AGPL + "License :: OSI Approved :: GNU Affero General Public License v3 or later (AGPLv3+)", "Programming Language :: Python :: 3", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", - # "Programming Language :: Python :: 3.13", # Only list if you actively test/support it "Environment :: Console", "Operating System :: OS Independent", ] @@ -34,9 +33,9 @@ dependencies = [ "chardet", "httpx", "loguru", - "textual>=3.3.0", # Specify a minimum Textual version if features depend on it + "textual>=3.3.0", "requests", - "rich", # Usually pulled in by Textual, but explicit is fine + "rich", "Pillow", "PyYAML", "pydantic", diff --git a/requirements.txt b/requirements.txt index d60ba92a..0b006470 100644 --- a/requirements.txt +++ b/requirements.txt @@ -10,7 +10,7 @@ loguru pydantic pyyaml httpx -pytest +#pytest emoji # Chunk Library #tqdm diff --git a/tldw_chatbook/DB/Client_Media_DB_v2.py b/tldw_chatbook/DB/Client_Media_DB_v2.py index ca041342..6a9cde42 100644 --- a/tldw_chatbook/DB/Client_Media_DB_v2.py +++ b/tldw_chatbook/DB/Client_Media_DB_v2.py @@ -1917,9 +1917,12 @@ def add_media_with_keywords(self, # --- Handle Unvectorized Chunks --- if chunks is not None: # chunks argument was provided (could be empty or list of dicts) if action == "updated": - # Hard delete old chunks for this media_id if updating - logging.debug(f"Hard deleting existing UnvectorizedMediaChunks for updated media_id 
{media_id}.") - conn.execute("DELETE FROM UnvectorizedMediaChunks WHERE media_id = ?", (media_id,)) + # If overwriting and new chunks are provided, clear old ones. + # If `chunks` is an empty list, it also means clear old ones. + if overwrite: # Only delete if overwrite is true + logging.debug( + f"Hard deleting existing UnvectorizedMediaChunks for updated media_id {media_id} due to overwrite and new chunks being provided.") + conn.execute("DELETE FROM UnvectorizedMediaChunks WHERE media_id = ?", (media_id,)) num_chunks_saved = 0 if chunks: # If chunks list is not empty @@ -1980,9 +1983,18 @@ def add_media_with_keywords(self, raise DatabaseError(f"Failed to save chunk {i} due to integrity constraint: {e}") from e logging.info(f"Saved {num_chunks_saved} unvectorized chunks for media_id {media_id}.") - # Update Media chunking_status to 'completed' as chunk processing is done (even if 0 chunks were provided) - conn.execute("UPDATE Media SET chunking_status = 'completed' WHERE id = ?", (media_id,)) - logging.debug(f"Updated Media chunking_status to 'completed' for media_id {media_id} after chunk processing.") + # Update Media chunking_status + # If chunks were provided (even an empty list, meaning "clear existing and add these (none)"), + # then chunking is considered 'completed' from the perspective of this operation. + # If `chunks` was None (meaning "don't touch existing chunks"), status remains as is or 'pending'. + final_chunking_status_for_media = 'completed' # if chunks is not None + # If the main `perform_chunking` flag (from request, not DB field) was false, + # then perhaps status should be different. For now, if chunks data is passed, it's 'completed'. + # This might need more nuanced logic based on the `perform_chunking` flag from the original request. + conn.execute("UPDATE Media SET chunking_status = ? 
WHERE id = ?", + (final_chunking_status_for_media, media_id,)) + logging.debug( + f"Updated Media chunking_status to '{final_chunking_status_for_media}' for media_id {media_id} after chunk processing.") # Original chunk_options placeholder log if chunk_options: diff --git a/tldw_chatbook/Event_Handlers/LLM_Management_Events/llm_management_events_transformers.py b/tldw_chatbook/Event_Handlers/LLM_Management_Events/llm_management_events_transformers.py new file mode 100644 index 00000000..c13d1400 --- /dev/null +++ b/tldw_chatbook/Event_Handlers/LLM_Management_Events/llm_management_events_transformers.py @@ -0,0 +1,315 @@ +# /tldw_chatbook/Event_Handlers/llm_management_events_transformers.py +from __future__ import annotations + +import logging +import shlex +import subprocess +from pathlib import Path +from typing import TYPE_CHECKING, List, Dict, Any, Optional +import functools # For download worker + +from textual.widgets import Input, RichLog +from textual.css.query import QueryError + +# For listing local models, you might need to interact with huggingface_hub or scan directories +try: + from huggingface_hub import HfApi, constants as hf_constants + + # from huggingface_hub import list_models, model_info as hf_model_info # For online search + # from huggingface_hub.utils import GatedRepoError, RepositoryNotFoundError + HUGGINGFACE_HUB_AVAILABLE = True +except ImportError: + HUGGINGFACE_HUB_AVAILABLE = False + hf_constants = None # type: ignore + +if TYPE_CHECKING: + from tldw_chatbook.app import TldwCli + # textual_fspicker is imported dynamically in the handler + +# Import shared helpers if needed +from .llm_management_events import \ + _make_path_update_callback # _stream_process, stream_worker_output_to_log (not used by download worker directly) + + +# --- Worker function for model download (can be similar to the existing one) --- +def run_transformers_model_download_worker(app_instance: "TldwCli", command: List[str], + models_base_dir_for_cwd: str) -> str: + 
logger = getattr(app_instance, "loguru_logger", logging.getLogger(__name__)) + quoted_command = ' '.join(shlex.quote(c) for c in command) + # The actual target download path is part of the command (--local-dir) + logger.info(f"Transformers Download WORKER starting: {quoted_command}") + + process: Optional[subprocess.Popen] = None + final_status_message = f"Transformers Download WORKER: Default status for {quoted_command}" + pid_str = "N/A" + + try: + # The command already includes --local-dir pointing to the exact target. + # We might want to run huggingface-cli from a neutral directory or models_base_dir_for_cwd + # if --local-dir is relative, but since we make it absolute, cwd is less critical. + # For consistency, let's use models_base_dir_for_cwd if provided and valid. + cwd_to_use = models_base_dir_for_cwd if Path(models_base_dir_for_cwd).is_dir() else None + + process = subprocess.Popen( + command, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + universal_newlines=True, + bufsize=1, + cwd=cwd_to_use + ) + pid_str = str(process.pid) if process and process.pid else "UnknownPID" + logger.info(f"Transformers Download WORKER: Subprocess launched, PID: {pid_str}") + app_instance.call_from_thread(app_instance._update_transformers_log, f"[PID:{pid_str}] Download starting...\n") + + # communicate() waits for termination + stdout_data, stderr_data = process.communicate(timeout=600) # 10 min timeout for download + + logger.info( + f"Transformers Download WORKER: communicate() completed. 
PID {pid_str}, Exit Code: {process.returncode}") + + if stdout_data: + logger.info(f"Transformers Download WORKER STDOUT:\n{stdout_data.strip()}") + app_instance.call_from_thread(app_instance._update_transformers_log, + f"--- STDOUT (PID:{pid_str}) ---\n{stdout_data.strip()}\n") + if stderr_data: + logger.error(f"Transformers Download WORKER STDERR:\n{stderr_data.strip()}") + app_instance.call_from_thread(app_instance._update_transformers_log, + f"--- STDERR (PID:{pid_str}) ---\n[bold red]{stderr_data.strip()}[/]\n") + + if process.returncode != 0: + final_status_message = f"Model download (PID:{pid_str}) failed with code: {process.returncode}." + if stderr_data: final_status_message += f"\nSTDERR: {stderr_data.strip()}" + else: + final_status_message = f"Model download (PID:{pid_str}) completed successfully (code: {process.returncode}). Model should be in target --local-dir." + + app_instance.call_from_thread(app_instance._update_transformers_log, f"{final_status_message}\n") + return final_status_message + + except FileNotFoundError: + msg = f"ERROR: huggingface-cli not found. Please ensure it's installed and in PATH." + logger.error(msg) + app_instance.call_from_thread(app_instance._update_transformers_log, f"[bold red]{msg}[/]\n") + raise + except subprocess.TimeoutExpired: + msg = f"ERROR: Model download (PID:{pid_str}) timed out after 600s." 
+ logger.error(msg) + if process: process.kill() + app_instance.call_from_thread(app_instance._update_transformers_log, f"[bold red]{msg}[/]\n") + raise RuntimeError(msg) # Make worker fail + except Exception as err: + msg = f"CRITICAL ERROR in Transformers Download worker: {err} (Command: {quoted_command})" + logger.error(msg, exc_info=True) + app_instance.call_from_thread(app_instance._update_transformers_log, f"[bold red]{msg}[/]\n") + raise + finally: + logger.info(f"Transformers Download WORKER: Worker for '{quoted_command}' finishing.") + if process and process.poll() is None: + logger.warning(f"Transformers Download WORKER (PID:{pid_str}): Process still running in finally. Killing.") + process.kill() + + +async def handle_transformers_list_local_models_button_pressed(app: "TldwCli") -> None: + logger = getattr(app, "loguru_logger", logging.getLogger(__name__)) + logger.info("Transformers list local models button pressed.") + + models_dir_input: Input = app.query_one("#transformers-models-dir-path", Input) + models_list_widget: RichLog = app.query_one("#transformers-local-models-list", RichLog) + log_output_widget: RichLog = app.query_one("#transformers-log-output", RichLog) + + models_dir_str = models_dir_input.value.strip() + if not models_dir_str: + app.notify("Please specify a local models directory first.", severity="warning") + models_dir_input.focus() + return + + models_path = Path(models_dir_str).resolve() # Resolve to absolute path + if not models_path.is_dir(): + app.notify(f"Directory not found: {models_path}", severity="error") + models_dir_input.focus() + return + + models_list_widget.clear() + log_output_widget.write(f"Scanning for models in: {models_path}...\n") + app.notify("Scanning for local models...") + + found_models_display = [] + try: + # This basic scan looks for directories that might be model repos. + # A 'blobs' and 'refs' subdirectory alongside 'snapshots' is common for full cache structure. 
+ # Individual model downloads might just have 'snapshots' or be flat. + + # Heuristic 1: Look for 'snapshots' directory, then list its children + # These children are usually named after commit hashes. Inside them are the actual files. + # We need to find a way to map these back to a model name. + # Often, a .gitattributes or similar file at a higher level might exist. + + # Heuristic 2: Look for directories containing config.json + # This is simpler but might find nested utility models or non-root model dirs. + + count = 0 + for item_path in models_path.rglob("config.json"): + if item_path.is_file(): + model_root_dir = item_path.parent + # Try to infer a model name. This is tricky. + # If models_path is like ".../hub/models--org--modelname", then model_root_dir might be a snapshot hash. + # If models_path is a custom dir where user put "org/modelname" folders, it's easier. + + display_name = "" + try: + # Attempt to make a "repo_id" like name from the path relative to models_path + relative_to_scan_root = model_root_dir.relative_to(models_path) + # If models_path is the HF cache, relative_to_scan_root might be "models--org--repo/snapshots/hash" + # We want to extract "org/repo" + parts = list(relative_to_scan_root.parts) + if parts and parts[0].startswith("models--"): + name_part = parts[0].replace("models--", "") + display_name = name_part.replace("--", "/", 1) # Replace only first -- + else: # Assume a flatter structure or direct model name as folder + display_name = str(relative_to_scan_root) + except ValueError: # Not a subpath, models_path itself might be the model_root_dir + if model_root_dir == models_path: + display_name = models_path.name + else: # Some other structure + display_name = model_root_dir.name # Best guess + + # Check for actual model files + has_weights = (model_root_dir / "pytorch_model.bin").exists() or \ + (model_root_dir / "model.safetensors").exists() or \ + (model_root_dir / "tf_model.h5").exists() + + if has_weights: + count += 1 + 
found_models_display.append(f"[green]{display_name}[/] ([dim]at {model_root_dir}[/dim])") + + if found_models_display: + models_list_widget.write("\n".join(found_models_display)) + app.notify(f"Found {count} potential local models (based on config.json and weights).") + else: + models_list_widget.write("No model directories found with config.json and model weights.") + app.notify("No local models found with this scan method.", severity="information") + log_output_widget.write("Local model scan complete.\n") + + except Exception as e: + logger.error(f"Error scanning for local models: {e}", exc_info=True) + log_output_widget.write(f"[bold red]Error scanning models: {e}[/]\n") + app.notify("Error during local model scan.", severity="error") + + +async def handle_transformers_download_model_button_pressed(app: "TldwCli") -> None: + logger = getattr(app, "loguru_logger", logging.getLogger(__name__)) + logger.info("Transformers download model button pressed.") + + repo_id_input: Input = app.query_one("#transformers-download-repo-id", Input) + revision_input: Input = app.query_one("#transformers-download-revision", Input) + models_dir_input: Input = app.query_one("#transformers-models-dir-path", Input) + log_output_widget: RichLog = app.query_one("#transformers-log-output", RichLog) + + repo_id = repo_id_input.value.strip() + revision = revision_input.value.strip() or None + models_dir_str = models_dir_input.value.strip() + + if not repo_id: + app.notify("Model Repo ID is required to download.", severity="error") + repo_id_input.focus() + return + + if not models_dir_str: + # Default to HF cache if not specified, but warn user. 
+ if HUGGINGFACE_HUB_AVAILABLE and hf_constants and Path(hf_constants.HF_HUB_CACHE).is_dir(): + models_dir_str = str(hf_constants.HF_HUB_CACHE) + app.notify(f"No local directory set, will download to Hugging Face cache: {models_dir_str}", + severity="warning", timeout=7) + models_dir_input.value = models_dir_str # Update UI + else: + app.notify("Local models directory must be set to specify download location.", severity="error") + models_dir_input.focus() + return + + # huggingface-cli download --local-dir specifies the *target* directory for THIS model's files. + # It will create subdirectories based on the repo structure under this path. + # Example: if --local-dir is /my/models/bert, files go into /my/models/bert/snapshots/hash/... + # We want the user-provided models_dir_str to be the root under which models are organized. + # So, the --local-dir for huggingface-cli should be models_dir_str itself, or a subfolder we define. + # Let's make it download into a subfolder named after the repo_id within models_dir_str for clarity. 
+ + # Sanitize repo_id for use as a directory name part + safe_repo_id_subdir = repo_id.replace("/", "--") + target_model_specific_dir = Path(models_dir_str) / safe_repo_id_subdir + + log_output_widget.write( + f"Attempting to download '{repo_id}' (rev: {revision or 'latest'}) to '{target_model_specific_dir}'...\n") + target_model_specific_dir.mkdir(parents=True, exist_ok=True) # Ensure target dir exists + + command = [ + "huggingface-cli", + "download", + repo_id, + "--local-dir", str(target_model_specific_dir), + "--local-dir-use-symlinks", "False" # Usually want actual files for local management + ] + if revision: + command.extend(["--revision", revision]) + + # The worker CWD should be a neutral place, or the parent of target_model_specific_dir + worker_cwd = models_dir_str + + worker_callable = functools.partial( + run_transformers_model_download_worker, + app, + command, + worker_cwd + ) + + app.run_worker( + worker_callable, + group="transformers_download", + description=f"Downloading HF Model {repo_id}", + exclusive=False, + thread=True, + ) + app.notify(f"Starting download for {repo_id}...") + + +async def handle_transformers_browse_models_dir_button_pressed(app: "TldwCli") -> None: + logger = getattr(app, "loguru_logger", logging.getLogger(__name__)) + logger.debug("Transformers browse models directory button pressed.") + + try: + from textual_fspicker import FileOpen, Filters # Ensure it's imported for runtime + except ImportError: + app.notify("File picker utility (textual-fspicker) not available.", severity="error") + logger.error("textual_fspicker not found for Transformers model dir browsing.") + return + + default_loc_str = str(Path.home()) + if HUGGINGFACE_HUB_AVAILABLE and hf_constants: + try: + # Use HF_HOME if set, otherwise default cache. 
+ # hf_constants.HF_HUB_CACHE points to the 'hub' subdir, e.g., ~/.cache/huggingface/hub + # We might want to default to ~/.cache/huggingface or where user typically stores models + hf_cache_dir = Path(hf_constants.HF_HUB_CACHE) + if hf_cache_dir.is_dir(): + default_loc_str = str(hf_cache_dir) + elif hf_cache_dir.parent.is_dir(): # Try one level up, e.g. ~/.cache/huggingface + default_loc_str = str(hf_cache_dir.parent) + except Exception: # pylint: disable=broad-except + pass + + logger.debug(f"Transformers browse models dir: starting location '{default_loc_str}'") + + await app.push_screen( + FileOpen( + location=default_loc_str, + select_dirs=True, # We want to select a directory + title="Select Local Hugging Face Models Directory", + # No specific filters needed for directory selection + ), + # This callback will update the Input widget with id "transformers-models-dir-path" + callback=_make_path_update_callback(app, "transformers-models-dir-path"), + ) + + + + diff --git a/tldw_chatbook/Event_Handlers/conv_char_events.py b/tldw_chatbook/Event_Handlers/conv_char_events.py index f21664ef..8f2fe452 100644 --- a/tldw_chatbook/Event_Handlers/conv_char_events.py +++ b/tldw_chatbook/Event_Handlers/conv_char_events.py @@ -13,7 +13,7 @@ # 3rd-Party Imports from loguru import logger as loguru_logger from textual.widgets import ( - Input, ListView, TextArea, Label, Collapsible, Select, Static, ListItem + Input, ListView, TextArea, Label, Collapsible, Select, Static, ListItem, Button ) from textual.containers import VerticalScroll from textual.css.query import QueryError @@ -1473,7 +1473,7 @@ async def handle_ccp_editor_prompt_clone_button_pressed(app: 'TldwCli') -> None: async def handle_ccp_editor_prompt_delete_button_pressed(app: 'TldwCli') -> None: """Handles deleting the prompt currently in the CENTER PANE editor.""" - logger = getattr(app, 'loguru_logger', logging) + logger = getattr(app, 'loguru_logger', loguru_logger) logger.info("CCP Editor Delete Prompt button 
async def handle_ccp_editor_char_save_button_pressed(app: 'TldwCli') -> None:
    """Save the character shown in the CCP center-pane editor.

    Reads the editor widgets, rejects an empty name, then either adds a new
    character card (when ``app.current_editing_character_id`` is ``None``) or
    updates the existing one, passing the ``version`` found in
    ``app.current_editing_character_data`` as ``expected_version``.

    After a successful save the character is reloaded into the editor, the
    character dropdown is repopulated and the editor's Cancel button is
    hidden. Every failure is logged and surfaced through ``app.notify``;
    no exception is propagated to the caller.

    Args:
        app: The running application; provides the DB handle, the editor
            widgets and the ``current_editing_character_*`` state.
    """
    logger = getattr(app, 'loguru_logger', loguru_logger)
    logger.info("CCP Editor: Save Character button pressed.")

    if not app.chachanotes_db:
        app.notify("Database service not available for characters.", severity="error")
        logger.error("app.chachanotes_db not available for saving character.")
        return
    db = app.chachanotes_db

    # Bound before the try block because the exception handlers below
    # interpolate it. Without this, a failure while reading the very first
    # field would raise UnboundLocalError inside the handler and hide the
    # original error.
    char_name = ""

    try:
        # Retrieve character data from the UI input fields
        char_name = app.query_one("#ccp-editor-char-name-input", Input).value.strip()
        avatar_path = app.query_one("#ccp-editor-char-avatar-input", Input).value.strip()
        description = app.query_one("#ccp-editor-char-description-textarea", TextArea).text.strip()
        personality = app.query_one("#ccp-editor-char-personality-textarea", TextArea).text.strip()
        scenario = app.query_one("#ccp-editor-char-scenario-textarea", TextArea).text.strip()
        first_message = app.query_one("#ccp-editor-char-first-message-textarea", TextArea).text.strip()
        keywords_text = app.query_one("#ccp-editor-char-keywords-textarea", TextArea).text.strip()
        # Keywords are entered as a comma-separated list; blank entries are dropped.
        keywords_list = [kw.strip() for kw in keywords_text.split(',') if kw.strip()]

        if not char_name:
            app.notify("Character Name cannot be empty.", severity="error", timeout=4)
            app.query_one("#ccp-editor-char-name-input", Input).focus()
            return

        character_data_for_db_op: Dict[str, Any] = {
            "name": char_name,
            "description": description,
            "personality": personality,
            "scenario": scenario,
            "first_message": first_message,
            "keywords": keywords_list,
            "image_path": avatar_path,  # Storing avatar path as image_path
            # Ensure other relevant fields from your DB schema are included if needed
            # e.g., "creator_notes", "system_prompt", "post_history_instructions",
            # "alternate_greetings", "tags", "creator", "character_version", "extensions"
        }

        # NOTE(review): the code below assumes both DB calls return a dict-like
        # object exposing an "id" key - confirm against the DB layer.
        saved_character_details: Optional[Dict[str, Any]] = None
        current_editing_id = app.current_editing_character_id
        current_editing_data = cast(Optional[Dict[str, Any]], app.current_editing_character_data)

        if current_editing_id is None:  # New character
            logger.info(f"Attempting to add new character: {char_name}")
            saved_character_details = db.add_character_card(character_data=character_data_for_db_op)
            if saved_character_details and saved_character_details.get("id"):
                logger.info(f"New character '{char_name}' added. ID: {saved_character_details['id']}")
                app.notify(f"Character '{char_name}' saved successfully.", severity="information")
            else:
                logger.error(f"Failed to save new character '{char_name}'. DB response: {saved_character_details}")
                app.notify(f"Failed to save new character '{char_name}'.", severity="error")
                return
        else:  # Existing character
            logger.info(f"Attempting to update character ID: {current_editing_id}, Name: {char_name}")
            if not current_editing_data:
                logger.error(f"Cannot update character {current_editing_id}: current editing data is missing.")
                app.notify("Error: Current character data is missing. Please reload.", severity="error")
                return

            # The version read at load time is handed back to the DB so it can
            # detect that the record changed in the meantime (see ConflictError below).
            current_version = current_editing_data.get('version')
            if current_version is None:
                logger.error(f"Cannot update character {current_editing_id}: version is missing from loaded data.")
                app.notify("Error: Character version is missing. Please reload and try again.", severity="error")
                return

            saved_character_details = db.update_character_card(
                character_id=current_editing_id,
                update_data=character_data_for_db_op,
                expected_version=current_version
            )
            if saved_character_details:
                logger.info(f"Character '{char_name}' (ID: {current_editing_id}) updated successfully.")
                app.notify(f"Character '{char_name}' updated successfully.", severity="information")
            else:
                logger.error(f"Failed to update character '{char_name}'. DB response: {saved_character_details}")
                app.notify(f"Failed to update character '{char_name}'.", severity="error")
                return

        if saved_character_details and saved_character_details.get("id"):
            new_char_id = saved_character_details["id"]
            # Reload the character into the editor to reflect any changes and update state
            await _helper_ccp_load_character_into_center_pane_editor(app, new_char_id)
            await populate_ccp_character_select(app)  # Refresh dropdown list
            try:
                cancel_button = app.query_one("#ccp-editor-char-cancel-button", Button)
                cancel_button.add_class("hidden")
            except QueryError:
                # A missing Cancel button must not undo an otherwise successful save.
                logger.error("Failed to find #ccp-editor-char-cancel-button to add 'hidden' class post-save.")
        else:
            # This case should ideally not be reached if errors are returned above.
            logger.warning("Save/Update operation completed but no valid character details received.")
            # Fall back to re-showing whatever the DB currently holds for this character.
            if current_editing_id:
                await _helper_ccp_load_character_into_center_pane_editor(app, current_editing_id)

    except ConflictError as e_conflict:
        logger.warning(f"Conflict saving character '{char_name}': {e_conflict}", exc_info=True)
        app.notify("Save conflict: Data was modified elsewhere. Please reload and try again.", severity="error", timeout=7)
        if app.current_editing_character_id:  # Reload to show current state from DB
            await _helper_ccp_load_character_into_center_pane_editor(app, app.current_editing_character_id)
    except CharactersRAGDBError as e_db:
        logger.error(f"Database error saving character '{char_name}': {e_db}", exc_info=True)
        app.notify(f"Database error saving character: {type(e_db).__name__}", severity="error")
    except QueryError as e_query:
        logger.error(f"UI component error saving character: {e_query}", exc_info=True)
        app.notify("UI Error: Could not access character editor fields.", severity="error")
    except Exception as e_unexp:
        logger.error(f"Unexpected error saving character '{char_name}': {e_unexp}", exc_info=True)
        app.notify(f"An unexpected error occurred: {type(e_unexp).__name__}", severity="error")
is the correct function from ccl + # and it returns (character_data_dict, initial_ui_history_list, PIL_Image_object_or_None) char_data, _, char_image_pil = ccl.load_character_and_image( app.chachanotes_db, character_id, @@ -1560,27 +1676,18 @@ async def _helper_ccp_load_character_into_center_pane_editor(app: 'TldwCli', cha # Populate UI elements in the CENTER PANE character editor app.query_one("#ccp-editor-char-name-input", Input).value = char_data.get("name", "") + app.query_one("#ccp-editor-char-avatar-input", Input).value = char_data.get("image_path", char_data.get("avatar", "")) # Use image_path or avatar app.query_one("#ccp-editor-char-description-textarea", TextArea).text = char_data.get("description", "") app.query_one("#ccp-editor-char-personality-textarea", TextArea).text = char_data.get("personality", "") app.query_one("#ccp-editor-char-scenario-textarea", TextArea).text = char_data.get("scenario", "") - app.query_one("#ccp-editor-char-first-message-textarea", TextArea).text = char_data.get("first_message", char_data.get("first_mes","")) # check common key names - # Add other fields if present in your editor UI: - # e.g., app.query_one("#ccp-editor-char-system-prompt-textarea", TextArea).text = char_data.get("system_prompt", "") - - # Optional: Handle image display if your editor has an image area - # try: - # editor_image_placeholder = app.query_one("#ccp-editor-char-image-placeholder", Static) # Example ID - # if char_image_pil: - # editor_image_placeholder.update("Character image loaded (editor display not shown).") - # else: - # editor_image_placeholder.update("No image available for editor.") - # except QueryError: - # loguru_logger.warning("No image placeholder found in CCP character editor.") + app.query_one("#ccp-editor-char-first-message-textarea", TextArea).text = char_data.get("first_message", char_data.get("first_mes","")) + keywords_list = char_data.get("keywords", []) + app.query_one("#ccp-editor-char-keywords-textarea", TextArea).text = ", 
".join(keywords_list) if keywords_list else "" app.query_one("#ccp-editor-char-name-input", Input).focus() app.notify(f"Character '{char_data.get('name', 'Unknown')}' loaded into center editor.", severity="information") - loguru_logger.info(f"Loaded character '{char_data.get('name', 'Unknown')}' into CCP center editor.") + loguru_logger.info(f"Loaded character '{char_data.get('name', 'Unknown')}' (ID: {char_data.get('id')}) into CCP center editor.") else: app.notify(f"Failed to load character details for ID: {character_id} into editor.", severity="error") await _helper_ccp_clear_center_pane_character_editor_fields(app) @@ -1604,109 +1711,6 @@ async def _helper_ccp_load_character_into_center_pane_editor(app: 'TldwCli', cha app.current_editing_character_id = None app.current_editing_character_data = None -async def handle_ccp_editor_char_save_button_pressed(app: 'TldwCli') -> None: - logger = getattr(app, 'loguru_logger', loguru_logger) # Use loguru_logger if available - logger.info("CCP Editor: Save Character button pressed.") - - if not app.chachanotes_db: # Check the correct DB instance - app.notify("Database service not available.", severity="error") - logger.error("ChaChaNotes DB not available for saving character.") - return - - try: - # Retrieve data from CENTER PANE UI fields - name = app.query_one("#ccp-editor-char-name-input", Input).value.strip() - description = app.query_one("#ccp-editor-char-description-textarea", TextArea).text.strip() - personality = app.query_one("#ccp-editor-char-personality-textarea", TextArea).text.strip() - scenario = app.query_one("#ccp-editor-char-scenario-textarea", TextArea).text.strip() - first_message = app.query_one("#ccp-editor-char-first-message-textarea", TextArea).text.strip() - # system_prompt = app.query_one("#ccp-editor-char-system-prompt-textarea", TextArea).text.strip() # If you add this field - - if not name: - app.notify("Character Name is required.", severity="error", timeout=4) - 
app.query_one("#ccp-editor-char-name-input", Input).focus() - return - - character_data_for_db_op: Dict[str, Any] = { - "name": name, - "description": description, - "personality": personality, - "scenario": scenario, - "first_message": first_message, # Ensure DB schema uses "first_message" or "first_mes" - # "system_prompt": system_prompt, # If added - } - - saved_character_details: Optional[Dict[str, Any]] = None - db = app.chachanotes_db # Use the correct instance - - # Use app.current_editing_character_id to determine if it's a new or existing character - current_editing_id = app.current_editing_character_id - current_editing_data_value = cast(Optional[Dict[str, Any]], app.current_editing_character_data) - - - if current_editing_id is None: # New character - logger.info(f"Attempting to add new character from editor: {name}") - # add_character_card does not take user_id directly in CharactersRAGDB - # It's usually handled by the service layer or implied by the DB connection. - # Assuming your db.add_character_card from CharactersRAGDB takes only character_data - saved_character_details = db.add_character_card( - character_data=character_data_for_db_op - ) - if saved_character_details and saved_character_details.get("id"): - logger.info(f"New character added successfully. ID: {saved_character_details['id']}") - app.notify(f"Character '{name}' saved successfully.", severity="information") - else: - logger.error(f"Failed to save new character '{name}'. 
DB returned: {saved_character_details}") - app.notify(f"Failed to save new character '{name}'.", severity="error") - return - - else: # Existing character - logger.info(f"Attempting to update character ID from editor: {current_editing_id}") - # Get expected_version from app.current_editing_character_data - current_version = current_editing_data_value.get("version") if current_editing_data_value else None - - if current_version is None: - logger.error(f"Cannot update character {current_editing_id}: version is missing from loaded data.") - app.notify("Cannot update: Character version is missing. Please reload.", severity="error") - return - - saved_character_details = db.update_character_card( - character_id=current_editing_id, # Must be str - update_data=character_data_for_db_op, - expected_version=current_version # Must be int - ) - if saved_character_details: - logger.info(f"Character {current_editing_id} updated successfully.") - app.notify(f"Character '{name}' updated successfully.", severity="information") - else: - logger.error(f"Failed to update character '{name}'. DB returned: {saved_character_details}") - app.notify(f"Failed to update character '{name}'.", severity="error") - return - - if saved_character_details and saved_character_details.get("id"): - # Reload into editor to reflect any DB-side changes (new ID, version) - await _helper_ccp_load_character_into_center_pane_editor(app, saved_character_details["id"]) - await populate_ccp_character_select(app) - else: - logger.warning("Save/Update operation completed but no valid character details returned.") - if current_editing_id: - await _helper_ccp_load_character_into_center_pane_editor(app, current_editing_id) - - except ConflictError as e_conflict: - logger.error(f"Conflict saving character: {e_conflict}", exc_info=True) - app.notify(f"Save conflict: {e_conflict}. 
async def handle_ccp_editor_char_cancel_button_pressed(app: 'TldwCli') -> None:
    """Abandon the edit in progress in the CCP center-pane character editor.

    The Cancel button is hidden first. If a new character was being created,
    the editor fields are cleared and the view returns to the previously
    shown card (when one is loaded) or to the conversation messages view.
    If an existing character was being edited, its data is reloaded and shown
    in the card view; when that reload is impossible or fails, the editor is
    cleared and the conversation messages view is shown instead. In every
    handled path the editing state on ``app`` is reset to ``None``.

    Errors are logged and reported through ``app.notify``; none are raised.
    """
    logger = getattr(app, 'loguru_logger', loguru_logger)
    logger.info("CCP Editor: Cancel Character Edit button pressed.")

    try:
        app.query_one("#ccp-editor-char-cancel-button", Button).add_class("hidden")

        editing_id = app.current_editing_character_id

        # --- Case 1: a brand-new character form is being discarded ---
        if editing_id is None:
            logger.info("Cancelling creation of new character. Clearing fields and switching view.")
            await _helper_ccp_clear_center_pane_character_editor_fields(app)
            # Ensure state reflects no character is being edited
            app.current_editing_character_id = None
            app.current_editing_character_data = None
            # Go back to the previously viewed card if there is one,
            # otherwise to the general default view.
            had_card_loaded = (
                app.current_ccp_character_details
                and app.current_ccp_character_details.get("id")
            )
            app.ccp_active_view = (
                "character_card_view" if had_card_loaded else "conversation_messages_view"
            )
            app.notify("New character creation cancelled.", severity="information")
            return

        # --- Case 2: an existing character was being edited ---
        logger.info(f"Cancelling edit for existing character ID: {editing_id}. Restoring card view.")

        if not app.chachanotes_db:
            app.notify("Database service not available to restore character.", severity="error")
            logger.error("ChaChaNotes DB not available to restore character for card view.")
            # Without a DB the card cannot be restored; reset to a neutral view.
            await _helper_ccp_clear_center_pane_character_editor_fields(app)
            app.current_editing_character_id = None
            app.current_editing_character_data = None
            app.ccp_active_view = "conversation_messages_view"  # Fallback view
            return

        try:
            # Re-read the stored character so the card shows unmodified data.
            card_details, _, card_image = ccl.load_character_and_image(
                app.chachanotes_db,
                editing_id,
                app.notes_user_id  # Assuming this is the correct user context
            )

            if not card_details:
                # Nothing came back: clear the editor and use the neutral view.
                app.notify("Could not reload original character data. Clearing editor.", severity="warning")
                logger.warning(f"Failed to reload original data for char ID {editing_id} on cancel.")
                await _helper_ccp_clear_center_pane_character_editor_fields(app)
                app.ccp_active_view = "conversation_messages_view"  # Fallback
            else:
                # Publish the card state before switching to the card view.
                app.current_ccp_character_details = card_details
                app.current_ccp_character_image = card_image
                app.ccp_active_view = "character_card_view"
                app.notify("Character editing cancelled. Displaying original card.", severity="information")

        except Exception as load_error:
            logger.error(f"Error reloading original character data (ID: {editing_id}) on cancel: {load_error}", exc_info=True)
            app.notify("Error restoring character view. Clearing editor.", severity="error")
            await _helper_ccp_clear_center_pane_character_editor_fields(app)
            app.ccp_active_view = "conversation_messages_view"  # Fallback

        # Whatever happened above, no character is being edited any more.
        app.current_editing_character_id = None
        app.current_editing_character_data = None

    except QueryError as query_error:
        logger.error(f"UI component error during cancel character edit (querying cancel button): {query_error}", exc_info=True)
        app.notify("UI Error: Could not properly cancel character edit.", severity="error")
        # Attempt to recover by switching to a default view
        app.ccp_active_view = "conversation_messages_view"
    except Exception as unexpected_error:
        logger.error(f"Unexpected error during cancel character edit: {unexpected_error}", exc_info=True)
        app.notify(f"An unexpected error occurred: {type(unexpected_error).__name__}", severity="error")
        # Attempt to recover
        app.ccp_active_view = "conversation_messages_view"
class _FormFieldQuery:
    """Look up per-media-type form widgets and remember the last selector tried.

    Widget IDs follow the pattern ``#<base_id>-<media_type>``. ``last_selector``
    always holds the selector of the most recently requested widget, so an
    error handler can report which field was being read when a lookup or a
    value conversion failed. Building the selector in one place keeps the
    reported selector and the queried selector from drifting apart.
    """

    def __init__(self, app: 'TldwCli', media_type: str, initial_label: str) -> None:
        self._app = app
        self._media_type = media_type
        # Placeholder reported only if a failure happens before any lookup.
        self.last_selector = initial_label

    def __call__(self, base_id: str, widget_type: type) -> Any:
        """Return the widget ``#<base_id>-<media_type>`` of ``widget_type``."""
        self.last_selector = f"#{base_id}-{self._media_type}"
        return self._app.query_one(self.last_selector, widget_type)


def _collect_common_form_data(app: 'TldwCli', media_type: str) -> Dict[str, Any]:
    """Collect the fields shared by every TLDW API form for ``media_type``.

    Args:
        app: Application whose ``query_one`` is used to find the form widgets.
        media_type: Suffix appended to every widget ID (``#<id>-<media_type>``).

    Returns:
        A dict of form values. ``keywords_str`` is the raw keywords text; the
        media-specific collectors turn it into the ``keywords`` list.

    Raises:
        QueryError: A form widget could not be found (the user is notified first).
        ValueError: Chunk size or overlap is not a valid integer (the user is
            notified first).
    """
    data: Dict[str, Any] = {}
    field = _FormFieldQuery(app, media_type, f"Unknown Field-{media_type}")
    try:
        # Multi-line inputs: one entry per non-blank line.
        data["urls"] = [url.strip() for url in field("tldw-api-urls", TextArea).text.splitlines() if url.strip()]
        data["local_files"] = [fp.strip() for fp in field("tldw-api-local-files", TextArea).text.splitlines() if fp.strip()]

        # Empty text inputs are sent as None rather than "".
        data["title"] = field("tldw-api-title", Input).value or None
        data["author"] = field("tldw-api-author", Input).value or None
        data["keywords_str"] = field("tldw-api-keywords", TextArea).text
        data["custom_prompt"] = field("tldw-api-custom-prompt", TextArea).text or None
        data["system_prompt"] = field("tldw-api-system-prompt", TextArea).text or None

        data["perform_analysis"] = field("tldw-api-perform-analysis", Checkbox).value
        data["overwrite_existing_db"] = field("tldw-api-overwrite-db", Checkbox).value
        data["perform_chunking"] = field("tldw-api-perform-chunking", Checkbox).value

        # An unselected Select reports Select.BLANK; translate that to None.
        chunk_method_select = field("tldw-api-chunk-method", Select)
        data["chunk_method"] = chunk_method_select.value if chunk_method_select.value != Select.BLANK else None

        # Blank numeric inputs fall back to 500 / 200; anything non-numeric
        # raises ValueError, handled below.
        data["chunk_size"] = int(field("tldw-api-chunk-size", Input).value or "500")
        data["chunk_overlap"] = int(field("tldw-api-chunk-overlap", Input).value or "200")

        data["chunk_language"] = field("tldw-api-chunk-lang", Input).value or None
        data["use_adaptive_chunking"] = field("tldw-api-adaptive-chunking", Checkbox).value
        data["use_multi_level_chunking"] = field("tldw-api-multi-level-chunking", Checkbox).value
        data["custom_chapter_pattern"] = field("tldw-api-custom-chapter-pattern", Input).value or None

        analysis_api_select = field("tldw-api-analysis-api-name", Select)
        data["api_name"] = analysis_api_select.value if analysis_api_select.value != Select.BLANK else None

        data["summarize_recursively"] = field("tldw-api-summarize-recursively", Checkbox).value
        data["perform_rolling_summarization"] = field("tldw-api-perform-rolling-summarization", Checkbox).value

    except QueryError as e:
        logger.error(f"Error querying TLDW API form field (around {field.last_selector}): {e}")
        app.notify(f"Error: Missing form field. Details: {e}", severity="error")
        raise  # Re-raise to stop further processing
    except ValueError as e:  # For int() conversion errors
        logger.error(f"Error converting TLDW API form field value (around {field.last_selector}): {e}")
        app.notify(f"Error: Invalid value in form field (around {field.last_selector}). Check numbers.", severity="error")
        raise  # Re-raise
    return data


def _collect_video_specific_data(app: 'TldwCli', common_data: Dict[str, Any], media_type: str) -> ProcessVideoRequest:
    """Add the video-only form fields to ``common_data`` and build the request.

    ``common_data`` is modified in place: the video fields are added, and
    ``keywords_str`` is removed and replaced by a ``keywords`` list split on
    commas.

    Args:
        app: Application whose ``query_one`` is used to find the form widgets.
        common_data: Values previously gathered by ``_collect_common_form_data``.
        media_type: Suffix appended to every widget ID (``#<id>-<media_type>``).

    Returns:
        A ``ProcessVideoRequest`` built from the combined values.

    Raises:
        QueryError: A video form widget could not be found.
        ValueError: A value was rejected while building the request model.
    """
    field = _FormFieldQuery(app, media_type, f"Unknown Video Field-{media_type}")
    try:
        common_data["transcription_model"] = (
            field("tldw-api-video-transcription-model", Input).value
            or "deepdml/faster-whisper-large-v3-turbo-ct2"
        )
        common_data["transcription_language"] = field("tldw-api-video-transcription-language", Input).value or "en"
        common_data["diarize"] = field("tldw-api-video-diarize", Checkbox).value
        common_data["timestamp_option"] = field("tldw-api-video-timestamp", Checkbox).value
        common_data["vad_use"] = field("tldw-api-video-vad", Checkbox).value
        common_data["perform_confabulation_check_of_analysis"] = field("tldw-api-video-confab-check", Checkbox).value
        common_data["start_time"] = field("tldw-api-video-start-time", Input).value or None
        common_data["end_time"] = field("tldw-api-video-end-time", Input).value or None

        # The request takes a list of keywords, not the raw comma-separated text.
        common_data["keywords"] = [k.strip() for k in common_data.pop("keywords_str", "").split(',') if k.strip()]
        return ProcessVideoRequest(**common_data)
    except QueryError as e:
        logger.error(f"Error querying video-specific TLDW API form field (around {field.last_selector}): {e}")
        app.notify(f"Error: Missing video form field. Details: {e}", severity="error")
        raise
    except ValueError as e:  # Raised while converting a value or creating the request model
        logger.error(
            f"Error converting video-specific TLDW API form field value or creating request model (around {field.last_selector}): {e}")
        app.notify(f"Error: Invalid value in video form field (around {field.last_selector}).", severity="error")
        raise
+ current_field_template_for_error = f"#tldw-api-audio-transcription-model-{media_type}" + common_data["transcription_model"] = app.query_one(f"#tldw-api-audio-transcription-model-{media_type}", Input).value or "deepdml/faster-distil-whisper-large-v3.5" + + current_field_template_for_error = f"#tldw-api-audio-transcription-language-{media_type}" + common_data["transcription_language"] = app.query_one(f"#tldw-api-audio-transcription-language-{media_type}", Input).value or "en" + + current_field_template_for_error = f"#tldw-api-audio-diarize-{media_type}" + common_data["diarize"] = app.query_one(f"#tldw-api-audio-diarize-{media_type}", Checkbox).value + + current_field_template_for_error = f"#tldw-api-audio-timestamp-{media_type}" + common_data["timestamp_option"] = app.query_one(f"#tldw-api-audio-timestamp-{media_type}", Checkbox).value + + current_field_template_for_error = f"#tldw-api-audio-vad-{media_type}" + common_data["vad_use"] = app.query_one(f"#tldw-api-audio-vad-{media_type}", Checkbox).value + # TODO: Add confab check if UI element is added: id=f"tldw-api-audio-confab-check-{media_type}" + common_data["keywords"] = [k.strip() for k in common_data.pop("keywords_str", "").split(',') if k.strip()] return ProcessAudioRequest(**common_data) except QueryError as e: - logger.error(f"Error querying audio-specific TLDW API form field (around {current_field_id_for_error}): {e}") + logger.error(f"Error querying audio-specific TLDW API form field (around {current_field_template_for_error.format(media_type=media_type)}): {e}") app.notify(f"Error: Missing audio form field. 
Details: {e}", severity="error") raise + except ValueError as e: # For Pydantic validation or other conversion errors + logger.error( + f"Error converting audio-specific TLDW API form field value or creating request model (around {current_field_template_for_error.format(media_type=media_type)}): {e}") + app.notify(f"Error: Invalid value in audio form field (around {current_field_template_for_error.format(media_type=media_type)}).", severity="error") + raise -async def handle_tldw_api_submit_button_pressed(app: 'TldwCli') -> None: - logger.info("TLDW API Submit button pressed.") - app.notify("Processing request via tldw API...") +def _collect_pdf_specific_data(app: 'TldwCli', common_data: Dict[str, Any], media_type: str) -> ProcessPDFRequest: + current_field_template_for_error = "Unknown PDF Field-{media_type}" + try: + current_field_template_for_error = f"#tldw-api-pdf-engine-{media_type}" + pdf_engine_select = app.query_one(f"#tldw-api-pdf-engine-{media_type}", Select) + common_data["pdf_parsing_engine"] = pdf_engine_select.value if pdf_engine_select.value != Select.BLANK else "pymupdf4llm" - # 1. Get Endpoint URL and Auth + common_data["keywords"] = [k.strip() for k in common_data.pop("keywords_str", "").split(',') if k.strip()] + return ProcessPDFRequest(**common_data) + except QueryError as e: + logger.error(f"Error querying PDF-specific TLDW API form field (around {current_field_template_for_error.format(media_type=media_type)}): {e}") + app.notify(f"Error: Missing PDF form field. 
Details: {e}", severity="error") + raise + except ValueError as e: + logger.error(f"Error creating PDF request model (around {current_field_template_for_error.format(media_type=media_type)}): {e}") + app.notify(f"Error: Invalid value in PDF form field (around {current_field_template_for_error.format(media_type=media_type)}).", severity="error") + raise + +def _collect_ebook_specific_data(app: 'TldwCli', common_data: Dict[str, Any], media_type: str) -> ProcessEbookRequest: + current_field_template_for_error = "Unknown Ebook Field-{media_type}" try: - endpoint_url_input = app.query_one("#tldw-api-endpoint-url", Input) - auth_method_select = app.query_one("#tldw-api-auth-method", Select) - media_type_select = app.query_one("#tldw-api-media-type", Select) + current_field_template_for_error = f"#tldw-api-ebook-extraction-method-{media_type}" + extraction_method_select = app.query_one(f"#tldw-api-ebook-extraction-method-{media_type}", Select) + common_data["extraction_method"] = extraction_method_select.value if extraction_method_select.value != Select.BLANK else "filtered" - endpoint_url = endpoint_url_input.value.strip() - auth_method = auth_method_select.value - selected_media_type = media_type_select.value + common_data["keywords"] = [k.strip() for k in common_data.pop("keywords_str", "").split(',') if k.strip()] + return ProcessEbookRequest(**common_data) + except QueryError as e: + logger.error(f"Error querying Ebook-specific TLDW API form field (around {current_field_template_for_error.format(media_type=media_type)}): {e}") + app.notify(f"Error: Missing Ebook form field. 
Details: {e}", severity="error") + raise + except ValueError as e: + logger.error(f"Error creating Ebook request model (around {current_field_template_for_error.format(media_type=media_type)}): {e}") + app.notify(f"Error: Invalid value in Ebook form field (around {current_field_template_for_error.format(media_type=media_type)}).", severity="error") + raise - if not endpoint_url: - app.notify("API Endpoint URL is required.", severity="error") - endpoint_url_input.focus() - return - if auth_method == Select.BLANK: - app.notify("Please select an Authentication Method.", severity="error") - auth_method_select.focus() - return - if selected_media_type == Select.BLANK: - app.notify("Please select a Media Type to process.", severity="error") - media_type_select.focus() - return +def _collect_document_specific_data(app: 'TldwCli', common_data: Dict[str, Any], media_type: str) -> ProcessDocumentRequest: + # No document-specific fields in UI yet, so it's just converting common_data + try: + common_data["keywords"] = [k.strip() for k in common_data.pop("keywords_str", "").split(',') if k.strip()] + # Add any document-specific fields here if they are added to the UI, using f"...-{media_type}" + return ProcessDocumentRequest(**common_data) + except Exception as e: # Catch potential Pydantic validation errors + logger.error(f"Error creating ProcessDocumentRequest for media_type {media_type}: {e}") + app.notify("Error: Could not prepare document request data.", severity="error") + raise + +def _collect_xml_specific_data(app: 'TldwCli', common_api_data: Dict[str, Any], media_type: str) -> ProcessXMLRequest: + data = {} + current_field_template_for_error = "Unknown XML Field-{media_type}" + try: + data["title"] = common_api_data.get("title") + data["author"] = common_api_data.get("author") + data["keywords"] = [k.strip() for k in common_api_data.get("keywords_str", "").split(',') if k.strip()] + data["system_prompt"] = common_api_data.get("system_prompt") + data["custom_prompt"] = 
common_api_data.get("custom_prompt") + data["api_name"] = common_api_data.get("api_name") + data["api_key"] = common_api_data.get("api_key") + + current_field_template_for_error = f"#tldw-api-xml-auto-summarize-{media_type}" + data["auto_summarize"] = app.query_one(f"#tldw-api-xml-auto-summarize-{media_type}", Checkbox).value + return ProcessXMLRequest(**data) + except QueryError as e: + logger.error(f"Error querying XML-specific TLDW API form field (around {current_field_template_for_error.format(media_type=media_type)}): {e}") + app.notify(f"Error: Missing XML form field. Details: {e}", severity="error") + raise + except ValueError as e: + logger.error(f"Error creating XML request model (around {current_field_template_for_error.format(media_type=media_type)}): {e}") + app.notify(f"Error: Invalid value in XML form field (around {current_field_template_for_error.format(media_type=media_type)}).", severity="error") + raise + +def _collect_mediawiki_specific_data(app: 'TldwCli', common_api_data: Dict[str, Any], media_type: str) -> ProcessMediaWikiRequest: + data = {} + current_field_template_for_error = "Unknown MediaWiki Field-{media_type}" + try: + current_field_template_for_error = f"#tldw-api-mediawiki-wiki-name-{media_type}" + data["wiki_name"] = app.query_one(f"#tldw-api-mediawiki-wiki-name-{media_type}", Input).value or "default_wiki" + current_field_template_for_error = f"#tldw-api-mediawiki-namespaces-{media_type}" + data["namespaces_str"] = app.query_one(f"#tldw-api-mediawiki-namespaces-{media_type}", Input).value or None + current_field_template_for_error = f"#tldw-api-mediawiki-skip-redirects-{media_type}" + data["skip_redirects"] = app.query_one(f"#tldw-api-mediawiki-skip-redirects-{media_type}", Checkbox).value + data["chunk_max_size"] = common_api_data.get("chunk_size", 1000) + return ProcessMediaWikiRequest(**data) + except QueryError as e: + logger.error(f"Error querying MediaWiki-specific TLDW API form field (around 
{current_field_template_for_error.format(media_type=media_type)}): {e}") + app.notify(f"Error: Missing MediaWiki form field. Details: {e}", severity="error") + raise + + +async def handle_tldw_api_submit_button_pressed(app: 'TldwCli', event: Button.Pressed) -> None: + if not event.button.id: + logger.error("Submit button pressed but has no ID. Cannot determine media_type.") + app.notify("Critical error: Submit button has no ID.", severity="error") + return + + logger.info(f"TLDW API Submit button pressed: {event.button.id}") + + selected_media_type = event.button.id.replace("tldw-api-submit-", "") + logger.info(f"Extracted media_type: {selected_media_type} from button ID.") + + app.notify(f"Processing {selected_media_type} request via tldw API...") - auth_token: Optional[str] = None + try: + loading_indicator = app.query_one(f"#tldw-api-loading-indicator-{selected_media_type}", LoadingIndicator) + status_area = app.query_one(f"#tldw-api-status-area-{selected_media_type}", TextArea) + submit_button = event.button # This is already the correct button + endpoint_url_input = app.query_one(f"#tldw-api-endpoint-url-{selected_media_type}", Input) + auth_method_select = app.query_one(f"#tldw-api-auth-method-{selected_media_type}", Select) + except QueryError as e: + logger.error(f"Critical UI component missing for media_type '{selected_media_type}': {e}") + app.notify(f"Error: UI component missing for {selected_media_type}: {e.widget.id if hasattr(e, 'widget') and e.widget else 'Unknown'}. 
Cannot proceed.", severity="error") + return + + endpoint_url = endpoint_url_input.value.strip() + auth_method = str(auth_method_select.value) # Ensure it's a string + + # --- Input Validation --- + if not endpoint_url: + app.notify("API Endpoint URL is required.", severity="error") + endpoint_url_input.focus() + # No need to revert UI state as it hasn't been changed yet + return + + if not (endpoint_url.startswith("http://") or endpoint_url.startswith("https://")): + app.notify("API Endpoint URL must start with http:// or https://.", severity="error") + endpoint_url_input.focus() + # No need to revert UI state + return + + if auth_method == str(Select.BLANK): + app.notify("Please select an Authentication Method.", severity="error") + auth_method_select.focus() + return + + # --- Set UI to Loading State --- + loading_indicator.display = True + status_area.clear() + status_area.load_text("Validating inputs and preparing request...") + status_area.display = True + submit_button.disabled = True + # app.notify is already called at the start of the function + + # --- Get Auth Token (after basic validations pass) --- + auth_token: Optional[str] = None + try: if auth_method == "custom_token": - custom_token_input = app.query_one("#tldw-api-custom-token", Input) + custom_token_input = app.query_one(f"#tldw-api-custom-token-{selected_media_type}", Input) auth_token = custom_token_input.value.strip() if not auth_token: app.notify("Custom Auth Token is required for selected method.", severity="error") custom_token_input.focus() + # Revert UI loading state + loading_indicator.display = False + submit_button.disabled = False + status_area.load_text("Custom token required. Submission halted.") return elif auth_method == "config_token": auth_token = app.app_config.get("tldw_api", {}).get("auth_token_config") if not auth_token: app.notify("Auth Token not found in tldw_api.auth_token_config. 
Please configure or use custom.", severity="error") + # Revert UI loading state + loading_indicator.display = False + submit_button.disabled = False + status_area.load_text("Config token missing. Submission halted.") return - # Add more auth methods like ENV VAR here if needed - except QueryError as e: - logger.error(f"UI component not found for TLDW API submission: {e}") - app.notify(f"Error: Missing required UI field: {e.widget.id if e.widget else 'Unknown'}", severity="error") + logger.error(f"UI component not found for TLDW API auth token for {selected_media_type}: {e}") + app.notify(f"Error: Missing UI field for auth for {selected_media_type}: {e.widget.id if hasattr(e, 'widget') and e.widget else 'Unknown'}", severity="error") + loading_indicator.display = False + submit_button.disabled = False + status_area.load_text("Error accessing auth fields. Submission halted.") return - # 2. Collect Form Data and Create Request Model + status_area.load_text("Collecting form data and building request...") request_model: Optional[Any] = None local_file_paths: Optional[List[str]] = None try: - common_data = _collect_common_form_data(app) - local_file_paths = common_data.pop("local_files", []) # Extract local files - common_data["api_key"] = auth_token # Pass the resolved token as api_key for the request model + common_data = _collect_common_form_data(app, selected_media_type) # Pass selected_media_type + local_file_paths = common_data.pop("local_files", []) + common_data["api_key"] = auth_token if selected_media_type == "video": - request_model = _collect_video_specific_data(app, common_data) + request_model = _collect_video_specific_data(app, common_data, selected_media_type) elif selected_media_type == "audio": - request_model = _collect_audio_specific_data(app, common_data) - # Add elif for ProcessPDFRequest, ProcessEbookRequest, etc. 
- # Example for PDF: - # elif selected_media_type == "pdf": - # specific_pdf_data = {} # Collect PDF specific fields - # request_model = ProcessPDFRequest(**common_data, **specific_pdf_data) + request_model = _collect_audio_specific_data(app, common_data, selected_media_type) + elif selected_media_type == "pdf": + request_model = _collect_pdf_specific_data(app, common_data, selected_media_type) + elif selected_media_type == "ebook": + request_model = _collect_ebook_specific_data(app, common_data, selected_media_type) + elif selected_media_type == "document": + request_model = _collect_document_specific_data(app, common_data, selected_media_type) + elif selected_media_type == "xml": + request_model = _collect_xml_specific_data(app, common_data, selected_media_type) + elif selected_media_type == "mediawiki_dump": + request_model = _collect_mediawiki_specific_data(app, common_data, selected_media_type) else: app.notify(f"Media type '{selected_media_type}' not yet supported by this client form.", severity="warning") + loading_indicator.display = False + submit_button.disabled = False + status_area.load_text("Unsupported media type selected. Submission halted.") return - - except QueryError: # Already handled by app.notify in collectors - return - except ValueError: # Already handled + except (QueryError, ValueError) as e: + logger.error(f"Error collecting form data for {selected_media_type}: {e}", exc_info=True) + app.notify(f"Error in form data for {selected_media_type}: {str(e)[:100]}. Please check fields.", severity="error") + loading_indicator.display = False + submit_button.disabled = False + status_area.load_text(f"Error processing form data: {str(e)[:100]}. 
Submission halted.") return except Exception as e: - logger.error(f"Error preparing request model for TLDW API: {e}", exc_info=True) + logger.error(f"Unexpected error preparing request model for TLDW API ({selected_media_type}): {e}", exc_info=True) app.notify("Error: Could not prepare data for API request.", severity="error") + loading_indicator.display = False + submit_button.disabled = False + status_area.load_text("Unexpected error preparing request. Submission halted.") return if not request_model: app.notify("Failed to create request model.", severity="error") + loading_indicator.display = False + submit_button.disabled = False + status_area.load_text("Internal error: Failed to create request model. Submission halted.") return - # Ensure URLs and local_file_paths are not both empty if they are the primary inputs - if not request_model.urls and not local_file_paths: - app.notify("Please provide at least one URL or one local file path.", severity="warning") - try: - app.query_one("#tldw-api-urls", TextArea).focus() - except QueryError: pass - return + # URL/Local file validation (adjust for XML/MediaWiki which primarily use local_file_paths) + if not getattr(request_model, 'urls', None) and not local_file_paths: + # This check might be specific to certain request models, adjust if necessary + # For XML and MediaWiki, local_file_paths is primary and urls might not exist on model + is_xml_or_mediawiki = selected_media_type in ["xml", "mediawiki_dump"] + if not is_xml_or_mediawiki or (is_xml_or_mediawiki and not local_file_paths): + app.notify("Please provide at least one URL or one local file path.", severity="warning") + try: + app.query_one(f"#tldw-api-urls-{selected_media_type}", TextArea).focus() + except QueryError: pass + loading_indicator.display = False + submit_button.disabled = False + status_area.load_text("Missing URL or local file. 
Submission halted.") + return + status_area.load_text("Connecting to TLDW API and sending request...") + api_client = TLDWAPIClient(base_url=endpoint_url, token=auth_token) + overwrite_db = common_data.get("overwrite_existing_db", False) # From common_data - # 3. Initialize API Client and Run Worker - api_client = TLDWAPIClient(base_url=endpoint_url, token=auth_token) # Token for client, api_key in model for server - overwrite_db = common_data.get("overwrite_existing_db", False) # Get the DB overwrite flag + # Worker and callbacks remain largely the same but need to use the correct UI element IDs for this tab + # The on_worker_success and on_worker_failure need to know which loading_indicator/submit_button/status_area to update. + # This is implicitly handled as they are queried again using the selected_media_type. - async def process_media_worker(): - nonlocal request_model # Allow modification for XML/MediaWiki + async def process_media_worker(): # This worker is fine + nonlocal request_model try: if selected_media_type == "video": return await api_client.process_video(request_model, local_file_paths) elif selected_media_type == "audio": return await api_client.process_audio(request_model, local_file_paths) - # Add elif for other types... - # elif selected_media_type == "xml": - # if not local_file_paths: raise ValueError("XML processing requires a local file path.") - # return await api_client.process_xml(request_model, local_file_paths[0]) # XML takes single path - # elif selected_media_type == "mediawiki_dump": - # if not local_file_paths: raise ValueError("MediaWiki processing requires a local file path.") - # # For streaming, the worker should yield, not return directly. - # # This example shows how to initiate and collect, actual handling of stream in on_success would differ. 
- # results = [] - # async for item in api_client.process_mediawiki_dump(request_model, local_file_paths[0]): - # results.append(item) # Collect all streamed items - # return results # Return collected list for on_success + elif selected_media_type == "pdf": + return await api_client.process_pdf(request_model, local_file_paths) + elif selected_media_type == "ebook": + return await api_client.process_ebook(request_model, local_file_paths) + elif selected_media_type == "document": + return await api_client.process_document(request_model, local_file_paths) + elif selected_media_type == "xml": + if not local_file_paths: raise ValueError("XML processing requires a local file path.") + return await api_client.process_xml(request_model, local_file_paths[0]) + elif selected_media_type == "mediawiki_dump": + if not local_file_paths: raise ValueError("MediaWiki processing requires a local file path.") + # For streaming, the worker should yield, not return directly. + # This example shows how to initiate and collect, actual handling of stream in on_success would differ. + results = [] + async for item in api_client.process_mediawiki_dump(request_model, local_file_paths[0]): + results.append(item) + return results else: raise NotImplementedError(f"Client-side processing for {selected_media_type} not implemented.") finally: await api_client.close() - def on_worker_success(response_data: Any): # Type hint can be Union of BatchMediaProcessResponse, etc. - app.notify("TLDW API request successful. 
Ingesting results...", timeout=3) - logger.info(f"TLDW API Response: {response_data}") + def on_worker_success(response_data: Any): + # Query the specific UI elements for this tab + try: + current_loading_indicator = app.query_one(f"#tldw-api-loading-indicator-{selected_media_type}", LoadingIndicator) + current_loading_indicator.display = False + # current_submit_button = app.query_one(f"#tldw-api-submit-{selected_media_type}", Button) # Button instance is already event.button + submit_button.disabled = False # submit_button is already defined from event.button + except QueryError as e_ui: + logger.error(f"UI component not found in on_worker_success for {selected_media_type}: {e_ui}") + + app.notify(f"TLDW API request for {selected_media_type} successful. Processing results...", timeout=2) + logger.info(f"TLDW API Response for {selected_media_type}: {response_data}") + + try: + current_status_area = app.query_one(f"#tldw-api-status-area-{selected_media_type}", TextArea) + current_status_area.clear() + except QueryError: + logger.error(f"Could not find status_area for {selected_media_type} in on_worker_success.") + return # Cannot display results + if not app.media_db: logger.error("Media_DB_v2 not initialized. 
Cannot ingest API results.") app.notify("Error: Local media database not available.", severity="error") + current_status_area.load_text("## Error\n\nLocal media database not available.") return processed_count = 0 error_count = 0 + successful_ingestions_details = [] # To store details of successful items # Handle different response types results_to_ingest: List[MediaItemProcessResult] = [] - if isinstance(response_data, dict) and "results" in response_data and "processed_count" in response_data: # Standard BatchMediaProcessResponse - typed_response = response_data # It's already a dict here, Pydantic parsing happened in client - results_to_ingest = [MediaItemProcessResult(**item) for item in typed_response.get("results", [])] + if isinstance(response_data, BatchMediaProcessResponse): + results_to_ingest = response_data.results + elif isinstance(response_data, dict) and "results" in response_data: + if "processed_count" in response_data: + raw_results = response_data.get("results", []) + for item_dict in raw_results: + # Try to coerce into MediaItemProcessResult, might need specific mapping for XML + # For now, assume XML result items can be mostly mapped. + results_to_ingest.append(MediaItemProcessResult(**item_dict)) elif isinstance(response_data, list) and all(isinstance(item, ProcessedMediaWikiPage) for item in response_data): # MediaWiki dump (if collected into a list by worker) @@ -1098,13 +1300,19 @@ def on_worker_success(response_data: Any): # Type hint can be Union of BatchMedi media_type="mediawiki_article", # or "mediawiki_page" metadata={"title": mw_page.title, "page_id": mw_page.page_id, "namespace": mw_page.namespace, "revision_id": mw_page.revision_id, "timestamp": mw_page.timestamp}, content=mw_page.content, - chunks=[{"text": chunk.get("text", ""), "metadata": chunk.get("metadata", {})} for chunk in mw_page.chunks] if mw_page.chunks else None, # Simplified chunk adaptation - # analysis, summary, etc. 
might not be directly available from MediaWiki processing + chunks=[{"text": chunk.get("text", ""), "metadata": chunk.get("metadata", {})} for chunk in mw_page.chunks] if mw_page.chunks else None, )) + else: + logger.error(f"Unexpected TLDW API response data type for {selected_media_type}: {type(response_data)}.") + current_status_area.load_text(f"## API Request Processed\n\nUnexpected response format. Raw response logged.") + current_status_area.display = True + app.notify("Error: Received unexpected data format from API.", severity="error") + return # Add elif for XML if it returns a single ProcessXMLResponseItem or similar for item_result in results_to_ingest: if item_result.status == "Success": + media_id_ingested = None # For storing the ID if ingestion is successful try: # Prepare data for add_media_with_keywords # Keywords: API response might not have 'keywords'. Use originally submitted ones if available. @@ -1148,35 +1356,131 @@ def on_worker_success(response_data: Any): # Type hint can be Union of BatchMedi chunks=unvectorized_chunks_to_save # Pass prepared chunks ) if media_id: - logger.info(f"Successfully ingested '{item_result.input_ref}' into local DB. Media ID: {media_id}. Message: {msg}") + logger.info(f"Successfully ingested '{item_result.input_ref}' into local DB for {selected_media_type}. Media ID: {media_id}. Message: {msg}") processed_count += 1 + media_id_ingested = media_id # Store the ID else: - logger.error(f"Failed to ingest '{item_result.input_ref}' into local DB. Message: {msg}") + logger.error(f"Failed to ingest '{item_result.input_ref}' into local DB for {selected_media_type}. 
Message: {msg}") error_count += 1 except Exception as e_ingest: - logger.error(f"Error ingesting item '{item_result.input_ref}' into local DB: {e_ingest}", exc_info=True) + logger.error(f"Error ingesting item '{item_result.input_ref}' for {selected_media_type} into local DB: {e_ingest}", exc_info=True) error_count += 1 + + if media_id_ingested: # Only add to details if successfully ingested + successful_ingestions_details.append({ + "input_ref": item_result.input_ref, + "title": item_result.metadata.get("title", "N/A") if item_result.metadata else "N/A", + "media_type": item_result.media_type, + "db_id": media_id_ingested + }) else: - logger.error(f"API processing error for '{item_result.input_ref}': {item_result.error}") + logger.error(f"API processing error for '{item_result.input_ref}' ({selected_media_type}): {item_result.error}") error_count += 1 - final_msg = f"Ingestion complete. Processed: {processed_count}, Errors: {error_count}." - app.notify(final_msg, severity="information" if error_count == 0 else "warning", timeout=5) + summary_parts = [f"## TLDW API Request Successful ({selected_media_type.title()})\n\n"] + # ... (rest of summary construction similar to before) ... 
+ if processed_count == 0 and error_count == 0 and not results_to_ingest: + summary_parts.append("API request successful, but no items were provided or found for processing.\n") + elif processed_count == 0 and error_count > 0: + summary_parts.append(f"API request successful, but no new items were ingested due to errors.\n") + summary_parts.append(f"- Successfully processed items by API: {processed_count}\n") # This might be confusing if API said success but ingest failed + summary_parts.append(f"- Items with errors during API processing or local ingestion: {error_count}\n") + else: + summary_parts.append(f"- Successfully processed and ingested items: {processed_count}\n") + summary_parts.append(f"- Items with errors during API processing or local ingestion: {error_count}\n\n") + + if error_count > 0: + summary_parts.append("**Please check the application logs for details on any errors.**\n\n") + + if successful_ingestions_details: + if len(successful_ingestions_details) <= 5: + summary_parts.append("### Successfully Ingested Items:\n") + for detail in successful_ingestions_details: + title_str = f" (Title: {detail['title']})" if detail['title'] != 'N/A' else "" + summary_parts.append(f"- **Input:** `{detail['input_ref']}`{title_str}\n") # Use backticks for input ref + summary_parts.append(f" - **Type:** {detail['media_type']}, **DB ID:** {detail['db_id']}\n") + else: + summary_parts.append(f"Details for {len(successful_ingestions_details)} successfully ingested items are available in the logs.\n") + elif processed_count > 0 : # Processed but no details (should not happen if logic is correct) + summary_parts.append("Successfully processed items, but details are unavailable.\n") + + + current_status_area.load_text("".join(summary_parts)) + current_status_area.display = True + current_status_area.scroll_home(animate=False) + + notify_msg = f"{selected_media_type.title()} Ingestion: {processed_count} done, {error_count} errors." 
+ app.notify(notify_msg, severity="information" if error_count == 0 and processed_count > 0 else "warning", timeout=6) + def on_worker_failure(error: Exception): - logger.error(f"TLDW API request worker failed: {error}", exc_info=True) + try: + current_loading_indicator = app.query_one(f"#tldw-api-loading-indicator-{selected_media_type}", LoadingIndicator) + current_loading_indicator.display = False + # current_submit_button = app.query_one(f"#tldw-api-submit-{selected_media_type}", Button) + submit_button.disabled = False # submit_button is already defined from event.button + except QueryError as e_ui: + logger.error(f"UI component not found in on_worker_failure for {selected_media_type}: {e_ui}") + + logger.error(f"TLDW API request worker failed for {selected_media_type}: {error}", exc_info=True) + + error_message_parts = [f"## API Request Failed! ({selected_media_type.title()})\n\n"] + # ... (rest of error message construction as before) ... + brief_notify_message = f"{selected_media_type.title()} API Request Failed." 
if isinstance(error, APIResponseError): - app.notify(f"API Error {error.status_code}: {str(error)[:200]}", severity="error", timeout=8) - elif isinstance(error, (APIConnectionError, APIRequestError, AuthenticationError)): - app.notify(f"API Client Error: {str(error)[:200]}", severity="error", timeout=8) + error_type = "API Error" + error_message_parts.append(f"**Type:** API Error\n**Status Code:** {error.status_code}\n**Message:** `{str(error)}`\n") + if error.detail: + error_message_parts.append(f"**Details:**\n```\n{error.detail}\n```\n") + brief_notify_message = f"{selected_media_type.title()} API Error {error.status_code}: {str(error)[:50]}" + if error.response_data: + try: + # Try to pretty-print if it's JSON, otherwise just str + response_data_str = json.dumps(error.response_data, indent=2) + except (TypeError, ValueError): + response_data_str = str(error.response_data) + error_message_parts.append(f"**Response Data:**\n```json\n{response_data_str}\n```\n") + brief_notify_message = f"API Error {error.status_code}: {str(error)[:100]}" + elif isinstance(error, AuthenticationError): + error_type = "Authentication Error" + error_message_parts.append(f"**Type:** {error_type}\n") + error_message_parts.append(f"**Message:** `{str(error)}`\n") + brief_notify_message = f"Auth Error: {str(error)[:100]}" + elif isinstance(error, APIConnectionError): + error_type = "Connection Error" + error_message_parts.append(f"**Type:** {error_type}\n") + error_message_parts.append(f"**Message:** `{str(error)}`\n") + brief_notify_message = f"Connection Error: {str(error)[:100]}" + elif isinstance(error, APIRequestError): + error_type = "API Request Error" + error_message_parts.append(f"**Type:** {error_type}\n") + error_message_parts.append(f"**Message:** `{str(error)}`\n") + brief_notify_message = f"Request Error: {str(error)[:100]}" else: - app.notify(f"TLDW API processing failed: {str(error)[:200]}", severity="error", timeout=8) + error_type = "General Error" + 
error_message_parts.append(f"**Type:** {error_type}\n") + error_message_parts.append(f"**Message:** `{str(error)}`\n") + brief_notify_message = f"Processing failed: {str(error)[:100]}" + + try: + current_status_area = app.query_one(f"#tldw-api-status-area-{selected_media_type}", TextArea) + current_status_area.clear() + current_status_area.load_text("".join(error_message_parts)) + current_status_area.display = True + current_status_area.scroll_home(animate=False) + except QueryError: + logger.error(f"Could not find status_area for {selected_media_type} to display error.") + app.notify(f"Critical: Status area for {selected_media_type} not found. Error: {brief_notify_message}", severity="error", timeout=10) + return + + app.notify(brief_notify_message, severity="error", timeout=8) + app.run_worker( process_media_worker, - name="tldw_api_media_processing", + name=f"tldw_api_processing_{selected_media_type}", # Unique worker name per tab group="api_calls", - description="Processing media via TLDW API" + description=f"Processing {selected_media_type} media via TLDW API" ) diff --git a/tldw_chatbook/UI/Conv_Char_Window.py b/tldw_chatbook/UI/Conv_Char_Window.py index ea887eda..9f16fe64 100644 --- a/tldw_chatbook/UI/Conv_Char_Window.py +++ b/tldw_chatbook/UI/Conv_Char_Window.py @@ -83,11 +83,27 @@ def compose(self) -> ComposeResult: yield Button("Edit this Character", id="ccp-card-edit-button", variant="default") yield Button("Save Changes", id="ccp-card-save-button", variant="success") # Added variant yield Button("Clone Character", id="ccp-card-clone-button", variant="primary") # Added variant - # Container for character editing UI (initially hidden by CSS) with Container(id="ccp-character-editor-view", classes="ccp-view-area"): yield Static("Character Editor", classes="pane-title", id="ccp-center-pane-title-char-editor") - # Character editor fields will be mounted here + yield Label("Character Name:", classes="sidebar-label") + yield 
Input(id="ccp-editor-char-name-input", placeholder="Character name...", classes="sidebar-input") + yield Label("Avatar Path/URL:", classes="sidebar-label") + yield Input(id="ccp-editor-char-avatar-input", placeholder="Path or URL to avatar image...", classes="sidebar-input") + yield Label("Description:", classes="sidebar-label") + yield TextArea(id="ccp-editor-char-description-textarea", classes="sidebar-textarea ccp-prompt-textarea") + yield Label("Personality:", classes="sidebar-label") + yield TextArea(id="ccp-editor-char-personality-textarea", classes="sidebar-textarea ccp-prompt-textarea") + yield Label("Scenario:", classes="sidebar-label") + yield TextArea(id="ccp-editor-char-scenario-textarea", classes="sidebar-textarea ccp-prompt-textarea") + yield Label("First Message (Greeting):", classes="sidebar-label") + yield TextArea(id="ccp-editor-char-first-message-textarea", classes="sidebar-textarea ccp-prompt-textarea") + yield Label("Keywords (comma-separated):", classes="sidebar-label") + yield TextArea(id="ccp-editor-char-keywords-textarea", classes="sidebar-textarea ccp-prompt-textarea") + with Horizontal(classes="ccp-prompt-action-buttons"): + yield Button("Save Character", id="ccp-editor-char-save-button", variant="success", classes="sidebar-button") + yield Button("Clone Character", id="ccp-editor-char-clone-button", classes="sidebar-button") + yield Button("Cancel Edit", id="ccp-editor-char-cancel-button", variant="error", classes="sidebar-button hidden") # Container for prompt editing UI (initially hidden by CSS) with Container(id="ccp-prompt-editor-view", classes="ccp-view-area"): diff --git a/tldw_chatbook/UI/Ingest_Window.py b/tldw_chatbook/UI/Ingest_Window.py index e2aa1828..12adbe1c 100644 --- a/tldw_chatbook/UI/Ingest_Window.py +++ b/tldw_chatbook/UI/Ingest_Window.py @@ -7,7 +7,7 @@ # 3rd-Party Imports from textual.app import ComposeResult from textual.containers import Container, VerticalScroll, Horizontal, Vertical -from textual.widgets import 
Static, Button, Input, Select, Checkbox, TextArea, Label, RadioSet, RadioButton, Collapsible, ListView, ListItem, Markdown +from textual.widgets import Static, Button, Input, Select, Checkbox, TextArea, Label, RadioSet, RadioButton, Collapsible, ListView, ListItem, Markdown, LoadingIndicator # # Local Imports from ..tldw_api.schemas import MediaType, ChunkMethod, PdfEngine # Import Enums @@ -24,13 +24,17 @@ # # Functions: +MEDIA_TYPES = ['video', 'audio', 'document', 'pdf', 'ebook', 'xml', 'mediawiki_dump'] + INGEST_VIEW_IDS = [ "ingest-view-prompts", "ingest-view-characters", - "ingest-view-media", "ingest-view-notes", "ingest-view-tldw-api" + "ingest-view-media", "ingest-view-notes", + *[f"ingest-view-tldw-api-{mt}" for mt in MEDIA_TYPES] ] INGEST_NAV_BUTTON_IDS = [ "ingest-nav-prompts", "ingest-nav-characters", - "ingest-nav-media", "ingest-nav-notes", "ingest-nav-tldw-api" + "ingest-nav-media", "ingest-nav-notes", + *[f"ingest-nav-tldw-api-{mt}" for mt in MEDIA_TYPES] ] class IngestWindow(Container): @@ -38,8 +42,8 @@ def __init__(self, app_instance: 'TldwCli', **kwargs): super().__init__(**kwargs) self.app_instance = app_instance - def compose_tldw_api_form(self) -> ComposeResult: - """Composes the form for 'Ingest Media via tldw API'.""" + def compose_tldw_api_form(self, media_type: str) -> ComposeResult: + """Composes the common part of the form for 'Ingest Media via tldw API'.""" # Get default API URL from app config default_api_url = self.app_instance.app_config.get("tldw_api", {}).get("base_url", "http://127.0.0.1:8000") @@ -52,10 +56,10 @@ def compose_tldw_api_form(self) -> ComposeResult: if not analysis_provider_options: analysis_provider_options = [("No Providers Configured", Select.BLANK)] - with VerticalScroll(classes="ingest-form-scrollable"): + with VerticalScroll(classes="ingest-form-scrollable"): # TODO: Consider if this scrollable itself needs a unique ID if we have nested ones. For now, assuming not. 
yield Static("TLDW API Configuration", classes="sidebar-title") yield Label("API Endpoint URL:") - yield Input(default_api_url, id="tldw-api-endpoint-url", placeholder="http://localhost:8000") + yield Input(default_api_url, id=f"tldw-api-endpoint-url-{media_type}", placeholder="http://localhost:8000") yield Label("Authentication Method:") yield Select( @@ -65,154 +69,172 @@ def compose_tldw_api_form(self) -> ComposeResult: # ("Environment Variable", "env_var_token") # Future: add if needed ], prompt="Select Auth Method...", - id="tldw-api-auth-method", + id=f"tldw-api-auth-method-{media_type}", value="config_token" # Default ) - yield Label("Custom Auth Token:", id="tldw-api-custom-token-label", classes="hidden") # Hidden by default + yield Label("Custom Auth Token:", id=f"tldw-api-custom-token-label-{media_type}", classes="hidden") # Hidden by default yield Input( "", - id="tldw-api-custom-token", + id=f"tldw-api-custom-token-{media_type}", placeholder="Enter custom Bearer token", password=True, - classes="hidden" # Hidden by default + classes="hidden", # Hidden by default + tooltip="Enter your Bearer token for the TLDW API. This is used if 'Custom Token' is selected as the authentication method." ) yield Static("Media Details & Processing Options", classes="sidebar-title") - yield Label("Media Type to Process:") - # Use values from the MediaType Literal for options - media_type_options = [(mt, mt) for mt in MediaType.__args__] - yield Select(media_type_options, prompt="Select Media Type...", id="tldw-api-media-type") + # Media Type selection is now handled by which form is shown # --- Common Input Fields --- + # FIXME/TODO: Consider if URL/Local File input is applicable for all media_type or if this also needs to be specific + # For example, mediawiki_dump typically uses a local file path. 
yield Label("Media URLs (one per line):") - yield TextArea(id="tldw-api-urls", language="plain_text", classes="ingest-textarea-small") + yield TextArea(id=f"tldw-api-urls-{media_type}", language="plain_text", classes="ingest-textarea-small") yield Label( "Local File Paths (one per line, if API supports local path references or for client-side upload):") - yield TextArea(id="tldw-api-local-files", language="plain_text", classes="ingest-textarea-small") + yield TextArea(id=f"tldw-api-local-files-{media_type}", language="plain_text", classes="ingest-textarea-small") - with Horizontal(classes="ingest-form-row"): + with Horizontal(classes="title-author-row"): # Changed class here with Vertical(classes="ingest-form-col"): yield Label("Title (Optional):") - yield Input(id="tldw-api-title", placeholder="Optional title override") + yield Input(id=f"tldw-api-title-{media_type}", placeholder="Optional title override") with Vertical(classes="ingest-form-col"): yield Label("Author (Optional):") - yield Input(id="tldw-api-author", placeholder="Optional author override") + yield Input(id=f"tldw-api-author-{media_type}", placeholder="Optional author override") yield Label("Keywords (comma-separated):") - yield TextArea(id="tldw-api-keywords", classes="ingest-textarea-small") + yield TextArea(id=f"tldw-api-keywords-{media_type}", classes="ingest-textarea-small") # --- Common Processing Options --- yield Label("Custom Prompt (for analysis):") - yield TextArea(id="tldw-api-custom-prompt", classes="ingest-textarea-medium") + yield TextArea(id=f"tldw-api-custom-prompt-{media_type}", classes="ingest-textarea-medium") yield Label("System Prompt (for analysis):") - yield TextArea(id="tldw-api-system-prompt", classes="ingest-textarea-medium") - yield Checkbox("Perform Analysis (e.g., Summarization)", True, id="tldw-api-perform-analysis") + yield TextArea(id=f"tldw-api-system-prompt-{media_type}", classes="ingest-textarea-medium") + yield Checkbox("Perform Analysis (e.g., Summarization)", 
True, id=f"tldw-api-perform-analysis-{media_type}") yield Label("Analysis API Provider (if analysis enabled):") - yield Select(analysis_provider_options, id="tldw-api-analysis-api-name", + yield Select(analysis_provider_options, id=f"tldw-api-analysis-api-name-{media_type}", prompt="Select API for Analysis...") # --- Common Chunking Options --- - with Collapsible(title="Chunking Options", collapsed=True, id="tldw-api-chunking-collapsible"): - yield Checkbox("Perform Chunking", True, id="tldw-api-perform-chunking") + with Collapsible(title="Chunking Options", collapsed=True, id=f"tldw-api-chunking-collapsible-{media_type}"): + yield Checkbox("Perform Chunking", True, id=f"tldw-api-perform-chunking-{media_type}") yield Label("Chunking Method:") chunk_method_options = [(cm, cm) for cm in ChunkMethod.__args__] - yield Select(chunk_method_options, id="tldw-api-chunk-method", prompt="Default (per type)") + yield Select(chunk_method_options, id=f"tldw-api-chunk-method-{media_type}", prompt="Default (per type)") with Horizontal(classes="ingest-form-row"): with Vertical(classes="ingest-form-col"): yield Label("Chunk Size:") - yield Input("500", id="tldw-api-chunk-size", type="integer") + yield Input("500", id=f"tldw-api-chunk-size-{media_type}", type="integer") with Vertical(classes="ingest-form-col"): yield Label("Chunk Overlap:") - yield Input("200", id="tldw-api-chunk-overlap", type="integer") + yield Input("200", id=f"tldw-api-chunk-overlap-{media_type}", type="integer") yield Label("Chunk Language (e.g., 'en', optional):") - yield Input(id="tldw-api-chunk-lang", placeholder="Defaults to media language") - yield Checkbox("Use Adaptive Chunking", False, id="tldw-api-adaptive-chunking") - yield Checkbox("Use Multi-level Chunking", False, id="tldw-api-multi-level-chunking") + yield Input(id=f"tldw-api-chunk-lang-{media_type}", placeholder="Defaults to media language") + yield Checkbox("Use Adaptive Chunking", False, id=f"tldw-api-adaptive-chunking-{media_type}") + yield 
Checkbox("Use Multi-level Chunking", False, id=f"tldw-api-multi-level-chunking-{media_type}") yield Label("Custom Chapter Pattern (Regex, optional):") - yield Input(id="tldw-api-custom-chapter-pattern", placeholder="e.g., ^Chapter\\s+\\d+") + yield Input(id=f"tldw-api-custom-chapter-pattern-{media_type}", placeholder="e.g., ^Chapter\\s+\\d+") # --- Common Analysis Options --- with Collapsible(title="Advanced Analysis Options", collapsed=True, - id="tldw-api-analysis-opts-collapsible"): - yield Checkbox("Summarize Recursively (if chunked)", False, id="tldw-api-summarize-recursively") - yield Checkbox("Perform Rolling Summarization", False, id="tldw-api-perform-rolling-summarization") + id=f"tldw-api-analysis-opts-collapsible-{media_type}"): + yield Checkbox("Summarize Recursively (if chunked)", False, id=f"tldw-api-summarize-recursively-{media_type}") + yield Checkbox("Perform Rolling Summarization", False, id=f"tldw-api-perform-rolling-summarization-{media_type}") # Add more analysis options here as needed - # --- Media-Type Specific Option Containers (initially hidden) --- - # Video Options - with Container(id=TLDW_API_VIDEO_OPTIONS_ID, classes="tldw-api-media-specific-options hidden"): - yield Static("Video Specific Options", classes="sidebar-title") - yield Label("Transcription Model:") - yield Input("deepdml/faster-whisper-large-v3-turbo-ct2", id="tldw-api-video-transcription-model") - yield Label("Transcription Language (e.g., 'en'):") - yield Input("en", id="tldw-api-video-transcription-language") - yield Checkbox("Enable Speaker Diarization", False, id="tldw-api-video-diarize") - yield Checkbox("Include Timestamps in Transcription", True, id="tldw-api-video-timestamp") - yield Checkbox("Enable VAD (Voice Activity Detection)", False, id="tldw-api-video-vad") - yield Checkbox("Perform Confabulation Check of Analysis", False, id="tldw-api-video-confab-check") - with Horizontal(classes="ingest-form-row"): - with Vertical(classes="ingest-form-col"): - yield 
Label("Start Time (HH:MM:SS or secs):") - yield Input(id="tldw-api-video-start-time", placeholder="Optional") - with Vertical(classes="ingest-form-col"): - yield Label("End Time (HH:MM:SS or secs):") - yield Input(id="tldw-api-video-end-time", placeholder="Optional") - - # Audio Options (Example structure, add fields as needed) - with Container(id=TLDW_API_AUDIO_OPTIONS_ID, classes="tldw-api-media-specific-options hidden"): - yield Static("Audio Specific Options", classes="sidebar-title") - yield Label("Transcription Model:") - yield Input("deepdml/faster-distil-whisper-large-v3.5", id="tldw-api-audio-transcription-model") - # Add other audio specific fields from ProcessAudioRequest: lang, diarize, timestamp, vad, confab - - # PDF Options - with Container(id=TLDW_API_PDF_OPTIONS_ID, classes="tldw-api-media-specific-options hidden"): - yield Static("PDF Specific Options", classes="sidebar-title") + # --- Media-Type Specific Option Containers have been removed from this common method --- + # They will be added to the media-type specific views/containers directly. 
+ + # --- Inserted Media-Type Specific Options --- + if media_type == "video": + with Container(id=TLDW_API_VIDEO_OPTIONS_ID, classes="tldw-api-media-specific-options"): # ID of container itself is fine + yield Static("Video Specific Options", classes="sidebar-title") + yield Label("Transcription Model:") + yield Input("deepdml/faster-whisper-large-v3-turbo-ct2", id=f"tldw-api-video-transcription-model-{media_type}") + yield Label("Transcription Language (e.g., 'en'):") + yield Input("en", id=f"tldw-api-video-transcription-language-{media_type}") + yield Checkbox("Enable Speaker Diarization", False, id=f"tldw-api-video-diarize-{media_type}") + yield Checkbox("Include Timestamps in Transcription", True, id=f"tldw-api-video-timestamp-{media_type}") + yield Checkbox("Enable VAD (Voice Activity Detection)", False, id=f"tldw-api-video-vad-{media_type}") + yield Checkbox("Perform Confabulation Check of Analysis", False, id=f"tldw-api-video-confab-check-{media_type}") + with Horizontal(classes="ingest-form-row"): + with Vertical(classes="ingest-form-col"): + yield Label("Start Time (HH:MM:SS or secs):") + yield Input(id=f"tldw-api-video-start-time-{media_type}", placeholder="Optional") + with Vertical(classes="ingest-form-col"): + yield Label("End Time (HH:MM:SS or secs):") + yield Input(id=f"tldw-api-video-end-time-{media_type}", placeholder="Optional") + elif media_type == "audio": + with Container(id=TLDW_API_AUDIO_OPTIONS_ID, classes="tldw-api-media-specific-options"): + yield Static("Audio Specific Options", classes="sidebar-title") + yield Label("Transcription Model:") + yield Input("deepdml/faster-distil-whisper-large-v3.5", id=f"tldw-api-audio-transcription-model-{media_type}") + yield Label("Transcription Language (e.g., 'en'):") + yield Input("en", id=f"tldw-api-audio-transcription-language-{media_type}") + yield Checkbox("Enable Speaker Diarization", False, id=f"tldw-api-audio-diarize-{media_type}") + yield Checkbox("Include Timestamps in Transcription", True, 
id=f"tldw-api-audio-timestamp-{media_type}") + yield Checkbox("Enable VAD (Voice Activity Detection)", False, id=f"tldw-api-audio-vad-{media_type}") + # TODO: Add other audio specific fields from ProcessAudioRequest: confab (e.g. id=f"tldw-api-audio-confab-check-{media_type}") + elif media_type == "pdf": pdf_engine_options = [(engine, engine) for engine in PdfEngine.__args__] - yield Label("PDF Parsing Engine:") - yield Select(pdf_engine_options, id="tldw-api-pdf-engine", value="pymupdf4llm") - - # Ebook Options - with Container(id=TLDW_API_EBOOK_OPTIONS_ID, classes="tldw-api-media-specific-options hidden"): - yield Static("Ebook Specific Options", classes="sidebar-title") + with Container(id=TLDW_API_PDF_OPTIONS_ID, classes="tldw-api-media-specific-options"): + yield Static("PDF Specific Options", classes="sidebar-title") + yield Label("PDF Parsing Engine:") + yield Select(pdf_engine_options, id=f"tldw-api-pdf-engine-{media_type}", value="pymupdf4llm") + elif media_type == "ebook": ebook_extraction_options = [("filtered", "filtered"), ("markdown", "markdown"), ("basic", "basic")] - yield Label("Ebook Extraction Method:") - yield Select(ebook_extraction_options, id="tldw-api-ebook-extraction-method", value="filtered") - # Ebook chunk_method defaults to ebook_chapters in schema, no specific UI override here unless complex - - # Document Options - with Container(id=TLDW_API_DOCUMENT_OPTIONS_ID, classes="tldw-api-media-specific-options hidden"): - yield Static("Document Specific Options", classes="sidebar-title") - # Document chunk_method defaults to sentences in schema - # Add specific document parsing options if any - - # XML Options - with Container(id=TLDW_API_XML_OPTIONS_ID, classes="tldw-api-media-specific-options hidden"): - yield Static("XML Specific Options", classes="sidebar-title") - yield Checkbox("Auto Summarize XML Content", False, id="tldw-api-xml-auto-summarize") - - # MediaWiki Dump Options - with Container(id=TLDW_API_MEDIAWIKI_OPTIONS_ID, 
classes="tldw-api-media-specific-options hidden"): - yield Static("MediaWiki Dump Specific Options", classes="sidebar-title") - yield Label("Wiki Name (for identification):") - yield Input(id="tldw-api-mediawiki-wiki-name", placeholder="e.g., my_wiki_backup") - yield Label("Namespaces (comma-sep IDs, optional):") - yield Input(id="tldw-api-mediawiki-namespaces", placeholder="e.g., 0,14") - yield Checkbox("Skip Redirect Pages", True, id="tldw-api-mediawiki-skip-redirects") + with Container(id=TLDW_API_EBOOK_OPTIONS_ID, classes="tldw-api-media-specific-options"): + yield Static("Ebook Specific Options", classes="sidebar-title") + yield Label("Ebook Extraction Method:") + yield Select(ebook_extraction_options, id=f"tldw-api-ebook-extraction-method-{media_type}", value="filtered") + elif media_type == "document": + with Container(id=TLDW_API_DOCUMENT_OPTIONS_ID, classes="tldw-api-media-specific-options"): + yield Static("Document Specific Options", classes="sidebar-title") + # yield Label("Note: Document specific options are minimal beyond common settings.") # Placeholder - can be removed or made more specific if needed + # If adding specific fields, ensure their IDs are dynamic e.g. 
id=f"tldw-api-doc-some-option-{media_type}" + elif media_type == "xml": + with Container(id=TLDW_API_XML_OPTIONS_ID, classes="tldw-api-media-specific-options"): + yield Static("XML Specific Options (Note: Only one local file at a time)", classes="sidebar-title") + yield Checkbox("Auto Summarize XML Content", False, id=f"tldw-api-xml-auto-summarize-{media_type}") + elif media_type == "mediawiki_dump": + with Container(id=TLDW_API_MEDIAWIKI_OPTIONS_ID, classes="tldw-api-media-specific-options"): + yield Static("MediaWiki Dump Specific Options (Note: Only one local file at a time)", classes="sidebar-title") + yield Label("Wiki Name (for identification):") + yield Input(id=f"tldw-api-mediawiki-wiki-name-{media_type}", placeholder="e.g., my_wiki_backup") + yield Label("Namespaces (comma-sep IDs, optional):") + yield Input(id=f"tldw-api-mediawiki-namespaces-{media_type}", placeholder="e.g., 0,14") + yield Checkbox("Skip Redirect Pages (recommended)", True, id=f"tldw-api-mediawiki-skip-redirects-{media_type}") + # --- End of Inserted Media-Type Specific Options --- yield Static("Local Database Options", classes="sidebar-title") - yield Checkbox("Overwrite if media exists in local DB", False, id="tldw-api-overwrite-db") + yield Checkbox("Overwrite if media exists in local DB", False, id=f"tldw-api-overwrite-db-{media_type}") - yield Button("Submit to TLDW API", id="tldw-api-submit", variant="primary", classes="ingest-submit-button") + yield Button("Submit to TLDW API", id=f"tldw-api-submit-{media_type}", variant="primary", classes="ingest-submit-button") + # LoadingIndicator and TextArea for API status/error messages + yield LoadingIndicator(id=f"tldw-api-loading-indicator-{media_type}", classes="hidden") # Initially hidden + yield TextArea( + "", + id=f"tldw-api-status-area-{media_type}", + read_only=True, + classes="ingest-status-area hidden", # Initially hidden, common styling + language="markdown" # Use markdown for potential formatting + ) def compose(self) -> 
ComposeResult: with VerticalScroll(id="ingest-nav-pane", classes="ingest-nav-pane"): yield Static("Ingestion Methods", classes="sidebar-title") + # Add new buttons for each media type + for media_type in MEDIA_TYPES: + label = f"Ingest {media_type.replace('_', ' ').title()} via tldw API" + if media_type == 'mediawiki_dump': + label = "Ingest MediaWiki Dump via tldw API" + button_id = f"ingest-nav-tldw-api-{media_type}" + yield Button(label, id=button_id, classes="ingest-nav-button") yield Button("Ingest Prompts", id="ingest-nav-prompts", classes="ingest-nav-button") yield Button("Ingest Characters", id="ingest-nav-characters", classes="ingest-nav-button") yield Button("Ingest Media (Local)", id="ingest-nav-media", classes="ingest-nav-button") yield Button("Ingest Notes", id="ingest-nav-notes", classes="ingest-nav-button") - yield Button("Ingest Media via tldw API", id="ingest-nav-tldw-api", classes="ingest-nav-button") + with Container(id="ingest-content-pane", classes="ingest-content-pane"): # --- Prompts Ingest View --- @@ -272,9 +294,13 @@ def compose(self) -> ComposeResult: classes="ingest-view-area", ) - # New container for tldw API form - with Container(id="ingest-view-tldw-api", classes="ingest-view-area"): - yield from self.compose_tldw_api_form() + # New containers for tldw API forms for each media type + for media_type in MEDIA_TYPES: + with Container(id=f"ingest-view-tldw-api-{media_type}", classes="ingest-view-area hidden"): # Start hidden + # TODO: Later, tailor the form composition if needed per media type, (This is the next step) + # or decide if one generic form is enough and it's just the nav that changes. + # For now, we assume compose_tldw_api_form is generic enough or will be adapted. 
+ yield from self.compose_tldw_api_form(media_type=media_type) diff --git a/tldw_chatbook/UI/LLM_Management_Window.py b/tldw_chatbook/UI/LLM_Management_Window.py index 6e035764..c5bc62da 100644 --- a/tldw_chatbook/UI/LLM_Management_Window.py +++ b/tldw_chatbook/UI/LLM_Management_Window.py @@ -143,11 +143,52 @@ def compose(self) -> ComposeResult: yield RichLog(id="vllm-log-output", classes="log_output", wrap=True, highlight=True) with Container(id="llm-view-onnx", classes="llm-view-area"): yield Static("ONNX Management Area - Content Coming Soon!") - yield Container( - Static("Transformers Library Management Area - Content Coming Soon!"), - id="llm-view-transformers", - classes="llm-view-area", - ) + # --- Transformers View --- + with Container(id="llm-view-transformers", classes="llm-view-area"): + with VerticalScroll(): + yield Label("Hugging Face Transformers Model Management", + classes="section_label") # Use a consistent class like .section_label or .pane-title + + yield Label("Local Models Root Directory (for listing/browsing):", classes="label") + with Container(classes="input_container"): # Re-use styling for input button + yield Input(id="transformers-models-dir-path", + placeholder="/path/to/your/hf_models_cache_or_local_dir") + yield Button("Browse Dir", id="transformers-browse-models-dir-button", + classes="browse_button") + + yield Button("List Local Models", id="transformers-list-local-models-button", + classes="action_button") + yield RichLog(id="transformers-local-models-list", classes="log_output", markup=True, + highlight=False) # markup=True for Rich tags + yield Static("---", classes="separator") # Visual separator + + yield Label("Download New Model:", classes="label section_label") # Use consistent class + yield Label("Model Repo ID (e.g., 'google-bert/bert-base-uncased'):", classes="label") + yield Input(id="transformers-download-repo-id", placeholder="username/model_name") + yield Label("Revision/Branch (optional):", classes="label") + yield 
Input(id="transformers-download-revision", placeholder="main") + yield Button("Download Model", id="transformers-download-model-button", classes="action_button") + yield Static("---", classes="separator") + yield Label("Run Custom Transformers Server Script:", classes="label section_label") + yield Label("Python Interpreter:", classes="label") + yield Input(id="transformers-python-path", value="python", placeholder="e.g., /path/to/venv/bin/python") + yield Label("Path to your Server Script (.py):", classes="label") + with Container(classes="input_container"): + yield Input(id="transformers-script-path", placeholder="/path/to/your_transformers_server_script.py") + yield Button("Browse Script", id="transformers-browse-script-button", classes="browse_button") + yield Label("Model to Load (ID or Path for script):", classes="label") + yield Input(id="transformers-server-model-arg", placeholder="Script-dependent model identifier") + yield Label("Host:", classes="label") + yield Input(id="transformers-server-host", value="127.0.0.1") + yield Label("Port:", classes="label") + yield Input(id="transformers-server-port", value="8003") # Example port + yield Label("Additional Script Arguments:", classes="label") + yield TextArea(id="transformers-server-additional-args", classes="additional_args_textarea", language="bash", theme="vscode_dark") + yield Button("Start Transformers Server", id="transformers-start-server-button", classes="action_button") + yield Button("Stop Transformers Server", id="transformers-stop-server-button", classes="action_button") + + yield Label("Operations Log:", classes="label section_label") # Use consistent class + yield RichLog(id="transformers-log-output", classes="log_output", wrap=True, highlight=True) yield Container( Static("Local Model Management Area - Content Coming Soon!"), id="llm-view-local-models", diff --git a/tldw_chatbook/app.py b/tldw_chatbook/app.py index c3962d9c..0514482d 100644 --- a/tldw_chatbook/app.py +++ 
b/tldw_chatbook/app.py @@ -19,10 +19,6 @@ Static, Button, Input, Header, RichLog, TextArea, Select, ListView, Checkbox, Collapsible, ListItem, Label ) from textual.containers import Horizontal, Container, HorizontalScroll, VerticalScroll - -from tldw_chatbook.Event_Handlers.Chat_Events.chat_streaming_events import handle_streaming_chunk, handle_stream_done -from tldw_chatbook.Event_Handlers.worker_events import StreamingChunk, StreamDone -from .Widgets.AppFooterStatus import AppFooterStatus from textual.reactive import reactive from textual.worker import Worker, WorkerState from textual.binding import Binding @@ -33,6 +29,10 @@ from pathlib import Path # # --- Local API library Imports --- +from .Event_Handlers.LLM_Management_Events import llm_management_events_transformers as transformers_handlers +from tldw_chatbook.Event_Handlers.Chat_Events.chat_streaming_events import handle_streaming_chunk, handle_stream_done +from tldw_chatbook.Event_Handlers.worker_events import StreamingChunk, StreamDone +from .Widgets.AppFooterStatus import AppFooterStatus from .Utils import Utils from .config import ( get_media_db_path, @@ -99,7 +99,7 @@ from .UI.Notes_Window import NotesWindow from .UI.Logs_Window import LogsWindow from .UI.Stats_Window import StatsWindow -from .UI.Ingest_Window import IngestWindow +from .UI.Ingest_Window import IngestWindow, INGEST_VIEW_IDS, INGEST_NAV_BUTTON_IDS from .UI.Tools_Settings_Window import ToolsSettingsWindow from .UI.LLM_Management_Window import LLMManagementWindow from .UI.Evals_Window import EvalsWindow # Added EvalsWindow @@ -727,6 +727,18 @@ def _update_llamacpp_log(self, message: str) -> None: except Exception as e: # pylint: disable=broad-except self.loguru_logger.error(f"Error writing to Llama.cpp log: {e}", exc_info=True) + def _update_transformers_log(self, message: str) -> None: + """Helper to write messages to the Transformers log widget.""" + try: + # Assuming the Transformers view is active when this is called, + # or the log 
widget is always part of the composed layout. + log_widget = self.query_one("#transformers-log-output", RichLog) + log_widget.write(message) + except QueryError: + self.loguru_logger.error("Failed to query #transformers-log-output to write message.") + except Exception as e: # pylint: disable=broad-except + self.loguru_logger.error(f"Error writing to Transformers log: {e}", exc_info=True) + def _update_llamafile_log(self, message: str) -> None: """Helper to write messages to the Llamafile log widget.""" try: @@ -1469,7 +1481,6 @@ def watch_evals_sidebar_collapsed(self, collapsed: bool) -> None: except Exception as e: self.loguru_logger.error(f"Error toggling Evals sidebar: {e}", exc_info=True) - # --- Method DEFINITION for show_ingest_view --- def show_ingest_view(self, view_id_to_show: Optional[str]): """ Shows the specified ingest view within the ingest-content-pane and hides others. @@ -1568,7 +1579,10 @@ async def save_current_note(self) -> bool: # self.notify("Unexpected error saving note.", severity="error") return False + + ####################################################################### # --- Notes UI Event Handlers (Chat Tab Sidebar) --- + ####################################################################### @on(Button.Pressed, "#chat-notes-create-new-button") async def handle_chat_notes_create_new(self, event: Button.Pressed) -> None: """Handles the 'Create New Note' button press in the chat sidebar's notes section.""" @@ -1982,8 +1996,12 @@ async def on_button_pressed(self, event: Button.Pressed) -> None: self.show_ingest_view("ingest-view-media") elif button_id == "ingest-nav-notes": self.show_ingest_view("ingest-view-notes") - elif button_id == "ingest-nav-tldw-api": - self.show_ingest_view("ingest-view-tldw-api") + elif button_id.startswith("ingest-nav-tldw-api-"): # Handle new dynamic TLDW API nav buttons + view_to_activate_nav = button_id.replace("ingest-nav-", "ingest-view-") + self.loguru_logger.info( + f"Ingest TLDW API nav button 
'{button_id}' pressed. Activating view '{view_to_activate_nav}'.") + self.ingest_active_view = view_to_activate_nav # This should trigger the watcher + return # Nav button handled # --- Buttons within ingest-view-prompts --- # Ensure these handlers are only called if the ingest-view-prompts is active @@ -1997,7 +2015,9 @@ async def on_button_pressed(self, event: Button.Pressed) -> None: await ingest_events.handle_ingest_prompts_import_now_button_pressed(self) + ####################################################################### # --- Tab-Specific Button Actions --- + ####################################################################### if current_active_tab == TAB_CHAT: action_widget = self._get_chat_message_widget_from_button(button) if action_widget: @@ -2091,13 +2111,20 @@ async def on_button_pressed(self, event: Button.Pressed) -> None: elif button_id == "ccp-editor-prompt-save-button": await ccp_handlers.handle_ccp_editor_prompt_save_button_pressed(self) elif button_id == "ccp-editor-prompt-clone-button": await ccp_handlers.handle_ccp_editor_prompt_clone_button_pressed(self) elif button_id == "ccp-editor-prompt-delete-button": await ccp_handlers.handle_ccp_editor_prompt_delete_button_pressed(self) + # Buttons for CENTER PANE CHARACTER editor + elif button_id == "ccp-editor-char-save-button": await ccp_handlers.handle_ccp_editor_char_save_button_pressed(self) + elif button_id == "ccp-editor-char-cancel-button": await ccp_handlers.handle_ccp_editor_char_cancel_button_pressed(self) + # Other CCP buttons elif button_id == "ccp-import-conversation-button": await ccp_handlers.handle_ccp_import_conversation_button_pressed(self) elif button_id == "ccp-right-pane-load-character-button": self.loguru_logger.info(f"CCP Right Pane Load Character button pressed: {button_id}") await ccp_handlers.handle_ccp_left_load_character_button_pressed(self) else: self.loguru_logger.warning(f"Unhandled button on CCP tab -> ID: {button_id}, Label: '{button.label}'") + + 
####################################################################### # --- Notes Tab --- + ####################################################################### elif current_active_tab == TAB_NOTES: if button_id == "notes-create-new-button": await notes_handlers.handle_notes_create_new_button_pressed(self) elif button_id == "notes-edit-selected-button": await notes_handlers.handle_notes_edit_selected_button_pressed(self) @@ -2110,7 +2137,10 @@ async def on_button_pressed(self, event: Button.Pressed) -> None: elif button_id == "notes-save-keywords-button": await notes_handlers.handle_notes_save_keywords_button_pressed(self) else: self.loguru_logger.warning(f"Unhandled button on NOTES tab: {button_id}") + + ####################################################################### # --- Media Tab --- + ####################################################################### elif current_active_tab == TAB_MEDIA: if button_id and button_id.startswith("media-nav-"): # e.g., "media-nav-video-audio" -> "media-view-video-audio" @@ -2128,46 +2158,52 @@ async def on_button_pressed(self, event: Button.Pressed) -> None: else: self.loguru_logger.warning(f"Unhandled button on MEDIA tab: ID:{button_id}, Label:'{button.label}'") + + ####################################################################### # --- Ingestion Tab --- + ####################################################################### elif current_active_tab == TAB_INGEST: - # Navigation buttons within the Ingest tab's left pane - if button_id and button_id.startswith("ingest-nav-"): - view_to_activate_nav = button_id.replace("ingest-nav-", "ingest-view-") - self.loguru_logger.info( - f"Ingest nav button '{button_id}' pressed. 
Activating view '{view_to_activate_nav}'.") - self.ingest_active_view = view_to_activate_nav - - if view_to_activate_nav == "ingest-view-prompts": - try: - selected_list_view = self.query_one("#ingest-prompts-selected-files-list", ListView) - if not selected_list_view.children: - await selected_list_view.clear() - await selected_list_view.append(ListItem(Label("No files selected."))) - preview_area = self.query_one("#ingest-prompts-preview-area", VerticalScroll) - if not preview_area.children: - await preview_area.mount( - Static("Select files to see a preview.", id="ingest-prompts-preview-placeholder")) - except QueryError: - self.loguru_logger.warning( - "Failed to initialize prompts list/preview elements on nav click to prompts.") - elif view_to_activate_nav == "ingest-view-characters": - try: - selected_list_view = self.query_one("#ingest-characters-selected-files-list", ListView) - if not selected_list_view.children: - await selected_list_view.clear() - await selected_list_view.append(ListItem(Label("No files selected."))) - preview_area = self.query_one("#ingest-characters-preview-area", VerticalScroll) - if not preview_area.children: - await preview_area.mount( - Static("Select files to see a preview.", id="ingest-characters-preview-placeholder")) - except QueryError: - self.loguru_logger.warning( - "Failed to initialize characters list/preview for ingest-view-characters on nav click.") - return # Nav button handled - return - - # ELSE, if not a nav button, it must be a button within an active sub-view - else: + # Check if it's one of the main ingest navigation buttons + if button_id in INGEST_NAV_BUTTON_IDS: # INGEST_NAV_BUTTON_IDS now includes tldw-api ones + self.loguru_logger.info(f"Ingest navigation button pressed: {button_id}") + # Determine the target view ID + # This covers "ingest-nav-prompts" -> "ingest-view-prompts" + # and "ingest-nav-tldw-api-video" -> "ingest-view-tldw-api-video" + target_view_id = button_id.replace("ingest-nav-", 
"ingest-view-") + + self.ingest_active_view = target_view_id # This will trigger the watcher + + # Initialize UI elements if necessary (e.g., for prompts or characters view) + if target_view_id == "ingest-view-prompts": + try: + selected_list_view = self.query_one("#ingest-prompts-selected-files-list", ListView) + if not selected_list_view.children: + await selected_list_view.clear() + await selected_list_view.append(ListItem(Label("No files selected."))) + preview_area = self.query_one("#ingest-prompts-preview-area", VerticalScroll) + if not preview_area.children: + await preview_area.mount( + Static("Select files to see a preview.", id="ingest-prompts-preview-placeholder")) + except QueryError: + self.loguru_logger.warning( + "Failed to initialize prompts list/preview elements on nav click to prompts.") + elif target_view_id == "ingest-view-characters": + try: + selected_list_view = self.query_one("#ingest-characters-selected-files-list", ListView) + if not selected_list_view.children: + await selected_list_view.clear() + await selected_list_view.append(ListItem(Label("No files selected."))) + preview_area = self.query_one("#ingest-characters-preview-area", VerticalScroll) + if not preview_area.children: + await preview_area.mount( + Static("Select files to see a preview.", id="ingest-characters-preview-placeholder")) + except QueryError: + self.loguru_logger.warning( + "Failed to initialize characters list/preview for ingest-view-characters on nav click.") + # Add similar initializations for other static ingest views if needed + return # Navigation handled + + # If not a main nav button, it might be a button within an active sub-view active_ingest_sub_view = self.ingest_active_view if active_ingest_sub_view == "ingest-view-prompts": @@ -2179,7 +2215,8 @@ async def on_button_pressed(self, event: Button.Pressed) -> None: return elif button_id == "ingest-prompts-import-now-button": await ingest_events.handle_ingest_prompts_import_now_button_pressed(self) - return + 
else: self.loguru_logger.warning(f"Unhandled button on INGEST (Prompts) sub-view: {button_id}") + return elif active_ingest_sub_view == "ingest-view-characters": if button_id == "ingest-characters-select-file-button": @@ -2190,19 +2227,38 @@ async def on_button_pressed(self, event: Button.Pressed) -> None: return elif button_id == "ingest-characters-import-now-button": await ingest_events.handle_ingest_characters_import_now_button_pressed(self) - return + else: self.loguru_logger.warning(f"Unhandled button on INGEST (Characters) sub-view: {button_id}") + return - # Add other sub-views like ingest-view-notes here - # elif active_ingest_sub_view == "ingest-view-notes": - # # ... handle buttons for notes ingest ... - # pass # Remember to return if handled + elif active_ingest_sub_view == "ingest-view-notes": + if button_id == "ingest-notes-select-file-button": + await ingest_events.handle_ingest_notes_select_file_button_pressed(self) + elif button_id == "ingest-notes-clear-files-button": + await ingest_events.handle_ingest_notes_clear_files_button_pressed(self) + elif button_id == "ingest-notes-import-now-button": + await ingest_events.handle_ingest_notes_import_now_button_pressed(self) + else: self.loguru_logger.warning(f"Unhandled button on INGEST (Notes) sub-view: {button_id}") + return + + # Handle TLDW API form submissions (dynamic IDs) + elif active_ingest_sub_view and active_ingest_sub_view.startswith("ingest-view-tldw-api-"): + # The button ID itself will be like "tldw-api-submit-{media_type}" + # The active_ingest_sub_view is "ingest-view-tldw-api-{media_type}" + # The event handler ingest_events.handle_tldw_api_submit_button_pressed now extracts media_type from button_id + if button_id.startswith("tldw-api-submit-"): + await ingest_events.handle_tldw_api_submit_button_pressed(self, event) # Pass the full event + else: self.loguru_logger.warning(f"Unhandled button on INGEST (TLDW API - {active_ingest_sub_view}) sub-view: {button_id}") + return - # If no 
sub-view button matched after checking the active sub-view: + # Fallback for unhandled buttons on Ingest tab self.loguru_logger.warning( f"Unhandled button on INGEST tab: ID:{button_id}, Label:'{event.button.label}' (Active Ingest View: {active_ingest_sub_view})") return # Return after logging unhandled Ingest tab button + + ####################################################################### # --- Tools & Settings Tab --- + ####################################################################### elif current_active_tab == TAB_TOOLS_SETTINGS: if button_id and button_id.startswith("ts-nav-"): # Extract the view name from the button ID @@ -2215,7 +2271,10 @@ async def on_button_pressed(self, event: Button.Pressed) -> None: self.loguru_logger.warning( f"Unhandled button on TOOLS & SETTINGS tab: ID:{button_id}, Label:'{button.label}'") + + ####################################################################### # --- LLM Inference Tab --- + ####################################################################### elif current_active_tab == TAB_LLM: if button_id and button_id.startswith("llm-nav-"): await llm_handlers.handle_llm_nav_button_pressed(self, button_id) @@ -2235,6 +2294,13 @@ async def on_button_pressed(self, event: Button.Pressed) -> None: await handle_start_llamafile_server_button_pressed(self) elif button_id == "llamafile-stop-server-button": await handle_stop_llamafile_server_button_pressed(self) + # Transformers buttons + elif button_id == "transformers-browse-models-dir-button": + await transformers_handlers.handle_transformers_browse_models_dir_button_pressed(self); return + elif button_id == "transformers-list-local-models-button": + await transformers_handlers.handle_transformers_list_local_models_button_pressed(self); return + elif button_id == "transformers-download-model-button": + await transformers_handlers.handle_transformers_download_model_button_pressed(self); return # Add these new conditions for vLLM: elif button_id == 
"vllm-browse-python-button": await handle_vllm_browse_python_button_pressed(self) @@ -2253,12 +2319,18 @@ async def on_button_pressed(self, event: Button.Pressed) -> None: self.loguru_logger.warning( f"Unhandled button on LLM MANAGEMENT tab: ID:{button_id}, Label:'{button.label}'") + + ####################################################################### # --- Logging Tab --- + ####################################################################### elif current_active_tab == TAB_LOGS: if button_id == "copy-logs-button": await app_lifecycle_handlers.handle_copy_logs_button_pressed(self) else: self.loguru_logger.warning(f"Unhandled button on LOGS tab: {button_id}") + + ####################################################################### # --- Evals Tab --- + ####################################################################### elif current_active_tab == TAB_EVALS: if button_id == "toggle-evals-sidebar": self.evals_sidebar_collapsed = not self.evals_sidebar_collapsed @@ -2431,7 +2503,9 @@ async def on_worker_state_changed(self, event: Worker.StateChanged) -> None: f"Group='{worker_group}', State='{worker_state}', Desc='{worker_description}'" ) + ####################################################################### # --- Handle Chat-related API Calls --- + ####################################################################### if isinstance(worker_name_attr, str) and \ (worker_name_attr.startswith("API_Call_chat") or worker_name_attr.startswith("API_Call_ccp") or @@ -2498,7 +2572,10 @@ async def on_worker_state_changed(self, event: Worker.StateChanged) -> None: else: self.loguru_logger.debug(f"Chat-related worker '{worker_name_attr}' in other state: {worker_state}") + + ####################################################################### # --- Handle Llama.cpp Server Worker (identified by group) --- + ####################################################################### # This handles the case where worker_name_attr was a list. 
elif worker_group == "llamacpp_server": self.loguru_logger.info( @@ -2575,7 +2652,10 @@ async def on_worker_state_changed(self, event: Worker.StateChanged) -> None: except QueryError: self.loguru_logger.warning("Could not find Llama.cpp server buttons to update for ERROR state.") + + ####################################################################### # --- Handle Llamafile Server Worker (identified by group) --- + ####################################################################### elif worker_group == "llamafile_server": # Add this new elif block self.loguru_logger.info( f"Llamafile server worker (Group: '{worker_group}') state changed to {worker_state}." @@ -2637,7 +2717,10 @@ async def on_worker_state_changed(self, event: Worker.StateChanged) -> None: except QueryError: self.loguru_logger.warning("Could not find Llamafile server buttons for ERROR state.") + + ####################################################################### # --- Handle vLLM Server Worker (identified by group) --- + ####################################################################### elif worker_group == "vllm_server": self.loguru_logger.info( f"vLLM server worker (Group: '{worker_group}', NameAttr: '{worker_name_attr}') state changed to {worker_state}." @@ -2668,7 +2751,53 @@ async def on_worker_state_changed(self, event: Worker.StateChanged) -> None: except QueryError: self.loguru_logger.warning("Could not find vLLM server buttons to update state for STOPPED/ERROR.") + + ####################################################################### + # --- Handle Transformers Server Worker (identified by group) --- + ####################################################################### + elif worker_group == "transformers_download": + self.loguru_logger.info( + f"Transformers Download worker (Group: '{worker_group}') state changed to {worker_state}." 
+ ) + download_button_id = "#transformers-download-model-button" + + if worker_state == WorkerState.RUNNING: + self.loguru_logger.info("Transformers model download worker is RUNNING.") + try: + self.query_one(download_button_id, Button).disabled = True + except QueryError: + self.loguru_logger.warning( + f"Could not find button {download_button_id} to disable for RUNNING state.") + + elif worker_state == WorkerState.SUCCESS: + result_message = str(event.worker.result).strip() if event.worker.result else "Download completed." + self.loguru_logger.info(f"Transformers Download worker SUCCESS. Result: {result_message}") + if "failed" in result_message.lower() or "error" in result_message.lower() or "non-zero code" in result_message.lower(): + self.notify(f"Model Download: {result_message}", title="Download Issue", severity="error", + timeout=10) + else: + self.notify(f"Model Download: {result_message}", title="Download Complete", severity="information", + timeout=7) + try: + self.query_one(download_button_id, Button).disabled = False + except QueryError: + self.loguru_logger.warning( + f"Could not find button {download_button_id} to enable for SUCCESS state.") + + elif worker_state == WorkerState.ERROR: + error_details = str(event.worker.error) if event.worker.error else "Unknown worker error." + self.loguru_logger.error(f"Transformers Download worker FAILED. 
Error: {error_details}") + self.notify(f"Model Download Failed: {error_details[:100]}...", title="Download Error", + severity="error", timeout=10) + try: + self.query_one(download_button_id, Button).disabled = False + except QueryError: + self.loguru_logger.warning(f"Could not find button {download_button_id} to enable for ERROR state.") + + + ####################################################################### # --- Handle Llamafile Server Worker (identified by group) --- + ####################################################################### elif worker_group == "llamafile_server": self.loguru_logger.info( f"Llamafile server worker (Group: '{worker_group}', NameAttr: '{worker_name_attr}') state changed to {worker_state}." @@ -2696,7 +2825,10 @@ async def on_worker_state_changed(self, event: Worker.StateChanged) -> None: self.loguru_logger.warning( "Could not find Llamafile server buttons to update state for STOPPED/ERROR.") + + ####################################################################### # --- Handle Model Download Worker (identified by group) --- + ####################################################################### elif worker_group == "model_download": self.loguru_logger.info( f"Model Download worker (Group: '{worker_group}', NameAttr: '{worker_name_attr}') state changed to {worker_state}." @@ -2719,7 +2851,10 @@ async def on_worker_state_changed(self, event: Worker.StateChanged) -> None: self.loguru_logger.warning( "Could not find model download button to re-enable (ID might be incorrect or view not present).") + + ####################################################################### # --- Fallback for any other workers not explicitly handled above --- + ####################################################################### else: # This branch handles workers that are not chat-related and not one of the explicitly grouped servers. # It also catches the case where worker_name_attr was a list but not for a known group. 
diff --git a/tldw_chatbook/css/tldw_cli.tcss b/tldw_chatbook/css/tldw_cli.tcss index 03971316..7aa715df 100644 --- a/tldw_chatbook/css/tldw_cli.tcss +++ b/tldw_chatbook/css/tldw_cli.tcss @@ -317,6 +317,7 @@ Footer { dock: bottom; height: 1; background: $accent-darken-1; } width: 100%; height: auto; /* Allow height to be determined by content */ /* overflow: auto; /* If content within might overflow */ + overflow: auto; } /* Add this class to hide elements */ @@ -338,7 +339,10 @@ Footer { dock: bottom; height: 1; background: $accent-darken-1; } } #ccp-character-editor-view { - display: none; /* Initially hidden, to be shown by Python logic */ + display: none; /* Initially hidden */ + layout: vertical; /* Important for stacking the scroller and button bar */ + width: 100%; + height: 100%; /* Fill the .cc-center-pane */ } /* Ensure the right pane sections also respect hidden class */ @@ -365,7 +369,7 @@ Footer { dock: bottom; height: 1; background: $accent-darken-1; } /* --- Prompts Sidebar Vertical --- */ .ccp-prompt-textarea { /* Specific class for prompt textareas if needed */ - height: 10; /* Example height */ + height: 20; /* Example height - Increased from 10 */ /* width: 100%; (from .sidebar-textarea) */ /* margin-bottom: 1; (from .sidebar-textarea) */ } @@ -375,11 +379,17 @@ Footer { dock: bottom; height: 1; background: $accent-darken-1; } border: round $surface; margin-bottom: 1; } - +.ccp-card-action-buttons { + height: auto; /* Let it size to content */ + width: 100%; + margin-top: 1; /* Space above buttons */ + margin-bottom: 2; /* Extra space below to ensure buttons are visible */ +} .ccp-prompt-action-buttons { margin-top: 1; /* Add space above the button bar */ height: auto; /* Allow container height to fit buttons */ - padding-bottom: 1; /* Add space below buttons before parent's padding */ + width: 100%; /* Full width for the button bar */ + /* padding-bottom: 1; Removed, parent #ccp-character-editor-view now handles this */ } 
.ccp-prompt-action-buttons Button { @@ -400,6 +410,15 @@ Footer { dock: bottom; height: 1; background: $accent-darken-1; } #conv-char-right-pane Collapsible.-active > .collapsible--header { /* Optional: when expanded */ background: $primary-background; } + +/* TextAreas for Character Card Display */ +.ccp-card-textarea { + height: 15; + width: 100%; + margin-bottom: 1; + border: round $surface; /* Ensuring consistent styling */ +} + /* --- End of Prompts Sidebar Vertical --- */ /* --- End of Conversations, Characters & Prompts Window specific layouts --- */ /* ----------------------------- ************************* ----------------------------- */ @@ -734,6 +753,19 @@ MetricsScreen Label.-info-message { #ingest-window { /* Matches TAB_INGEST */ layout: horizontal; } +.tldw-api-media-specific-options { /* Common class for specific option blocks */ + padding: 1; + border: round $surface; + margin-top: 1; + margin-bottom: 1; +} + +/* Added to ensure initially hidden specific options are indeed hidden */ +.tldw-api-media-specific-options.hidden { + padding: 1; + border: round $surface; + margin-top: 1; +} .ingest-nav-pane { /* Style for the left navigation pane */ dock: left; @@ -1294,11 +1326,15 @@ MetricsScreen Label.-info-message { } .ingest-textarea-small { - height: 3; + height: auto; + max-height: 10; + overflow-y: hidden; margin-bottom: 1; } .ingest-textarea-medium { - height: 5; + height: auto; + max-height: 15; + overflow-y: hidden; margin-bottom: 1; } .ingest-form-row { @@ -1307,6 +1343,12 @@ MetricsScreen Label.-info-message { height: auto; margin-bottom: 1; } +.title-author-row { /* New class for Title/Author row */ + layout: horizontal; + width: 100%; + height: auto; + margin-bottom: 0 !important; /* Override existing margin */ +} .ingest-form-col { width: 1fr; padding: 0 1; diff --git a/tldw_chatbook/tldw_api/client.py b/tldw_chatbook/tldw_api/client.py index 4b7adc3a..f966fa97 100644 --- a/tldw_chatbook/tldw_api/client.py +++ 
b/tldw_chatbook/tldw_api/client.py @@ -183,7 +183,7 @@ async def process_xml(self, request_data: ProcessXMLRequest, file_path: str) -> ) return BatchProcessXMLResponse( processed_count=1 if single_item_result.status not in ["Error"] else 0, - errors_count=1 if single_item_result.status == "Error" else 0, + errors_count=1 if single_item_result.status == "Error" or single_item_result.error else 0, errors=[single_item_result.error] if single_item_result.error else [], results=[single_item_result] ) diff --git a/tldw_chatbook/tldw_api/schemas.py b/tldw_chatbook/tldw_api/schemas.py index 19e41482..862fec02 100644 --- a/tldw_chatbook/tldw_api/schemas.py +++ b/tldw_chatbook/tldw_api/schemas.py @@ -3,7 +3,7 @@ from pydantic import BaseModel, Field, HttpUrl # Enum-like Literals from API schema -MediaType = Literal['video', 'audio', 'document', 'pdf', 'ebook', 'xml', 'mediawiki_dump'] # Added xml, mediawiki +MediaType = Literal['video', 'audio', 'document', 'pdf', 'ebook', 'xml', 'mediawiki_dump'] ChunkMethod = Literal['semantic', 'tokens', 'paragraphs', 'sentences', 'words', 'ebook_chapters', 'json'] PdfEngine = Literal['pymupdf4llm', 'pymupdf', 'docling'] ScrapeMethod = Literal["individual", "sitemap", "url_level", "recursive_scraping"] diff --git a/tldw_chatbook/tldw_api/utils.py b/tldw_chatbook/tldw_api/utils.py index 45b2235f..a15dfceb 100644 --- a/tldw_chatbook/tldw_api/utils.py +++ b/tldw_chatbook/tldw_api/utils.py @@ -2,12 +2,13 @@ # # # Imports +import logging from pathlib import Path from typing import Dict, Any, Optional, List, IO, Tuple +import mimetypes # # 3rd-party Libraries from pydantic import BaseModel -import httpx # ####################################################################################################################### # @@ -56,24 +57,35 @@ def prepare_files_for_httpx( file_path_obj = Path(file_path_str) if not file_path_obj.is_file(): # Or raise an error, or log and skip - print(f"Warning: File not found or not a file: {file_path_str}") + 
# Log through the logging module instead of print() so a library caller controls the output + logging.warning(f"Warning: File not found or not a file: {file_path_str}") continue file_obj = open(file_path_obj, "rb") - # Basic MIME type guessing, can be improved with `mimetypes` library - mime_type = None - if file_path_obj.suffix.lower() == ".mp4": - mime_type = "video/mp4" - elif file_path_obj.suffix.lower() == ".mp3": - mime_type = "audio/mpeg" - # Add more MIME types as needed + + mime_type, _ = mimetypes.guess_type(file_path_obj.name) # Use filename for guessing + + if mime_type is None: + # If the type can't be guessed, fall back to a generic MIME type; + # 'application/octet-stream' is a common default for unknown binary data. + mime_type = 'application/octet-stream' + logging.warning(f"Could not guess MIME type for {file_path_obj.name}. Defaulting to {mime_type}.") + logging.info(f"Warning: Could not guess MIME type for {file_path_obj.name}. Defaulting to {mime_type}.") httpx_files_list.append( (upload_field_name, (file_path_obj.name, file_obj, mime_type)) ) except Exception as e: - print(f"Error preparing file {file_path_str} for upload: {e}") + # Report the failure through the logging module rather than print() + logging.error(f"Error preparing file {file_path_str} for upload: {e}") # Handle error, e.g., skip this file or raise + # If you skip, ensure file_obj is closed if it was opened. + # However, in this structure, if open() fails, the exception occurs before append. + # If an error occurs after open() but before append, the file might not be closed. + # Using a try/finally for file_obj.close() or opening file_obj within a + # `with open(...) as file_obj:` block inside the `prepare_files_for_httpx` + # is safer if you add logic between open() and append() that could fail. + # NOTE(review): the opened file objects are returned to the caller still open; confirm whether httpx closes them or the caller must. return httpx_files_list if httpx_files_list else None #