Sync #51

Merged — 13 commits, Jun 4, 2025

Changes from all commits
225 changes: 225 additions & 0 deletions Tests/UI/test_ingest_window.py
@@ -0,0 +1,225 @@
# test_ingest_window.py
#
# Imports
import pytest
from pytest_mock import MockerFixture # For mocking
from pathlib import Path
#
# Third-party Libraries
from textual.app import App, ComposeResult
from textual.widgets import Button, Input, Select, Checkbox, TextArea, RadioSet, RadioButton, Collapsible, ListView, \
ListItem, Markdown, LoadingIndicator, Label, Static
from textual.containers import Container, VerticalScroll, Horizontal, Vertical
from textual.pilot import Pilot
from textual.css.query import QueryError
#
# Local Imports
from tldw_chatbook.app import TldwCli # The main app
from tldw_chatbook.UI.Ingest_Window import IngestWindow, MEDIA_TYPES # Import MEDIA_TYPES
from tldw_chatbook.tldw_api.schemas import ProcessVideoRequest, ProcessAudioRequest, ProcessPDFRequest, \
ProcessEbookRequest, ProcessDocumentRequest, ProcessXMLRequest, ProcessMediaWikiRequest
#
#
########################################################################################################################
#
# Fixtures and Helper Functions

# Helper to get the IngestWindow instance from the app
async def get_ingest_window(pilot: Pilot) -> IngestWindow:
ingest_window_query = pilot.app.query(IngestWindow)
assert ingest_window_query.is_empty is False, "IngestWindow not found"
return ingest_window_query.first()


@pytest.fixture
async def app_pilot() -> Pilot:
app = TldwCli()
async with app.run_test() as pilot:
# Ensure the Ingest tab is active. Default is Chat.
# Switching tabs is handled by app.py's on_button_pressed for tab buttons.
# We need to find the Ingest tab button and click it.
# Assuming tab IDs are like "tab-ingest"
try:
await pilot.click("#tab-ingest")
except QueryError:
# Fallback if direct ID click isn't working as expected in test setup
# This might indicate an issue with tab IDs or pilot interaction timing
all_buttons = pilot.app.query(Button)
ingest_tab_button = None
for btn in all_buttons:
if btn.id == "tab-ingest":
ingest_tab_button = btn
break
assert ingest_tab_button is not None, "Ingest tab button not found"
await pilot.click(ingest_tab_button)

# Verify IngestWindow is present and active
ingest_window = await get_ingest_window(pilot)
assert ingest_window is not None
assert ingest_window.display is True, "IngestWindow is not visible after switching to Ingest tab"
# Also check the app's current_tab reactive variable
assert pilot.app.current_tab == "ingest", "App's current_tab is not set to 'ingest'"
yield pilot


# Test Class
class TestIngestWindowTLDWAPI:

async def test_initial_tldw_api_nav_buttons_and_views(self, app_pilot: Pilot):
ingest_window = await get_ingest_window(app_pilot)
# The IngestWindow itself is a container, nav buttons are direct children of its "ingest-nav-pane"
nav_pane = ingest_window.query_one("#ingest-nav-pane")

for mt in MEDIA_TYPES:
nav_button_id = f"ingest-nav-tldw-api-{mt.replace('_', '-')}" # IDs don't have #
view_id = f"ingest-view-tldw-api-{mt.replace('_', '-')}"

# Check navigation button exists
nav_button = nav_pane.query_one(f"#{nav_button_id}", Button)
assert nav_button is not None, f"Navigation button {nav_button_id} not found"
expected_label_part = mt.replace('_', ' ').title()
if mt == "mediawiki_dump":
expected_label_part = "MediaWiki Dump"
assert expected_label_part in str(nav_button.label), f"Label for {nav_button_id} incorrect"

# Check view area exists
view_area = ingest_window.query_one(f"#{view_id}", Container)
assert view_area is not None, f"View area {view_id} not found"

# Check initial visibility based on app's active ingest view
# This assumes that after switching to Ingest tab, a default sub-view *within* Ingest is activated.
# If `ingest_active_view` is set (e.g. to "ingest-view-prompts" by default), then
# all tldw-api views should be hidden.
active_ingest_view_on_app = app_pilot.app.ingest_active_view
if view_id != active_ingest_view_on_app:
assert view_area.display is False, f"{view_id} should be hidden if not the active ingest view ('{active_ingest_view_on_app}')"
else:
assert view_area.display is True, f"{view_id} should be visible as it's the active ingest view ('{active_ingest_view_on_app}')"

@pytest.mark.parametrize("media_type", MEDIA_TYPES)
async def test_tldw_api_navigation_and_view_display(self, app_pilot: Pilot, media_type: str):
ingest_window = await get_ingest_window(app_pilot)
nav_button_id = f"ingest-nav-tldw-api-{media_type.replace('_', '-')}"
target_view_id = f"ingest-view-tldw-api-{media_type.replace('_', '-')}"

await app_pilot.click(f"#{nav_button_id}")
await app_pilot.pause() # Allow watchers to update display properties

# Verify target view is visible
target_view_area = ingest_window.query_one(f"#{target_view_id}", Container)
assert target_view_area.display is True, f"{target_view_id} should be visible after clicking {nav_button_id}"
assert app_pilot.app.ingest_active_view == target_view_id, f"App's active ingest view should be {target_view_id}"

# Verify other TLDW API views are hidden
for other_mt in MEDIA_TYPES:
if other_mt != media_type:
other_view_id = f"ingest-view-tldw-api-{other_mt.replace('_', '-')}"
other_view_area = ingest_window.query_one(f"#{other_view_id}", Container)
assert other_view_area.display is False, f"{other_view_id} should be hidden when {target_view_id} is active"

# Verify common form elements exist with dynamic IDs
common_endpoint_input = target_view_area.query_one(f"#tldw-api-endpoint-url-{media_type}", Input)
assert common_endpoint_input is not None

common_submit_button = target_view_area.query_one(f"#tldw-api-submit-{media_type}", Button)
assert common_submit_button is not None

# Verify media-specific options container and its widgets
if media_type == "video":
opts_container = target_view_area.query_one("#tldw-api-video-options", Container)
assert opts_container.display is True
widget = opts_container.query_one(f"#tldw-api-video-transcription-model-{media_type}", Input)
assert widget is not None
elif media_type == "audio":
opts_container = target_view_area.query_one("#tldw-api-audio-options", Container)
assert opts_container.display is True
widget = opts_container.query_one(f"#tldw-api-audio-transcription-model-{media_type}", Input)
assert widget is not None
elif media_type == "pdf":
opts_container = target_view_area.query_one("#tldw-api-pdf-options", Container)
assert opts_container.display is True
widget = opts_container.query_one(f"#tldw-api-pdf-engine-{media_type}", Select)
assert widget is not None
elif media_type == "ebook":
opts_container = target_view_area.query_one("#tldw-api-ebook-options", Container)
assert opts_container.display is True
widget = opts_container.query_one(f"#tldw-api-ebook-extraction-method-{media_type}", Select)
assert widget is not None
elif media_type == "document": # Has minimal specific options currently
opts_container = target_view_area.query_one("#tldw-api-document-options", Container)
assert opts_container.display is True
# Example: find the label if one exists
try:
label = opts_container.query_one(Label) # Assuming there's at least one label
assert label is not None
except QueryError: # If no labels, this is fine for doc
pass
elif media_type == "xml":
opts_container = target_view_area.query_one("#tldw-api-xml-options", Container)
assert opts_container.display is True
widget = opts_container.query_one(f"#tldw-api-xml-auto-summarize-{media_type}", Checkbox)
assert widget is not None
elif media_type == "mediawiki_dump":
opts_container = target_view_area.query_one("#tldw-api-mediawiki-options", Container)
assert opts_container.display is True
widget = opts_container.query_one(f"#tldw-api-mediawiki-wiki-name-{media_type}", Input)
assert widget is not None

async def test_tldw_api_video_submission_data_collection(self, app_pilot: Pilot, mocker: MockerFixture):
media_type = "video"
ingest_window = await get_ingest_window(app_pilot)

# Navigate to video tab by clicking its nav button
nav_button_id = f"ingest-nav-tldw-api-{media_type}"
await app_pilot.click(f"#{nav_button_id}")
await app_pilot.pause() # Allow UI to update

target_view_id = f"ingest-view-tldw-api-{media_type}"
target_view_area = ingest_window.query_one(f"#{target_view_id}", Container)
assert target_view_area.display is True, "Video view area not displayed after click"

# Mock the API client and its methods
mock_api_client_instance = mocker.MagicMock()
# Make process_video an async mock
mock_process_video = mocker.AsyncMock(return_value=mocker.MagicMock())
mock_api_client_instance.process_video = mock_process_video
mock_api_client_instance.close = mocker.AsyncMock()

mocker.patch("tldw_chatbook.Event_Handlers.ingest_events.TLDWAPIClient", return_value=mock_api_client_instance)

# Set form values
endpoint_url_input = target_view_area.query_one(f"#tldw-api-endpoint-url-{media_type}", Input)
urls_textarea = target_view_area.query_one(f"#tldw-api-urls-{media_type}", TextArea)
video_trans_model_input = target_view_area.query_one(f"#tldw-api-video-transcription-model-{media_type}", Input)
auth_method_select = target_view_area.query_one(f"#tldw-api-auth-method-{media_type}", Select)

endpoint_url_input.value = "http://fakeapi.com"
urls_textarea.text = "http://example.com/video.mp4"
video_trans_model_input.value = "test_video_model"
auth_method_select.value = "config_token"

app_pilot.app.app_config = {"tldw_api": {"auth_token_config": "fake_token"}}

submit_button_id = f"tldw-api-submit-{media_type}"
await app_pilot.click(f"#{submit_button_id}")
await app_pilot.pause(delay=0.5)

mock_process_video.assert_called_once()
call_args = mock_process_video.call_args[0]

assert len(call_args) >= 1, "process_video not called with request_model"
request_model_arg = call_args[0]

assert isinstance(request_model_arg, ProcessVideoRequest)
assert request_model_arg.urls == ["http://example.com/video.mp4"]
assert request_model_arg.transcription_model == "test_video_model"
assert request_model_arg.api_key == "fake_token"

# Example for local_file_paths if it's the second argument
if len(call_args) > 1:
local_files_arg = call_args[1]
assert local_files_arg == [], "local_files_arg was not empty"
else:
# This case implies process_video might not have received local_file_paths,
# which could be an issue if it's expected. For now, let's assume it's optional.
pass
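
Note on running these tests: a minimal sketch of the run_test()/Pilot pattern the new tests rely on, assuming pytest-asyncio is installed and an explicit asyncio marker (the suite may instead enable asyncio auto mode; that configuration is not part of this diff). TldwCli and the "#tab-ingest" id are taken from the test file above.

import pytest

from tldw_chatbook.app import TldwCli


@pytest.mark.asyncio
async def test_ingest_tab_activates():
    app = TldwCli()
    async with app.run_test() as pilot:
        await pilot.click("#tab-ingest")  # switch tabs the same way a user would
        await pilot.pause()               # let watchers update reactive state
        assert pilot.app.current_tab == "ingest"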
13 changes: 6 additions & 7 deletions pyproject.toml
@@ -4,27 +4,26 @@ requires = ["setuptools>=61.0"]
build-backend = "setuptools.build_meta"

[project]
name = "tldw_chatbook" # This is the distribution name (what you pip install)
name = "tldw_chatbook"
version = "0.1.0"
authors = [
{ name="Robert Musser", email="[email protected]" },
]
description = "A Textual TUI for chatting with LLMs, and interacting with the tldw server."
readme = "README.md" # Assumes README.md is in the project root
readme = "README.md"
requires-python = ">=3.11"
license = { file="LICENSE" } # Assumes LICENSE file is in the project root
license = { file="LICENSE" }
keywords = ["tui", "cli", "llm", "textual", "ai", "chat"]
classifiers = [
"Development Status :: 3 - Alpha",
"Intended Audience :: Developers",
"Intended Audience :: End Users/Desktop",
"Topic :: Utilities",
"Topic :: Terminals",
"License :: OSI Approved :: GNU Affero General Public License v3 or later (AGPLv3+)", # Corrected to AGPL
"License :: OSI Approved :: GNU Affero General Public License v3 or later (AGPLv3+)",
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
# "Programming Language :: Python :: 3.13", # Only list if you actively test/support it
"Environment :: Console",
"Operating System :: OS Independent",
]
@@ -34,9 +33,9 @@ dependencies = [
"chardet",
"httpx",
"loguru",
"textual>=3.3.0", # Specify a minimum Textual version if features depend on it
"textual>=3.3.0",
"requests",
"rich", # Usually pulled in by Textual, but explicit is fine
"rich",
"Pillow",
"PyYAML",
"pydantic",
2 changes: 1 addition & 1 deletion requirements.txt
@@ -10,7 +10,7 @@ loguru
pydantic
pyyaml
httpx
pytest
#pytest
emoji
# Chunk Library
#tqdm
24 changes: 18 additions & 6 deletions tldw_chatbook/DB/Client_Media_DB_v2.py
@@ -1917,9 +1917,12 @@ def add_media_with_keywords(self,
# --- Handle Unvectorized Chunks ---
if chunks is not None: # chunks argument was provided (could be empty or list of dicts)
if action == "updated":
# Hard delete old chunks for this media_id if updating
logging.debug(f"Hard deleting existing UnvectorizedMediaChunks for updated media_id {media_id}.")
conn.execute("DELETE FROM UnvectorizedMediaChunks WHERE media_id = ?", (media_id,))
# If overwriting and new chunks are provided, clear old ones.
# If `chunks` is an empty list, it also means clear old ones.
if overwrite: # Only delete if overwrite is true
logging.debug(
f"Hard deleting existing UnvectorizedMediaChunks for updated media_id {media_id} due to overwrite and new chunks being provided.")
conn.execute("DELETE FROM UnvectorizedMediaChunks WHERE media_id = ?", (media_id,))

num_chunks_saved = 0
if chunks: # If chunks list is not empty
@@ -1980,9 +1983,18 @@ def add_media_with_keywords(self,
raise DatabaseError(f"Failed to save chunk {i} due to integrity constraint: {e}") from e
logging.info(f"Saved {num_chunks_saved} unvectorized chunks for media_id {media_id}.")

# Update Media chunking_status to 'completed' as chunk processing is done (even if 0 chunks were provided)
conn.execute("UPDATE Media SET chunking_status = 'completed' WHERE id = ?", (media_id,))
logging.debug(f"Updated Media chunking_status to 'completed' for media_id {media_id} after chunk processing.")
# Update Media chunking_status
# If chunks were provided (even an empty list, meaning "clear existing and add these (none)"),
# then chunking is considered 'completed' from the perspective of this operation.
# If `chunks` was None (meaning "don't touch existing chunks"), status remains as is or 'pending'.
final_chunking_status_for_media = 'completed' # if chunks is not None
# If the main `perform_chunking` flag (from request, not DB field) was false,
# then perhaps status should be different. For now, if chunks data is passed, it's 'completed'.
# This might need more nuanced logic based on the `perform_chunking` flag from the original request.
conn.execute("UPDATE Media SET chunking_status = ? WHERE id = ?",
(final_chunking_status_for_media, media_id,))
logging.debug(
f"Updated Media chunking_status to '{final_chunking_status_for_media}' for media_id {media_id} after chunk processing.")

# Original chunk_options placeholder log
if chunk_options:
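
The comments above spread the intended chunk-handling semantics across several branches. A condensed sketch of that decision logic, using the same names (chunks, action, overwrite) as add_media_with_keywords; the helper itself is hypothetical and only restates the behaviour for illustration:

def _plan_chunk_update(chunks, action, overwrite):
    """Return (delete_old, insert_new, new_status) for the unvectorized-chunk step."""
    if chunks is None:
        # Caller did not pass chunks: leave existing rows and chunking_status untouched.
        return False, False, None
    delete_old = (action == "updated" and overwrite)  # clear old rows only when overwriting
    insert_new = bool(chunks)                         # an empty list means "clear only"
    return delete_old, insert_new, "completed"        # status becomes 'completed' whenever chunks was provided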
315 changes: 315 additions & 0 deletions tldw_chatbook/Event_Handlers/llm_management_events_transformers.py
@@ -0,0 +1,315 @@
# /tldw_chatbook/Event_Handlers/llm_management_events_transformers.py
from __future__ import annotations

import logging
import shlex
import subprocess
from pathlib import Path
from typing import TYPE_CHECKING, List, Dict, Any, Optional
import functools # For download worker

from textual.widgets import Input, RichLog
from textual.css.query import QueryError

# For listing local models, you might need to interact with huggingface_hub or scan directories
try:
from huggingface_hub import HfApi, constants as hf_constants

# from huggingface_hub import list_models, model_info as hf_model_info # For online search
# from huggingface_hub.utils import GatedRepoError, RepositoryNotFoundError
HUGGINGFACE_HUB_AVAILABLE = True
except ImportError:
HUGGINGFACE_HUB_AVAILABLE = False
hf_constants = None # type: ignore

if TYPE_CHECKING:
from tldw_chatbook.app import TldwCli
# textual_fspicker is imported dynamically in the handler

# Import shared helpers if needed
from .llm_management_events import \
_make_path_update_callback # _stream_process, stream_worker_output_to_log (not used by download worker directly)


# --- Worker function for model download (can be similar to the existing one) ---
def run_transformers_model_download_worker(app_instance: "TldwCli", command: List[str],
models_base_dir_for_cwd: str) -> str:
logger = getattr(app_instance, "loguru_logger", logging.getLogger(__name__))
quoted_command = ' '.join(shlex.quote(c) for c in command)
# The actual target download path is part of the command (--local-dir)
logger.info(f"Transformers Download WORKER starting: {quoted_command}")

process: Optional[subprocess.Popen] = None
final_status_message = f"Transformers Download WORKER: Default status for {quoted_command}"
pid_str = "N/A"

try:
# The command already includes --local-dir pointing to the exact target.
# We might want to run huggingface-cli from a neutral directory or models_base_dir_for_cwd
# if --local-dir is relative, but since we make it absolute, cwd is less critical.
# For consistency, let's use models_base_dir_for_cwd if provided and valid.
cwd_to_use = models_base_dir_for_cwd if Path(models_base_dir_for_cwd).is_dir() else None

process = subprocess.Popen(
command,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
text=True,
universal_newlines=True,
bufsize=1,
cwd=cwd_to_use
)
pid_str = str(process.pid) if process and process.pid else "UnknownPID"
logger.info(f"Transformers Download WORKER: Subprocess launched, PID: {pid_str}")
app_instance.call_from_thread(app_instance._update_transformers_log, f"[PID:{pid_str}] Download starting...\n")

# communicate() waits for termination
stdout_data, stderr_data = process.communicate(timeout=600) # 10 min timeout for download

logger.info(
f"Transformers Download WORKER: communicate() completed. PID {pid_str}, Exit Code: {process.returncode}")

if stdout_data:
logger.info(f"Transformers Download WORKER STDOUT:\n{stdout_data.strip()}")
app_instance.call_from_thread(app_instance._update_transformers_log,
f"--- STDOUT (PID:{pid_str}) ---\n{stdout_data.strip()}\n")
if stderr_data:
logger.error(f"Transformers Download WORKER STDERR:\n{stderr_data.strip()}")
app_instance.call_from_thread(app_instance._update_transformers_log,
f"--- STDERR (PID:{pid_str}) ---\n[bold red]{stderr_data.strip()}[/]\n")

if process.returncode != 0:
final_status_message = f"Model download (PID:{pid_str}) failed with code: {process.returncode}."
if stderr_data: final_status_message += f"\nSTDERR: {stderr_data.strip()}"
else:
final_status_message = f"Model download (PID:{pid_str}) completed successfully (code: {process.returncode}). Model should be in target --local-dir."

app_instance.call_from_thread(app_instance._update_transformers_log, f"{final_status_message}\n")
return final_status_message

except FileNotFoundError:
msg = f"ERROR: huggingface-cli not found. Please ensure it's installed and in PATH."
logger.error(msg)
app_instance.call_from_thread(app_instance._update_transformers_log, f"[bold red]{msg}[/]\n")
raise
except subprocess.TimeoutExpired:
msg = f"ERROR: Model download (PID:{pid_str}) timed out after 600s."
logger.error(msg)
if process: process.kill()
app_instance.call_from_thread(app_instance._update_transformers_log, f"[bold red]{msg}[/]\n")
raise RuntimeError(msg) # Make worker fail
except Exception as err:
msg = f"CRITICAL ERROR in Transformers Download worker: {err} (Command: {quoted_command})"
logger.error(msg, exc_info=True)
app_instance.call_from_thread(app_instance._update_transformers_log, f"[bold red]{msg}[/]\n")
raise
finally:
logger.info(f"Transformers Download WORKER: Worker for '{quoted_command}' finishing.")
if process and process.poll() is None:
logger.warning(f"Transformers Download WORKER (PID:{pid_str}): Process still running in finally. Killing.")
process.kill()


async def handle_transformers_list_local_models_button_pressed(app: "TldwCli") -> None:
logger = getattr(app, "loguru_logger", logging.getLogger(__name__))
logger.info("Transformers list local models button pressed.")

models_dir_input: Input = app.query_one("#transformers-models-dir-path", Input)
models_list_widget: RichLog = app.query_one("#transformers-local-models-list", RichLog)
log_output_widget: RichLog = app.query_one("#transformers-log-output", RichLog)

models_dir_str = models_dir_input.value.strip()
if not models_dir_str:
app.notify("Please specify a local models directory first.", severity="warning")
models_dir_input.focus()
return

models_path = Path(models_dir_str).resolve() # Resolve to absolute path
if not models_path.is_dir():
app.notify(f"Directory not found: {models_path}", severity="error")
models_dir_input.focus()
return

models_list_widget.clear()
log_output_widget.write(f"Scanning for models in: {models_path}...\n")
app.notify("Scanning for local models...")

found_models_display = []
try:
# This basic scan looks for directories that might be model repos.
# A 'blobs' and 'refs' subdirectory alongside 'snapshots' is common for full cache structure.
# Individual model downloads might just have 'snapshots' or be flat.

# Heuristic 1: Look for 'snapshots' directory, then list its children
# These children are usually named after commit hashes. Inside them are the actual files.
# We need to find a way to map these back to a model name.
# Often, a .gitattributes or similar file at a higher level might exist.

# Heuristic 2: Look for directories containing config.json
# This is simpler but might find nested utility models or non-root model dirs.

count = 0
for item_path in models_path.rglob("config.json"):
if item_path.is_file():
model_root_dir = item_path.parent
# Try to infer a model name. This is tricky.
# If models_path is like ".../hub/models--org--modelname", then model_root_dir might be a snapshot hash.
# If models_path is a custom dir where user put "org/modelname" folders, it's easier.

display_name = ""
try:
# Attempt to make a "repo_id" like name from the path relative to models_path
relative_to_scan_root = model_root_dir.relative_to(models_path)
# If models_path is the HF cache, relative_to_scan_root might be "models--org--repo/snapshots/hash"
# We want to extract "org/repo"
parts = list(relative_to_scan_root.parts)
if parts and parts[0].startswith("models--"):
name_part = parts[0].replace("models--", "")
display_name = name_part.replace("--", "/", 1) # Replace only first --
else: # Assume a flatter structure or direct model name as folder
display_name = str(relative_to_scan_root)
except ValueError: # Not a subpath, models_path itself might be the model_root_dir
if model_root_dir == models_path:
display_name = models_path.name
else: # Some other structure
display_name = model_root_dir.name # Best guess

# Check for actual model files
has_weights = (model_root_dir / "pytorch_model.bin").exists() or \
(model_root_dir / "model.safetensors").exists() or \
(model_root_dir / "tf_model.h5").exists()

if has_weights:
count += 1
found_models_display.append(f"[green]{display_name}[/] ([dim]at {model_root_dir}[/dim])")

if found_models_display:
models_list_widget.write("\n".join(found_models_display))
app.notify(f"Found {count} potential local models (based on config.json and weights).")
else:
models_list_widget.write("No model directories found with config.json and model weights.")
app.notify("No local models found with this scan method.", severity="information")
log_output_widget.write("Local model scan complete.\n")

except Exception as e:
logger.error(f"Error scanning for local models: {e}", exc_info=True)
log_output_widget.write(f"[bold red]Error scanning models: {e}[/]\n")
app.notify("Error during local model scan.", severity="error")


async def handle_transformers_download_model_button_pressed(app: "TldwCli") -> None:
logger = getattr(app, "loguru_logger", logging.getLogger(__name__))
logger.info("Transformers download model button pressed.")

repo_id_input: Input = app.query_one("#transformers-download-repo-id", Input)
revision_input: Input = app.query_one("#transformers-download-revision", Input)
models_dir_input: Input = app.query_one("#transformers-models-dir-path", Input)
log_output_widget: RichLog = app.query_one("#transformers-log-output", RichLog)

repo_id = repo_id_input.value.strip()
revision = revision_input.value.strip() or None
models_dir_str = models_dir_input.value.strip()

if not repo_id:
app.notify("Model Repo ID is required to download.", severity="error")
repo_id_input.focus()
return

if not models_dir_str:
# Default to HF cache if not specified, but warn user.
if HUGGINGFACE_HUB_AVAILABLE and hf_constants and Path(hf_constants.HF_HUB_CACHE).is_dir():
models_dir_str = str(hf_constants.HF_HUB_CACHE)
app.notify(f"No local directory set, will download to Hugging Face cache: {models_dir_str}",
severity="warning", timeout=7)
models_dir_input.value = models_dir_str # Update UI
else:
app.notify("Local models directory must be set to specify download location.", severity="error")
models_dir_input.focus()
return

# huggingface-cli download --local-dir specifies the *target* directory for THIS model's files.
# It will create subdirectories based on the repo structure under this path.
# Example: if --local-dir is /my/models/bert, files go into /my/models/bert/snapshots/hash/...
# We want the user-provided models_dir_str to be the root under which models are organized.
# So, the --local-dir for huggingface-cli should be models_dir_str itself, or a subfolder we define.
# Let's make it download into a subfolder named after the repo_id within models_dir_str for clarity.

# Sanitize repo_id for use as a directory name part
safe_repo_id_subdir = repo_id.replace("/", "--")
target_model_specific_dir = Path(models_dir_str) / safe_repo_id_subdir

log_output_widget.write(
f"Attempting to download '{repo_id}' (rev: {revision or 'latest'}) to '{target_model_specific_dir}'...\n")
target_model_specific_dir.mkdir(parents=True, exist_ok=True) # Ensure target dir exists

command = [
"huggingface-cli",
"download",
repo_id,
"--local-dir", str(target_model_specific_dir),
"--local-dir-use-symlinks", "False" # Usually want actual files for local management
]
if revision:
command.extend(["--revision", revision])

# The worker CWD should be a neutral place, or the parent of target_model_specific_dir
worker_cwd = models_dir_str

worker_callable = functools.partial(
run_transformers_model_download_worker,
app,
command,
worker_cwd
)

app.run_worker(
worker_callable,
group="transformers_download",
description=f"Downloading HF Model {repo_id}",
exclusive=False,
thread=True,
)
app.notify(f"Starting download for {repo_id}...")


async def handle_transformers_browse_models_dir_button_pressed(app: "TldwCli") -> None:
logger = getattr(app, "loguru_logger", logging.getLogger(__name__))
logger.debug("Transformers browse models directory button pressed.")

try:
from textual_fspicker import FileOpen, Filters # Ensure it's imported for runtime
except ImportError:
app.notify("File picker utility (textual-fspicker) not available.", severity="error")
logger.error("textual_fspicker not found for Transformers model dir browsing.")
return

default_loc_str = str(Path.home())
if HUGGINGFACE_HUB_AVAILABLE and hf_constants:
try:
# Use HF_HOME if set, otherwise default cache.
# hf_constants.HF_HUB_CACHE points to the 'hub' subdir, e.g., ~/.cache/huggingface/hub
# We might want to default to ~/.cache/huggingface or where user typically stores models
hf_cache_dir = Path(hf_constants.HF_HUB_CACHE)
if hf_cache_dir.is_dir():
default_loc_str = str(hf_cache_dir)
elif hf_cache_dir.parent.is_dir(): # Try one level up, e.g. ~/.cache/huggingface
default_loc_str = str(hf_cache_dir.parent)
except Exception: # pylint: disable=broad-except
pass

logger.debug(f"Transformers browse models dir: starting location '{default_loc_str}'")

await app.push_screen(
FileOpen(
location=default_loc_str,
select_dirs=True, # We want to select a directory
title="Select Local Hugging Face Models Directory",
# No specific filters needed for directory selection
),
# This callback will update the Input widget with id "transformers-models-dir-path"
callback=_make_path_update_callback(app, "transformers-models-dir-path"),
)
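
For reference, the cache-directory naming that the scan heuristic reverses, and the download command the handler above assembles, as a standalone sketch. cache_dir_to_repo_id is a hypothetical helper that restates the inline logic in handle_transformers_list_local_models_button_pressed; the CLI flags mirror the command list built in handle_transformers_download_model_button_pressed.

from pathlib import Path

def cache_dir_to_repo_id(relative: Path) -> str:
    """Map an HF-cache style path such as 'models--google-bert--bert-base-uncased/...'
    back to a repo id ('google-bert/bert-base-uncased'); otherwise return the path as-is."""
    parts = relative.parts
    if parts and parts[0].startswith("models--"):
        return parts[0].removeprefix("models--").replace("--", "/", 1)
    return str(relative)

# For repo_id = "google-bert/bert-base-uncased" and a models root of /models, the handler
# builds roughly this command (real files rather than symlinks):
#   huggingface-cli download google-bert/bert-base-uncased \
#       --local-dir /models/google-bert--bert-base-uncased \
#       --local-dir-use-symlinks False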




334 changes: 215 additions & 119 deletions tldw_chatbook/Event_Handlers/conv_char_events.py

Large diffs are not rendered by default.

638 changes: 471 additions & 167 deletions tldw_chatbook/Event_Handlers/ingest_events.py

Large diffs are not rendered by default.

20 changes: 18 additions & 2 deletions tldw_chatbook/UI/Conv_Char_Window.py
@@ -83,11 +83,27 @@ def compose(self) -> ComposeResult:
yield Button("Edit this Character", id="ccp-card-edit-button", variant="default")
yield Button("Save Changes", id="ccp-card-save-button", variant="success") # Added variant
yield Button("Clone Character", id="ccp-card-clone-button", variant="primary") # Added variant

# Container for character editing UI (initially hidden by CSS)
with Container(id="ccp-character-editor-view", classes="ccp-view-area"):
yield Static("Character Editor", classes="pane-title", id="ccp-center-pane-title-char-editor")
# Character editor fields will be mounted here
yield Label("Character Name:", classes="sidebar-label")
yield Input(id="ccp-editor-char-name-input", placeholder="Character name...", classes="sidebar-input")
yield Label("Avatar Path/URL:", classes="sidebar-label")
yield Input(id="ccp-editor-char-avatar-input", placeholder="Path or URL to avatar image...", classes="sidebar-input")
yield Label("Description:", classes="sidebar-label")
yield TextArea(id="ccp-editor-char-description-textarea", classes="sidebar-textarea ccp-prompt-textarea")
yield Label("Personality:", classes="sidebar-label")
yield TextArea(id="ccp-editor-char-personality-textarea", classes="sidebar-textarea ccp-prompt-textarea")
yield Label("Scenario:", classes="sidebar-label")
yield TextArea(id="ccp-editor-char-scenario-textarea", classes="sidebar-textarea ccp-prompt-textarea")
yield Label("First Message (Greeting):", classes="sidebar-label")
yield TextArea(id="ccp-editor-char-first-message-textarea", classes="sidebar-textarea ccp-prompt-textarea")
yield Label("Keywords (comma-separated):", classes="sidebar-label")
yield TextArea(id="ccp-editor-char-keywords-textarea", classes="sidebar-textarea ccp-prompt-textarea")
with Horizontal(classes="ccp-prompt-action-buttons"):
yield Button("Save Character", id="ccp-editor-char-save-button", variant="success", classes="sidebar-button")
yield Button("Clone Character", id="ccp-editor-char-clone-button", classes="sidebar-button")
yield Button("Cancel Edit", id="ccp-editor-char-cancel-button", variant="error", classes="sidebar-button hidden")

# Container for prompt editing UI (initially hidden by CSS)
with Container(id="ccp-prompt-editor-view", classes="ccp-view-area"):
230 changes: 128 additions & 102 deletions tldw_chatbook/UI/Ingest_Window.py

Large diffs are not rendered by default.

51 changes: 46 additions & 5 deletions tldw_chatbook/UI/LLM_Management_Window.py
@@ -143,11 +143,52 @@ def compose(self) -> ComposeResult:
yield RichLog(id="vllm-log-output", classes="log_output", wrap=True, highlight=True)
with Container(id="llm-view-onnx", classes="llm-view-area"):
yield Static("ONNX Management Area - Content Coming Soon!")
yield Container(
Static("Transformers Library Management Area - Content Coming Soon!"),
id="llm-view-transformers",
classes="llm-view-area",
)
# --- Transformers View ---
with Container(id="llm-view-transformers", classes="llm-view-area"):
with VerticalScroll():
yield Label("Hugging Face Transformers Model Management",
classes="section_label") # Use a consistent class like .section_label or .pane-title

yield Label("Local Models Root Directory (for listing/browsing):", classes="label")
with Container(classes="input_container"): # Re-use styling for input button
yield Input(id="transformers-models-dir-path",
placeholder="/path/to/your/hf_models_cache_or_local_dir")
yield Button("Browse Dir", id="transformers-browse-models-dir-button",
classes="browse_button")

yield Button("List Local Models", id="transformers-list-local-models-button",
classes="action_button")
yield RichLog(id="transformers-local-models-list", classes="log_output", markup=True,
highlight=False) # markup=True for Rich tags
yield Static("---", classes="separator") # Visual separator

yield Label("Download New Model:", classes="label section_label") # Use consistent class
yield Label("Model Repo ID (e.g., 'google-bert/bert-base-uncased'):", classes="label")
yield Input(id="transformers-download-repo-id", placeholder="username/model_name")
yield Label("Revision/Branch (optional):", classes="label")
yield Input(id="transformers-download-revision", placeholder="main")
yield Button("Download Model", id="transformers-download-model-button", classes="action_button")
yield Static("---", classes="separator")
yield Label("Run Custom Transformers Server Script:", classes="label section_label")
yield Label("Python Interpreter:", classes="label")
yield Input(id="transformers-python-path", value="python", placeholder="e.g., /path/to/venv/bin/python")
yield Label("Path to your Server Script (.py):", classes="label")
with Container(classes="input_container"):
yield Input(id="transformers-script-path", placeholder="/path/to/your_transformers_server_script.py")
yield Button("Browse Script", id="transformers-browse-script-button", classes="browse_button")
yield Label("Model to Load (ID or Path for script):", classes="label")
yield Input(id="transformers-server-model-arg", placeholder="Script-dependent model identifier")
yield Label("Host:", classes="label")
yield Input(id="transformers-server-host", value="127.0.0.1")
yield Label("Port:", classes="label")
yield Input(id="transformers-server-port", value="8003") # Example port
yield Label("Additional Script Arguments:", classes="label")
yield TextArea(id="transformers-server-additional-args", classes="additional_args_textarea", language="bash", theme="vscode_dark")
yield Button("Start Transformers Server", id="transformers-start-server-button", classes="action_button")
yield Button("Stop Transformers Server", id="transformers-stop-server-button", classes="action_button")

yield Label("Operations Log:", classes="label section_label") # Use consistent class
yield RichLog(id="transformers-log-output", classes="log_output", wrap=True, highlight=True)
yield Container(
Static("Local Model Management Area - Content Coming Soon!"),
id="llm-view-local-models",
241 changes: 188 additions & 53 deletions tldw_chatbook/app.py

Large diffs are not rendered by default.

54 changes: 48 additions & 6 deletions tldw_chatbook/css/tldw_cli.tcss
@@ -317,6 +317,7 @@ Footer { dock: bottom; height: 1; background: $accent-darken-1; }
width: 100%;
height: auto; /* Allow height to be determined by content */
/* overflow: auto; /* If content within might overflow */
overflow: auto;
}

/* Add this class to hide elements */
@@ -338,7 +339,10 @@ Footer { dock: bottom; height: 1; background: $accent-darken-1; }
}

#ccp-character-editor-view {
display: none; /* Initially hidden, to be shown by Python logic */
display: none; /* Initially hidden */
layout: vertical; /* Important for stacking the scroller and button bar */
width: 100%;
height: 100%; /* Fill the .cc-center-pane */
}

/* Ensure the right pane sections also respect hidden class */
@@ -365,7 +369,7 @@ Footer { dock: bottom; height: 1; background: $accent-darken-1; }

/* --- Prompts Sidebar Vertical --- */
.ccp-prompt-textarea { /* Specific class for prompt textareas if needed */
height: 10; /* Example height */
height: 20; /* Example height - Increased from 10 */
/* width: 100%; (from .sidebar-textarea) */
/* margin-bottom: 1; (from .sidebar-textarea) */
}
@@ -375,11 +379,17 @@ Footer { dock: bottom; height: 1; background: $accent-darken-1; }
border: round $surface;
margin-bottom: 1;
}

.ccp-card-action-buttons {
height: auto; /* Let it size to content */
width: 100%;
margin-top: 1; /* Space above buttons */
margin-bottom: 2; /* Extra space below to ensure buttons are visible */
}
.ccp-prompt-action-buttons {
margin-top: 1; /* Add space above the button bar */
height: auto; /* Allow container height to fit buttons */
padding-bottom: 1; /* Add space below buttons before parent's padding */
width: 100%; /* Full width for the button bar */
/* padding-bottom: 1; Removed, parent #ccp-character-editor-view now handles this */
}

.ccp-prompt-action-buttons Button {
@@ -400,6 +410,15 @@ Footer { dock: bottom; height: 1; background: $accent-darken-1; }
#conv-char-right-pane Collapsible.-active > .collapsible--header { /* Optional: when expanded */
background: $primary-background;
}

/* TextAreas for Character Card Display */
.ccp-card-textarea {
height: 15;
width: 100%;
margin-bottom: 1;
border: round $surface; /* Ensuring consistent styling */
}

/* --- End of Prompts Sidebar Vertical --- */
/* --- End of Conversations, Characters & Prompts Window specific layouts --- */
/* ----------------------------- ************************* ----------------------------- */
@@ -734,6 +753,19 @@ MetricsScreen Label.-info-message {
#ingest-window { /* Matches TAB_INGEST */
layout: horizontal;
}
.tldw-api-media-specific-options { /* Common class for specific option blocks */
padding: 1;
border: round $surface;
margin-top: 1;
margin-bottom: 1;
}

/* Added to ensure initially hidden specific options are indeed hidden */
.tldw-api-media-specific-options.hidden {
padding: 1;
border: round $surface;
margin-top: 1;
}

.ingest-nav-pane { /* Style for the left navigation pane */
dock: left;
@@ -1294,11 +1326,15 @@ MetricsScreen Label.-info-message {
}

.ingest-textarea-small {
height: 3;
height: auto;
max-height: 10;
overflow-y: hidden;
margin-bottom: 1;
}
.ingest-textarea-medium {
height: 5;
height: auto;
max-height: 15;
overflow-y: hidden;
margin-bottom: 1;
}
.ingest-form-row {
@@ -1307,6 +1343,12 @@ MetricsScreen Label.-info-message {
height: auto;
margin-bottom: 1;
}
.title-author-row { /* New class for Title/Author row */
layout: horizontal;
width: 100%;
height: auto;
margin-bottom: 0 !important; /* Override existing margin */
}
.ingest-form-col {
width: 1fr;
padding: 0 1;
2 changes: 1 addition & 1 deletion tldw_chatbook/tldw_api/client.py
@@ -183,7 +183,7 @@ async def process_xml(self, request_data: ProcessXMLRequest, file_path: str) ->
)
return BatchProcessXMLResponse(
processed_count=1 if single_item_result.status not in ["Error"] else 0,
errors_count=1 if single_item_result.status == "Error" else 0,
errors_count=1 if single_item_result.status == "Error" or single_item_result.error else 0,
errors=[single_item_result.error] if single_item_result.error else [],
results=[single_item_result]
)
2 changes: 1 addition & 1 deletion tldw_chatbook/tldw_api/schemas.py
@@ -3,7 +3,7 @@
from pydantic import BaseModel, Field, HttpUrl

# Enum-like Literals from API schema
MediaType = Literal['video', 'audio', 'document', 'pdf', 'ebook', 'xml', 'mediawiki_dump'] # Added xml, mediawiki
MediaType = Literal['video', 'audio', 'document', 'pdf', 'ebook', 'xml', 'mediawiki_dump']
ChunkMethod = Literal['semantic', 'tokens', 'paragraphs', 'sentences', 'words', 'ebook_chapters', 'json']
PdfEngine = Literal['pymupdf4llm', 'pymupdf', 'docling']
ScrapeMethod = Literal["individual", "sitemap", "url_level", "recursive_scraping"]
32 changes: 22 additions & 10 deletions tldw_chatbook/tldw_api/utils.py
@@ -2,12 +2,13 @@
#
#
# Imports
import logging
from pathlib import Path
from typing import Dict, Any, Optional, List, IO, Tuple
import mimetypes
#
# 3rd-party Libraries
from pydantic import BaseModel
import httpx
#
#######################################################################################################################
#
@@ -56,24 +57,35 @@ def prepare_files_for_httpx(
file_path_obj = Path(file_path_str)
if not file_path_obj.is_file():
# Or raise an error, or log and skip
print(f"Warning: File not found or not a file: {file_path_str}")
# Consider using logging module here instead of logging.info for a library
logging.warning(f"Warning: File not found or not a file: {file_path_str}")
continue

file_obj = open(file_path_obj, "rb")
# Basic MIME type guessing, can be improved with `mimetypes` library
mime_type = None
if file_path_obj.suffix.lower() == ".mp4":
mime_type = "video/mp4"
elif file_path_obj.suffix.lower() == ".mp3":
mime_type = "audio/mpeg"
# Add more MIME types as needed

mime_type, _ = mimetypes.guess_type(file_path_obj.name) # Use filename for guessing

if mime_type is None:
# If the type can't be guessed, you can fallback to a generic MIME type
# 'application/octet-stream' is a common default for unknown binary data.
mime_type = 'application/octet-stream'
logging.warning(f"Could not guess MIME type for {file_path_obj.name}. Defaulting to {mime_type}.")
logging.info(f"Warning: Could not guess MIME type for {file_path_obj.name}. Defaulting to {mime_type}.")

httpx_files_list.append(
(upload_field_name, (file_path_obj.name, file_obj, mime_type))
)
except Exception as e:
print(f"Error preparing file {file_path_str} for upload: {e}")
# Consider using logging module here
logging.error(f"Error preparing file {file_path_str} for upload: {e}")
# Handle error, e.g., skip this file or raise
# If you skip, ensure file_obj is closed if it was opened.
# However, in this structure, if open() fails, the exception occurs before append.
# If an error occurs after open() but before append, the file might not be closed.
# Using a try/finally for file_obj.close() or opening file_obj within a
# `with open(...) as file_obj:` block inside the `prepare_files_for_httpx`
# is safer if you add logic between open() and append() that could fail.
# For now, httpx will manage the file objects passed to it.
return httpx_files_list if httpx_files_list else None

#
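
A quick, runnable illustration of the mimetypes-based guessing that replaces the hard-coded suffix checks in prepare_files_for_httpx; the filenames here are examples only.

import mimetypes

for name in ("talk.mp4", "episode.mp3", "notes.unknownext"):
    mime, _ = mimetypes.guess_type(name)
    print(name, "->", mime or "application/octet-stream")
# talk.mp4 -> video/mp4
# episode.mp3 -> audio/mpeg
# notes.unknownext -> application/octet-stream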