Skip to content

Commit ea76b0e

Browse files
authored
Merge pull request #51 from rmusser01/dev
Sync
2 parents 6cd3be6 + 4787e6a commit ea76b0e

File tree

15 files changed

+1703
-480
lines changed

15 files changed

+1703
-480
lines changed

Tests/UI/test_ingest_window.py

Lines changed: 225 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,225 @@
1+
# test_ingest_window.py
2+
#
3+
# Imports
4+
import pytest
5+
from pytest_mock import MockerFixture # For mocking
6+
from pathlib import Path
7+
#
8+
# Third-party Libraries
9+
from textual.app import App, ComposeResult
10+
from textual.widgets import Button, Input, Select, Checkbox, TextArea, RadioSet, RadioButton, Collapsible, ListView, \
11+
ListItem, Markdown, LoadingIndicator, Label, Static
12+
from textual.containers import Container, VerticalScroll, Horizontal, Vertical
13+
from textual.pilot import Pilot
14+
from textual.css.query import QueryError
15+
#
16+
# Local Imports
17+
from tldw_chatbook.app import TldwCli # The main app
18+
from tldw_chatbook.UI.Ingest_Window import IngestWindow, MEDIA_TYPES # Import MEDIA_TYPES
19+
from tldw_chatbook.tldw_api.schemas import ProcessVideoRequest, ProcessAudioRequest, ProcessPDFRequest, \
20+
ProcessEbookRequest, ProcessDocumentRequest, ProcessXMLRequest, ProcessMediaWikiRequest
21+
#
22+
#
23+
########################################################################################################################
24+
#
25+
# Fixtures and Helper Functions
26+
27+
# Helper to get the IngestWindow instance from the app
28+
async def get_ingest_window(pilot: Pilot) -> IngestWindow:
    """Return the first ``IngestWindow`` found in the app driven by *pilot*.

    Args:
        pilot: Textual test pilot; its ``app`` attribute is queried.

    Returns:
        The first widget matched by ``pilot.app.query(IngestWindow)``.

    Raises:
        AssertionError: If the query matches no ``IngestWindow``.
    """
    ingest_window_query = pilot.app.query(IngestWindow)
    # Emptiness is tested with len(), which query results support.
    # NOTE(review): the previous check read an ``is_empty`` attribute, which
    # does not appear in Textual's documented DOMQuery API and would raise
    # AttributeError before the assertion message could be shown -- confirm.
    assert len(ingest_window_query) > 0, "IngestWindow not found"
    return ingest_window_query.first()
32+
33+
34+
@pytest.fixture
async def app_pilot() -> Pilot:
    """Yield a pilot for a running ``TldwCli`` app switched to the Ingest tab.

    The fixture starts the app with ``run_test()``, clicks the ``#tab-ingest``
    button, and asserts that the ``IngestWindow`` is displayed and that the
    app's ``current_tab`` equals ``"ingest"`` before handing the pilot to the
    test.

    Yields:
        The ``Pilot`` controlling the running app.

    Raises:
        AssertionError: If the Ingest tab button or window cannot be found, or
            the tab switch did not take effect.
    """
    app = TldwCli()
    async with app.run_test() as pilot:
        # Per the original author's notes: the app starts on the Chat tab and
        # tab switching is driven by pressing the tab button, assumed to have
        # the id "tab-ingest".
        try:
            await pilot.click("#tab-ingest")
        except QueryError:
            # Fallback when the selector-based click fails: look the button up
            # by id among all Button widgets and click the widget itself.
            ingest_tab_button = next(
                (button for button in pilot.app.query(Button) if button.id == "tab-ingest"),
                None,
            )
            assert ingest_tab_button is not None, "Ingest tab button not found"
            await pilot.click(ingest_tab_button)

        # The window must exist and be shown ...
        ingest_window = await get_ingest_window(pilot)
        assert ingest_window is not None
        assert ingest_window.display is True, "IngestWindow is not visible after switching to Ingest tab"
        # ... and the app must report the ingest tab as current.
        assert pilot.app.current_tab == "ingest", "App's current_tab is not set to 'ingest'"
        yield pilot
63+
64+
65+
# Test Class
66+
class TestIngestWindowTLDWAPI:
    """UI tests for the TLDW API navigation buttons, views and forms of ``IngestWindow``."""

    async def test_initial_tldw_api_nav_buttons_and_views(self, app_pilot: Pilot):
        """Every media type has a nav button and a view whose visibility matches the active view."""
        ingest_window = await get_ingest_window(app_pilot)
        # Nav buttons are looked up inside the window's "ingest-nav-pane" node.
        nav_pane = ingest_window.query_one("#ingest-nav-pane")

        for mt in MEDIA_TYPES:
            # Widget ids use hyphens where the media type name uses underscores.
            id_suffix = mt.replace('_', '-')
            nav_button_id = f"ingest-nav-tldw-api-{id_suffix}"
            view_id = f"ingest-view-tldw-api-{id_suffix}"

            # Navigation button: present and labelled after the media type.
            nav_button = nav_pane.query_one(f"#{nav_button_id}", Button)
            assert nav_button is not None, f"Navigation button {nav_button_id} not found"
            if mt == "mediawiki_dump":
                # Title-casing would not produce this capitalisation.
                expected_label_part = "MediaWiki Dump"
            else:
                expected_label_part = mt.replace('_', ' ').title()
            assert expected_label_part in str(nav_button.label), f"Label for {nav_button_id} incorrect"

            # View container: present.
            view_area = ingest_window.query_one(f"#{view_id}", Container)
            assert view_area is not None, f"View area {view_id} not found"

            # Only the view named by the app's `ingest_active_view` may be
            # displayed; every other TLDW API view must be hidden.
            active_ingest_view_on_app = app_pilot.app.ingest_active_view
            if view_id == active_ingest_view_on_app:
                assert view_area.display is True, f"{view_id} should be visible as it's the active ingest view ('{active_ingest_view_on_app}')"
            else:
                assert view_area.display is False, f"{view_id} should be hidden if not the active ingest view ('{active_ingest_view_on_app}')"

    @pytest.mark.parametrize("media_type", MEDIA_TYPES)
    async def test_tldw_api_navigation_and_view_display(self, app_pilot: Pilot, media_type: str):
        """Clicking a media type's nav button shows its view, hides the others, and exposes its form widgets."""
        ingest_window = await get_ingest_window(app_pilot)
        id_suffix = media_type.replace('_', '-')
        nav_button_id = f"ingest-nav-tldw-api-{id_suffix}"
        target_view_id = f"ingest-view-tldw-api-{id_suffix}"

        await app_pilot.click(f"#{nav_button_id}")
        await app_pilot.pause()  # Allow watchers to update display properties

        # The clicked view is shown and recorded as the active one.
        target_view_area = ingest_window.query_one(f"#{target_view_id}", Container)
        assert target_view_area.display is True, f"{target_view_id} should be visible after clicking {nav_button_id}"
        assert app_pilot.app.ingest_active_view == target_view_id, f"App's active ingest view should be {target_view_id}"

        # Every other TLDW API view is hidden.
        for other_mt in MEDIA_TYPES:
            if other_mt == media_type:
                continue
            other_view_id = f"ingest-view-tldw-api-{other_mt.replace('_', '-')}"
            other_view_area = ingest_window.query_one(f"#{other_view_id}", Container)
            assert other_view_area.display is False, f"{other_view_id} should be hidden when {target_view_id} is active"

        # Common form widgets; their ids embed the raw media type name.
        common_endpoint_input = target_view_area.query_one(f"#tldw-api-endpoint-url-{media_type}", Input)
        assert common_endpoint_input is not None

        common_submit_button = target_view_area.query_one(f"#tldw-api-submit-{media_type}", Button)
        assert common_submit_button is not None

        # Media-specific options:
        # media type -> (options container selector, widget id prefix, widget class).
        option_probes = {
            "video": ("#tldw-api-video-options", "tldw-api-video-transcription-model", Input),
            "audio": ("#tldw-api-audio-options", "tldw-api-audio-transcription-model", Input),
            "pdf": ("#tldw-api-pdf-options", "tldw-api-pdf-engine", Select),
            "ebook": ("#tldw-api-ebook-options", "tldw-api-ebook-extraction-method", Select),
            "xml": ("#tldw-api-xml-options", "tldw-api-xml-auto-summarize", Checkbox),
            "mediawiki_dump": ("#tldw-api-mediawiki-options", "tldw-api-mediawiki-wiki-name", Input),
        }

        if media_type == "document":
            # The document view has minimal specific options: only the
            # container is required, a Label inside it is optional.
            opts_container = target_view_area.query_one("#tldw-api-document-options", Container)
            assert opts_container.display is True
            try:
                label = opts_container.query_one(Label)
                assert label is not None
            except QueryError:
                # No label present is acceptable for the document view.
                pass
        elif media_type in option_probes:
            container_selector, widget_id_prefix, widget_class = option_probes[media_type]
            opts_container = target_view_area.query_one(container_selector, Container)
            assert opts_container.display is True
            widget = opts_container.query_one(f"#{widget_id_prefix}-{media_type}", widget_class)
            assert widget is not None

    async def test_tldw_api_video_submission_data_collection(self, app_pilot: Pilot, mocker: MockerFixture):
        """Submitting the video form calls ``process_video`` once with a request built from the form values."""
        media_type = "video"
        ingest_window = await get_ingest_window(app_pilot)

        # Open the video view through its nav button.
        await app_pilot.click(f"#ingest-nav-tldw-api-{media_type}")
        await app_pilot.pause()  # Allow UI to update

        target_view_area = ingest_window.query_one(f"#ingest-view-tldw-api-{media_type}", Container)
        assert target_view_area.display is True, "Video view area not displayed after click"

        # Replace the API client class referenced by the ingest event handlers
        # with a factory returning a mock whose process_video/close are awaitable.
        mock_process_video = mocker.AsyncMock(return_value=mocker.MagicMock())
        mock_api_client_instance = mocker.MagicMock()
        mock_api_client_instance.process_video = mock_process_video
        mock_api_client_instance.close = mocker.AsyncMock()

        mocker.patch("tldw_chatbook.Event_Handlers.ingest_events.TLDWAPIClient", return_value=mock_api_client_instance)

        def form_widget(id_prefix, widget_class):
            """Look up a form widget of the video view by its id prefix."""
            return target_view_area.query_one(f"#{id_prefix}-{media_type}", widget_class)

        # Locate every widget first, then fill in the form.
        endpoint_url_input = form_widget("tldw-api-endpoint-url", Input)
        urls_textarea = form_widget("tldw-api-urls", TextArea)
        video_trans_model_input = form_widget("tldw-api-video-transcription-model", Input)
        auth_method_select = form_widget("tldw-api-auth-method", Select)

        endpoint_url_input.value = "http://fakeapi.com"
        urls_textarea.text = "http://example.com/video.mp4"
        video_trans_model_input.value = "test_video_model"
        auth_method_select.value = "config_token"

        # The "config_token" auth method is expected to pick this token up
        # (asserted below through request_model_arg.api_key).
        app_pilot.app.app_config = {"tldw_api": {"auth_token_config": "fake_token"}}

        await app_pilot.click(f"#tldw-api-submit-{media_type}")
        await app_pilot.pause(delay=0.5)

        mock_process_video.assert_called_once()
        # Only positional arguments of the recorded call are inspected.
        positional_args = mock_process_video.call_args[0]

        assert len(positional_args) >= 1, "process_video not called with request_model"
        request_model_arg = positional_args[0]

        assert isinstance(request_model_arg, ProcessVideoRequest)
        assert request_model_arg.urls == ["http://example.com/video.mp4"]
        assert request_model_arg.transcription_model == "test_video_model"
        assert request_model_arg.api_key == "fake_token"

        # A second positional argument (local file paths) is treated as
        # optional; when it is passed it must be an empty list.
        if len(positional_args) > 1:
            local_files_arg = positional_args[1]
            assert local_files_arg == [], "local_files_arg was not empty"

pyproject.toml

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4,27 +4,26 @@ requires = ["setuptools>=61.0"]
44
build-backend = "setuptools.build_meta"
55

66
[project]
7-
name = "tldw_chatbook" # This is the distribution name (what you pip install)
7+
name = "tldw_chatbook"
88
version = "0.1.0"
99
authors = [
1010
{ name="Robert Musser", email="[email protected]" },
1111
]
1212
description = "A Textual TUI for chatting with LLMs, and interacting with the tldw server."
13-
readme = "README.md" # Assumes README.md is in the project root
13+
readme = "README.md"
1414
requires-python = ">=3.11"
15-
license = { file="LICENSE" } # Assumes LICENSE file is in the project root
15+
license = { file="LICENSE" }
1616
keywords = ["tui", "cli", "llm", "textual", "ai", "chat"]
1717
classifiers = [
1818
"Development Status :: 3 - Alpha",
1919
"Intended Audience :: Developers",
2020
"Intended Audience :: End Users/Desktop",
2121
"Topic :: Utilities",
2222
"Topic :: Terminals",
23-
"License :: OSI Approved :: GNU Affero General Public License v3 or later (AGPLv3+)", # Corrected to AGPL
23+
"License :: OSI Approved :: GNU Affero General Public License v3 or later (AGPLv3+)",
2424
"Programming Language :: Python :: 3",
2525
"Programming Language :: Python :: 3.11",
2626
"Programming Language :: Python :: 3.12",
27-
# "Programming Language :: Python :: 3.13", # Only list if you actively test/support it
2827
"Environment :: Console",
2928
"Operating System :: OS Independent",
3029
]
@@ -34,9 +33,9 @@ dependencies = [
3433
"chardet",
3534
"httpx",
3635
"loguru",
37-
"textual>=3.3.0", # Specify a minimum Textual version if features depend on it
36+
"textual>=3.3.0",
3837
"requests",
39-
"rich", # Usually pulled in by Textual, but explicit is fine
38+
"rich",
4039
"Pillow",
4140
"PyYAML",
4241
"pydantic",

requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ loguru
1010
pydantic
1111
pyyaml
1212
httpx
13-
pytest
13+
#pytest
1414
emoji
1515
# Chunk Library
1616
#tqdm

tldw_chatbook/DB/Client_Media_DB_v2.py

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1917,9 +1917,12 @@ def add_media_with_keywords(self,
19171917
# --- Handle Unvectorized Chunks ---
19181918
if chunks is not None: # chunks argument was provided (could be empty or list of dicts)
19191919
if action == "updated":
1920-
# Hard delete old chunks for this media_id if updating
1921-
logging.debug(f"Hard deleting existing UnvectorizedMediaChunks for updated media_id {media_id}.")
1922-
conn.execute("DELETE FROM UnvectorizedMediaChunks WHERE media_id = ?", (media_id,))
1920+
# If overwriting and new chunks are provided, clear old ones.
1921+
# If `chunks` is an empty list, it also means clear old ones.
1922+
if overwrite: # Only delete if overwrite is true
1923+
logging.debug(
1924+
f"Hard deleting existing UnvectorizedMediaChunks for updated media_id {media_id} due to overwrite and new chunks being provided.")
1925+
conn.execute("DELETE FROM UnvectorizedMediaChunks WHERE media_id = ?", (media_id,))
19231926

19241927
num_chunks_saved = 0
19251928
if chunks: # If chunks list is not empty
@@ -1980,9 +1983,18 @@ def add_media_with_keywords(self,
19801983
raise DatabaseError(f"Failed to save chunk {i} due to integrity constraint: {e}") from e
19811984
logging.info(f"Saved {num_chunks_saved} unvectorized chunks for media_id {media_id}.")
19821985

1983-
# Update Media chunking_status to 'completed' as chunk processing is done (even if 0 chunks were provided)
1984-
conn.execute("UPDATE Media SET chunking_status = 'completed' WHERE id = ?", (media_id,))
1985-
logging.debug(f"Updated Media chunking_status to 'completed' for media_id {media_id} after chunk processing.")
1986+
# Update Media chunking_status
1987+
# If chunks were provided (even an empty list, meaning "clear existing and add these (none)"),
1988+
# then chunking is considered 'completed' from the perspective of this operation.
1989+
# If `chunks` was None (meaning "don't touch existing chunks"), status remains as is or 'pending'.
1990+
final_chunking_status_for_media = 'completed' # if chunks is not None
1991+
# If the main `perform_chunking` flag (from request, not DB field) was false,
1992+
# then perhaps status should be different. For now, if chunks data is passed, it's 'completed'.
1993+
# This might need more nuanced logic based on the `perform_chunking` flag from the original request.
1994+
conn.execute("UPDATE Media SET chunking_status = ? WHERE id = ?",
1995+
(final_chunking_status_for_media, media_id,))
1996+
logging.debug(
1997+
f"Updated Media chunking_status to '{final_chunking_status_for_media}' for media_id {media_id} after chunk processing.")
19861998

19871999
# Original chunk_options placeholder log
19882000
if chunk_options:

0 commit comments

Comments
 (0)