# Currently, uses naive approaches. Nothing fancy.
#
####
- # Import necessary libraries
import hashlib
import json
import re
#
# Import 3rd party
from loguru import logger
- from tqdm import tqdm
from langdetect import detect, LangDetectException  # Import specific exception
- from transformers import AutoTokenizer, PreTrainedTokenizerBase  # Using AutoTokenizer for flexibility
import nltk
from nltk.tokenize import sent_tokenize
- from sklearn.feature_extraction.text import TfidfVectorizer
- from sklearn.metrics.pairwise import cosine_similarity
+
#
# Import Local
from tldw_chatbook.config import load_settings, get_cli_setting
- from tldw_chatbook.config import global_default_chunk_language
#
- # FIXME
- def load_and_log_configs():
-     pass
- #######################################################################################################################
#######################################################################################################################
# Custom Exceptions
class ChunkingError(Exception):
@@ -160,16 +151,28 @@ def __init__(self,

        logger.debug(f"Chunker initialized with options: {self.options}")

-         try:
-             # Use the tokenizer specified in options if available, otherwise use the argument
-             tokenizer_to_load = self.options.get('tokenizer_name_or_path', tokenizer_name_or_path)
-             self.tokenizer: PreTrainedTokenizerBase = AutoTokenizer.from_pretrained(tokenizer_to_load)
-             logger.info(f"Tokenizer '{tokenizer_to_load}' loaded successfully.")
-         except Exception as e:
-             logger.error(f"Failed to load tokenizer '{self.options.get('tokenizer_name_or_path', tokenizer_name_or_path)}': {e}. Some token-based methods may fail.")
-             # Fallback or raise error? For now, set to None and let methods handle it.
-             self.tokenizer = None
-             # raise ChunkingError(f"Failed to load tokenizer: {e}") from e
+         from transformers import PreTrainedTokenizerBase
+         self._tokenizer: Optional[PreTrainedTokenizerBase] = None
+         self._tokenizer_path_to_load: str = self.options.get('tokenizer_name_or_path', tokenizer_name_or_path)
+
+     from transformers import PreTrainedTokenizerBase
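+     # Re-imported at class level for the property's return annotation below.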
+     @property
+     def tokenizer(self) -> PreTrainedTokenizerBase:
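+         """Lazily load and cache the tokenizer on first access, raising ChunkingError if it cannot be loaded."""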
+         if self._tokenizer is None:
+             try:
+                 from transformers import AutoTokenizer, PreTrainedTokenizerBase  # Import here
+                 logger.info(f"Lazily loading tokenizer: {self._tokenizer_path_to_load}")
+                 self._tokenizer = AutoTokenizer.from_pretrained(self._tokenizer_path_to_load)
+             except ImportError:
+                 logger.error("Transformers library not found. Please install it to use token-based chunking.")
+                 raise ChunkingError("Transformers library not found.")
+             except Exception as e:
+                 logger.error(f"Failed to lazy-load tokenizer '{self._tokenizer_path_to_load}': {e}")
+                 # Optionally, raise a more specific error or allow fallback if applicable
+                 raise ChunkingError(f"Failed to load tokenizer: {e}") from e
+         if self._tokenizer is None:  # Should not happen if logic above is correct, but as a safeguard
+             raise ChunkingError("Tokenizer could not be loaded.")
+         return self._tokenizer

    def _get_option(self, key: str, default_override: Optional[Any] = None) -> Any:
        """Helper to get an option, allowing for a dynamic default."""
@@ -259,10 +262,15 @@ def chunk_text(self,
            base_adaptive_size = self._get_option('base_adaptive_chunk_size')
            min_adaptive_size = self._get_option('min_adaptive_chunk_size')
            max_adaptive_size = self._get_option('max_adaptive_chunk_size')
-             if self.tokenizer:  # NLTK based adaptive_chunk_size needs punkt
-                 max_size = self._adaptive_chunk_size_nltk(text, base_adaptive_size, min_adaptive_size, max_adaptive_size, language)
-             else:  # Fallback if no tokenizer for NLTK based one.
-                 max_size = self._adaptive_chunk_size_non_punkt(text, base_adaptive_size, min_adaptive_size, max_adaptive_size)
+             # Accessing the self.tokenizer property here will trigger lazy loading if not already loaded.
+             try:
+                 if self.tokenizer:  # NLTK based adaptive_chunk_size needs punkt
+                     max_size = self._adaptive_chunk_size_nltk(text, base_adaptive_size, min_adaptive_size, max_adaptive_size, language)
+                 else:  # Fallback if no tokenizer for the NLTK based one. (The tokenizer property would have raised if it failed to load.)
+                     max_size = self._adaptive_chunk_size_non_punkt(text, base_adaptive_size, min_adaptive_size, max_adaptive_size)
+             except ChunkingError:  # Raised by the tokenizer property if transformers is not found or loading fails
+                 logger.warning("Tokenizer could not be loaded for adaptive chunk sizing. Using non-NLTK adaptive sizing.")
+                 max_size = self._adaptive_chunk_size_non_punkt(text, base_adaptive_size, min_adaptive_size, max_adaptive_size)
            logger.info(f"Adaptive chunking adjusted max_size to: {max_size}")

@@ -279,8 +287,7 @@ def chunk_text(self,
        elif chunk_method == 'paragraphs':
            return self._chunk_text_by_paragraphs(text, max_paragraphs=max_size, overlap=overlap)
        elif chunk_method == 'tokens':
-             if not self.tokenizer:
-                 raise ChunkingError("Tokenizer not loaded, cannot use 'tokens' chunking method.")
+             # self.tokenizer will raise ChunkingError if it cannot be loaded by its property.
            return self._chunk_text_by_tokens(text, max_tokens=max_size, overlap=overlap)
        elif chunk_method == 'semantic':
            # semantic_chunking needs to be a method of the class too
@@ -301,8 +308,7 @@ def chunk_text(self,
        elif chunk_method == 'rolling_summarize':
            if not llm_call_function:
                raise ChunkingError("Missing 'llm_call_function' for 'rolling_summarize' method.")
-             if not self.tokenizer:  # Still need tokenizer for token counting in helper
-                 raise ChunkingError("Tokenizer required for 'rolling_summarize' to estimate chunk sizes for LLM.")
+             # self.tokenizer will raise ChunkingError if it cannot be loaded by its property.

            summary = self._rolling_summarize(
                text_to_summarize=text,
@@ -486,10 +492,8 @@ def _chunk_text_by_paragraphs(self, text: str, max_paragraphs: int, overlap: int

    def _chunk_text_by_tokens(self, text: str, max_tokens: int, overlap: int) -> List[str]:
        # This uses the accurate tokenizer version
-         if not self.tokenizer:
-             logger.error("Tokenizer not available for token-based chunking.")
-             raise ChunkingError("Tokenizer not loaded, cannot use 'tokens' chunking method.")
-
+         # Accessing the self.tokenizer property here will trigger lazy loading.
+         # If it fails, ChunkingError will be raised by the property.
        logger.info(f"Chunking by tokens: max_tokens={max_tokens}, overlap_tokens={overlap} (token overlap)")
        if max_tokens <= 0:
            logger.warning("max_tokens must be positive. Returning single chunk or empty.")
@@ -642,11 +646,16 @@ def _semantic_chunking(self, text: str, max_chunk_size: int, unit: str) -> List[
        def _count_units(txt: str, unit_type: str) -> int:
            if unit_type == 'words':
                return len(txt.split())
-             elif unit_type == 'tokens' and self.tokenizer:
+             elif unit_type == 'tokens':  # the self.tokenizer property handles lazy loading
                return len(self.tokenizer.encode(txt))
            elif unit_type == 'characters':
                return len(txt)
-             logger.warning(f"Unknown unit type '{unit_type}' or tokenizer missing for tokens. Defaulting to word count.")
+             # The 'tokens' branch above either returns or raises ChunkingError via the
+             # self.tokenizer property, but keep a defensive fallback here in case an
+             # unknown unit type (or an unexpected tokenizer issue) slips through.
+             logger.warning(f"Unknown unit type '{unit_type}' or tokenizer issues for tokens. Defaulting to word count.")
            return len(txt.split())

@@ -927,9 +936,18 @@ def _chunk_ebook_by_chapters(self, text: str, max_size: int, overlap: int, custo
        for i, chap_data in enumerate(chapter_splits):
            chap_data['metadata']['chunk_index_in_book'] = i + 1
            chap_data['metadata']['total_chapters_detected'] = len(chapter_splits)
-             tokenizer_available = hasattr(self, 'tokenizer') and self.tokenizer and hasattr(self.tokenizer,
-                 'encode') and callable(
-                 self.tokenizer.encode)
+             # Access the self.tokenizer property, which will lazy-load or raise.
+             tokenizer_available = False
+             try:
+                 # Check if the tokenizer can be accessed and used
+                 _ = self.tokenizer.encode("test")  # A simple check that it works
+                 tokenizer_available = True
+             except ChunkingError:  # From the tokenizer property
+                 logger.warning("Tokenizer not available for sub-chunking ebook chapters by tokens.")
+             except Exception as e_tok_check:  # Other unexpected errors
+                 logger.warning(f"Unexpected error checking tokenizer for ebook sub-chunking: {e_tok_check}")
+
+
            if max_size > 0 and tokenizer_available and len(
                    # FIXME
                    self.tokenizer.encode(chap_data['text'])) > max_size:
@@ -1045,9 +1063,7 @@ def _rolling_summarize(self,
                           system_prompt_content: str,
                           additional_instructions: Optional[str]
                           ) -> str:
-         if not self.tokenizer:  # Should have been checked by caller (chunk_text)
-             raise ChunkingError("Tokenizer required for rolling summarization.")
-
+         # The self.tokenizer property will be accessed here.
        logger.info(f"Rolling summarization called. Detail: {detail}")
        text_token_length = len(self.tokenizer.encode(text_to_summarize))
        max_summarization_chunks = max(1, text_token_length // min_chunk_tokens)
@@ -1070,6 +1086,14 @@ def _rolling_summarize(self,
        if additional_instructions:
            final_system_prompt += f"\n\n{additional_instructions}"

+         try:
+             from tqdm import tqdm  # Import here
+         except ImportError:
+             logger.warning("tqdm library not found. Progress bar for summarization parts will be disabled. Install with 'pip install tqdm'.")
+             # Define a dummy tqdm if not found, so the loop doesn't break
+             def tqdm(iterable, *args, **kwargs):
+                 return iterable
+
        accumulated_summaries = []
        for i, chunk_for_llm in enumerate(tqdm(text_chunks_for_llm, desc="Summarizing parts", disable=not verbose)):
            user_message_content = chunk_for_llm
@@ -1117,8 +1141,7 @@ def _combine_chunks_for_llm(self,
                                headers: Optional[str] = None,
                                add_ellipsis_for_overflow: bool = True,
                                ) -> Tuple[List[str], List[List[int]], int]:
-         if not self.tokenizer:
-             raise ChunkingError("Tokenizer required for _combine_chunks_for_llm.")
+         # The self.tokenizer property will be accessed here.

        dropped_chunk_count = 0
        output_combined_texts = []