23 | 23 | from typing import Optional, Union, Generator, Any, Dict, List, Callable
24 | 24 | #
25 | 25 | # 3rd-Party Imports
   | 26 | +from loguru import logger
26 | 27 | import requests
27 | 28 | from requests.adapters import HTTPAdapter
28 | 29 | from urllib3 import Retry
29 | 30 | #
30 | 31 | # Import Local
31 |    | -from tldw_chatbook.Chunking.Chunk_Lib import (
32 |    | -    improved_chunking_process
33 |    | -)
34 | 32 | from tldw_chatbook.LLM_Calls.Local_Summarization_Lib import (
35 | 33 |     summarize_with_llama,
36 | 34 |     summarize_with_kobold,
   ...
44 | 42 | )
45 | 43 | from tldw_chatbook.Logging_Config import logging
46 | 44 | from tldw_chatbook.config import get_cli_setting
47 |    | -
   | 45 | +try:
   | 46 | +    from tldw_chatbook.Chunking.Chunk_Lib import (
   | 47 | +        improved_chunking_process
   | 48 | +    )
   | 49 | +    CHUNKER_AVAILABLE = True
   | 50 | +except ImportError:
   | 51 | +    logger.warning("Failed to import chunking library. Will not be available.")
   | 52 | +    CHUNKER_AVAILABLE = False
48 | 53 | 
49 | 54 | # FIXME
50 | 55 | def load_and_log_configs():
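The hunk above replaces the unconditional `improved_chunking_process` import with a guarded one, so the module still loads when the chunking dependency is missing. A minimal standalone sketch of that optional-import pattern (the module and function names here are placeholders, not the project's real ones):

```python
# Optional-dependency guard: attempt the import once at module load time and
# record the outcome in a module-level flag that call sites can check.
try:
    from some_optional_package import fancy_chunker  # placeholder, not a real dependency
    CHUNKER_AVAILABLE = True
except ImportError:
    fancy_chunker = None  # keep the name bound so later references cannot raise NameError
    CHUNKER_AVAILABLE = False


def chunk_or_passthrough(text: str) -> list:
    """Use the optional chunker when it imported cleanly; otherwise pass the text through."""
    if CHUNKER_AVAILABLE:
        return fancy_chunker(text)
    return [text]
```

The patch takes the same approach but leaves the name unbound on import failure, which is safe only because every use of `improved_chunking_process` sits behind the `if CHUNKER_AVAILABLE` check added in the second hunk.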
@@ -418,82 +423,83 @@ def consume_generator(gen):
418 | 423 |     default_chunk_opts = {'method': 'sentences', 'max_size': 500, 'overlap': 200}
419 | 424 |     current_chunk_options = chunk_options if isinstance(chunk_options, dict) else default_chunk_opts
420 | 425 | 
421 |     | -    if recursive_summarization:
422 |     | -        logging.info("Performing recursive summarization.")
423 |     | -        chunks_data = improved_chunking_process(text_content, current_chunk_options) # Renamed variable for clarity
424 |     | -        if not chunks_data:
425 |     | -            logging.warning("Recursive summarization: Chunking produced no chunks.")
426 |     | -            return "Error: Recursive summarization failed - no chunks generated."
427 |     | -
428 |     | -        # Extract just the text from the chunk data
429 |     | -        text_chunks = [chunk['text'] for chunk in chunks_data]
430 |     | -        logging.debug(f"Generated {len(text_chunks)} text chunks for recursive summarization.")
431 |     | -
432 |     | -        # Define the summarizer function for recursive_summarize_chunks
433 |     | -        # It must accept ONE argument (the text) and return the summary string.
434 |     | -        # It captures necessary variables (api_name, key, temp, prompts, etc.) from the outer scope (closure).
435 |     | -        # It must handle potential errors from the API call and return an error string if needed.
436 |     | -        def recursive_step_processor(text_to_summarize: str) -> str:
437 |     | -            logging.debug(f"recursive_step_processor called with text length: {len(text_to_summarize)}")
438 |     | -            # Force non-streaming for internal steps and consume immediately
439 |     | -            api_result = _dispatch_to_api(
440 |     | -                text_to_summarize,
441 |     | -                custom_prompt_arg, # Custom prompt is handled by _dispatch_to_api
442 |     | -                api_name,
443 |     | -                api_key,
444 |     | -                temp,
445 |     | -                system_message, # System message is handled by _dispatch_to_api
446 |     | -                streaming=False # IMPORTANT: Force non-streaming for internal recursive steps
    | 426 | +    if CHUNKER_AVAILABLE == True:
    | 427 | +        if recursive_summarization:
    | 428 | +            logging.info("Performing recursive summarization.")
    | 429 | +            chunks_data = improved_chunking_process(text_content, current_chunk_options) # Renamed variable for clarity
    | 430 | +            if not chunks_data:
    | 431 | +                logging.warning("Recursive summarization: Chunking produced no chunks.")
    | 432 | +                return "Error: Recursive summarization failed - no chunks generated."
    | 433 | +
    | 434 | +            # Extract just the text from the chunk data
    | 435 | +            text_chunks = [chunk['text'] for chunk in chunks_data]
    | 436 | +            logging.debug(f"Generated {len(text_chunks)} text chunks for recursive summarization.")
    | 437 | +
    | 438 | +            # Define the summarizer function for recursive_summarize_chunks
    | 439 | +            # It must accept ONE argument (the text) and return the summary string.
    | 440 | +            # It captures necessary variables (api_name, key, temp, prompts, etc.) from the outer scope (closure).
    | 441 | +            # It must handle potential errors from the API call and return an error string if needed.
    | 442 | +            def recursive_step_processor(text_to_summarize: str) -> str:
    | 443 | +                logging.debug(f"recursive_step_processor called with text length: {len(text_to_summarize)}")
    | 444 | +                # Force non-streaming for internal steps and consume immediately
    | 445 | +                api_result = _dispatch_to_api(
    | 446 | +                    text_to_summarize,
    | 447 | +                    custom_prompt_arg, # Custom prompt is handled by _dispatch_to_api
    | 448 | +                    api_name,
    | 449 | +                    api_key,
    | 450 | +                    temp,
    | 451 | +                    system_message, # System message is handled by _dispatch_to_api
    | 452 | +                    streaming=False # IMPORTANT: Force non-streaming for internal recursive steps
    | 453 | +                )
    | 454 | +                # consume_generator handles both strings and generators, returning a string
    | 455 | +                processed_result = consume_generator(api_result)
    | 456 | +
    | 457 | +                # Ensure the result is a string (consume_generator should do this)
    | 458 | +                if not isinstance(processed_result, str):
    | 459 | +                    logging.error(f"API dispatch/consumption did not return a string. Got: {type(processed_result)}")
    | 460 | +                    # Return an error string that recursive_summarize_chunks can detect
    | 461 | +                    return f"Error: Internal summarization step failed to produce string output (got {type(processed_result)})"
    | 462 | +
    | 463 | +                logging.debug(f"recursive_step_processor finished. Result length: {len(processed_result)}")
    | 464 | +                # Return the result string (which could be a summary or an error message from consume_generator)
    | 465 | +                return processed_result
    | 466 | +
    | 467 | +            # Call the simplified recursive_summarize_chunks utility
    | 468 | +            # It now only needs the list of text chunks and the processing function
    | 469 | +            final_result = recursive_summarize_chunks(
    | 470 | +                chunks=text_chunks,
    | 471 | +                summarize_func=recursive_step_processor
447 | 472 |             )
448 |     | -            # consume_generator handles both strings and generators, returning a string
449 |     | -            processed_result = consume_generator(api_result)
450 |     | -
451 |     | -            # Ensure the result is a string (consume_generator should do this)
452 |     | -            if not isinstance(processed_result, str):
453 |     | -                logging.error(f"API dispatch/consumption did not return a string. Got: {type(processed_result)}")
454 |     | -                # Return an error string that recursive_summarize_chunks can detect
455 |     | -                return f"Error: Internal summarization step failed to produce string output (got {type(processed_result)})"
456 |     | -
457 |     | -            logging.debug(f"recursive_step_processor finished. Result length: {len(processed_result)}")
458 |     | -            # Return the result string (which could be a summary or an error message from consume_generator)
459 |     | -            return processed_result
460 |     | -
461 |     | -        # Call the simplified recursive_summarize_chunks utility
462 |     | -        # It now only needs the list of text chunks and the processing function
463 |     | -        final_result = recursive_summarize_chunks(
464 |     | -            chunks=text_chunks,
465 |     | -            summarize_func=recursive_step_processor
466 |     | -        )
467 |     | -        # The result of recursive_summarize_chunks is now the final string summary or an error string
468 |     | -
469 |     | -    elif chunked_summarization:
470 |     | -        logging.info("Performing chunked summarization (summarize each, then combine).")
471 |     | -        chunks = improved_chunking_process(text_content, current_chunk_options)
472 |     | -        if not chunks:
473 |     | -            logging.warning("Chunked summarization: Chunking produced no chunks.")
474 |     | -            return "Error: Chunked summarization failed - no chunks generated."
475 |     | -        logging.debug(f"Generated {len(chunks)} chunks for chunked summarization.")
476 |     | -
477 |     | -        chunk_summaries = []
478 |     | -        for i, chunk in enumerate(chunks):
479 |     | -            logging.debug(f"Summarizing chunk {i+1}/{len(chunks)}")
480 |     | -            # Summarize each chunk - force non-streaming for API call
481 |     | -            chunk_summary_result = _dispatch_to_api(
482 |     | -                chunk['text'], custom_prompt_arg, api_name, api_key,
483 |     | -                temp, system_message, streaming=False # Force non-streaming
484 |     | -            )
485 |     | -            # Consume generator immediately
486 |     | -            processed_chunk_summary = consume_generator(chunk_summary_result)
487 |     | -
488 |     | -            if isinstance(processed_chunk_summary, str) and not processed_chunk_summary.startswith("Error:"):
489 |     | -                chunk_summaries.append(processed_chunk_summary)
490 |     | -            else:
491 |     | -                error_detail = processed_chunk_summary if isinstance(processed_chunk_summary, str) else "Unknown error"
492 |     | -                logging.warning(f"Failed to summarize chunk {i+1}: {error_detail}")
493 |     | -                chunk_summaries.append(f"[Error summarizing chunk {i+1}: {error_detail}]") # Add error placeholder
    | 473 | +            # The result of recursive_summarize_chunks is now the final string summary or an error string
    | 474 | +
    | 475 | +        elif chunked_summarization:
    | 476 | +            logging.info("Performing chunked summarization (summarize each, then combine).")
    | 477 | +            chunks = improved_chunking_process(text_content, current_chunk_options)
    | 478 | +            if not chunks:
    | 479 | +                logging.warning("Chunked summarization: Chunking produced no chunks.")
    | 480 | +                return "Error: Chunked summarization failed - no chunks generated."
    | 481 | +            logging.debug(f"Generated {len(chunks)} chunks for chunked summarization.")
    | 482 | +
    | 483 | +            chunk_summaries = []
    | 484 | +            for i, chunk in enumerate(chunks):
    | 485 | +                logging.debug(f"Summarizing chunk {i+1}/{len(chunks)}")
    | 486 | +                # Summarize each chunk - force non-streaming for API call
    | 487 | +                chunk_summary_result = _dispatch_to_api(
    | 488 | +                    chunk['text'], custom_prompt_arg, api_name, api_key,
    | 489 | +                    temp, system_message, streaming=False # Force non-streaming
    | 490 | +                )
    | 491 | +                # Consume generator immediately
    | 492 | +                processed_chunk_summary = consume_generator(chunk_summary_result)
    | 493 | +
    | 494 | +                if isinstance(processed_chunk_summary, str) and not processed_chunk_summary.startswith("Error:"):
    | 495 | +                    chunk_summaries.append(processed_chunk_summary)
    | 496 | +                else:
    | 497 | +                    error_detail = processed_chunk_summary if isinstance(processed_chunk_summary, str) else "Unknown error"
    | 498 | +                    logging.warning(f"Failed to summarize chunk {i+1}: {error_detail}")
    | 499 | +                    chunk_summaries.append(f"[Error summarizing chunk {i+1}: {error_detail}]") # Add error placeholder
494 | 500 | 
495 |     | -        # Combine the summaries
496 |     | -        final_result = "\n\n---\n\n".join(chunk_summaries) # Join with a separator
    | 501 | +            # Combine the summaries
    | 502 | +            final_result = "\n\n---\n\n".join(chunk_summaries) # Join with a separator
497 | 503 | 
498 | 504 |     else:
499 | 505 |         # No chunking - direct summarization
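For context, the two branches gated above implement the usual chunk-then-summarize strategies: summarize each chunk and join the results ("chunked"), or keep re-summarizing the joined summaries until they fit in one pass ("recursive", here delegated to `recursive_summarize_chunks` with a closure that carries the API settings). A rough standalone sketch of both strategies, using placeholder chunking and summarizer functions rather than the module's real `improved_chunking_process` and `_dispatch_to_api`:

```python
from typing import Callable, List


def split_into_chunks(text: str, max_size: int = 500) -> List[str]:
    # Stand-in for the real chunking library: naive fixed-size slices.
    return [text[i:i + max_size] for i in range(0, len(text), max_size)] or [text]


def summarize_chunked(text: str, summarize: Callable[[str], str]) -> str:
    # "Chunked" strategy: summarize every chunk independently, then join the
    # partial summaries with a separator (mirrors the "\n\n---\n\n" join above).
    summaries = [summarize(chunk) for chunk in split_into_chunks(text)]
    return "\n\n---\n\n".join(summaries)


def summarize_recursive(text: str, summarize: Callable[[str], str],
                        max_size: int = 500, max_rounds: int = 5) -> str:
    # "Recursive" strategy: summarize the chunks, join the results, and repeat
    # until everything fits in a single chunk (or the round limit is reached).
    combined = text
    for _ in range(max_rounds):
        chunks = split_into_chunks(combined, max_size)
        combined = "\n".join(summarize(chunk) for chunk in chunks)
        if len(chunks) == 1:
            break
    return combined


if __name__ == "__main__":
    # Trivial stand-in summarizer: keep the first 80 characters of the input.
    fake_summarize = lambda passage: passage[:80]
    print(summarize_recursive("some long text " * 200, fake_summarize))
```

In the patched code the `summarize` callable is the `recursive_step_processor` closure, which forwards the custom prompt, API name, key, and temperature, and forces `streaming=False` so every intermediate step yields a plain string.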