# tldw_chatbook/Coding/code_mapper.py
# Description: Provides CodeContextManager, which indexes repository source files and builds
#              LLM context for a TUI application (via Aider's RepoMap or simple concatenation).
#
# Imports
import os
import time
from pathlib import Path
from collections import defaultdict
#
# Third-Party Imports
from aider.repomap import RepoMap, find_src_files  # Import necessary components
from grep_ast import filename_to_lang  # Language detection helper; RepoMap uses the same function internally
#
########################################################################################################################
#
# You might need to provide stubs or mock objects for RepoMap's dependencies
# if you're not running the full Aider environment, e.g., for `io` and `main_model`.
class SimpleIO:
    def tool_output(self, message):
        print(f"[INFO] {message}")

    def tool_warning(self, message):
        print(f"[WARNING] {message}")

    def tool_error(self, message):
        print(f"[ERROR] {message}")

    def read_text(self, fpath):
        try:
            with open(fpath, "r", encoding="utf-8", errors="ignore") as f:
                return f.read()
        except Exception as e:
            self.tool_error(f"Error reading {fpath}: {e}")
            return None


class MockModel:
    def token_count(self, text):
        # A simple approximation for token counting.
        # For more accuracy, integrate with a real tokenizer (e.g., tiktoken).
        return len(text.split())


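# A minimal sketch of a more accurate counter, assuming the optional `tiktoken`
# package is available; the encoding name below is an assumption and should be
# matched to whatever model the TUI actually targets.
class TiktokenModel:
    def __init__(self, encoding_name="cl100k_base"):
        import tiktoken  # imported lazily so the rest of the module works without it
        self.encoding = tiktoken.get_encoding(encoding_name)

    def token_count(self, text):
        # Count tokens with a real tokenizer instead of whitespace splitting.
        return len(self.encoding.encode(text))

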
class CodeContextManager:
    def __init__(self, repo_root, aider_map_tokens=1024, verbose=False):
        self.repo_root = os.path.abspath(repo_root)
        self.verbose = verbose

        # --- Initialize Aider's RepoMap ---
        # You'll need to provide implementations or stubs for `io` and `main_model`
        # if they are strictly required by the parts of RepoMap you use.
        self.io = SimpleIO()  # Replace with your TUI's IO if it has one
        self.main_model = MockModel()  # Replace with a proper model tokenizer

        self.aider_repo_map = RepoMap(
            map_tokens=aider_map_tokens,
            root=self.repo_root,
            main_model=self.main_model,
            io=self.io,
            verbose=self.verbose,
            # You might want to configure other RepoMap params as needed
        )
        # To store data for "indexing for display"
        self.file_index = {}  # {rel_fpath: {"abs_fpath": str, "tags": list[Tag], "mtime": float, "error": str}}
        self.last_index_time = 0

    # --- Goal 1: Indexing for Display and Review ---
    def get_file_list_for_display(self, force_rescan=False):
        """
        Scans the repository (or uses cached data) to get a list of all files
        and their top-level symbols/tags for display in a TUI.

        Args:
            force_rescan (bool): If True, forces a re-scan of all files, ignoring mtime checks.

        Returns:
            dict: {rel_fpath: {"abs_fpath": str, "tags_summary": list[str], "error": str or None}}
                  tags_summary entries look like ["def: MyClass (L10)", "def: my_func (L42)"]
        """
        self.io.tool_output("Building file index for display...")
        current_scan_time = time.time()
        updated_files = 0
        processed_files = 0

        # Discover all potential source files in the repository.
        # You might want to use a more sophisticated discovery mechanism,
        # like `git ls-files`, or respect .gitignore. Aider's RepoMap
        # usually gets its file list from git, so it does not provide its own
        # comprehensive discovery that respects .gitignore.
        # For now, use a simple walk.
        all_repo_files = []
        for root, _, files in os.walk(self.repo_root):
            if ".git" in root.split(os.sep):  # Basic .git ignore
                continue
            for file in files:
                abs_fpath = os.path.join(root, file)
                # Filter out some common non-code files (can be improved)
                if not any(abs_fpath.endswith(ext) for ext in
                           [".py", ".js", ".ts", ".java", ".c", ".cpp", ".h", ".hpp", ".rs", ".go", ".md"]):
                    if self.aider_repo_map.get_rel_fname(abs_fpath).startswith('.'):  # hidden files
                        continue
                    # Crude filter: check whether a language can be determined, using
                    # grep_ast's filename_to_lang (the same helper RepoMap uses internally).
                    if not filename_to_lang(abs_fpath):
                        continue

                all_repo_files.append(abs_fpath)

        display_index = {}

        for abs_fpath in all_repo_files:
            processed_files += 1
            if processed_files % 100 == 0 and self.verbose:
                self.io.tool_output(f"Scanned {processed_files}/{len(all_repo_files)} files for index...")

            rel_fpath = self.aider_repo_map.get_rel_fname(abs_fpath)
            try:
                current_mtime = os.path.getmtime(abs_fpath)
            except FileNotFoundError:
                if rel_fpath in self.file_index:
                    del self.file_index[rel_fpath]  # Remove if deleted
                continue

            # Check cache
            if (not force_rescan
                    and rel_fpath in self.file_index
                    and self.file_index[rel_fpath]["mtime"] == current_mtime):
                # Use cached tags if mtime hasn't changed
                tags = self.file_index[rel_fpath]["tags"]
                error_msg = self.file_index[rel_fpath]["error"]
            else:
                # Get fresh tags using Aider's method.
                # get_tags returns a list of Tag namedtuples.
                try:
                    tags = list(self.aider_repo_map.get_tags(abs_fpath, rel_fpath))
                    error_msg = None
                    self.file_index[rel_fpath] = {
                        "abs_fpath": abs_fpath,
                        "tags": tags,
                        "mtime": current_mtime,
                        "error": None
                    }
                    updated_files += 1
                except Exception as e:
                    tags = []
                    error_msg = f"Error processing {rel_fpath}: {e}"
                    self.file_index[rel_fpath] = {
                        "abs_fpath": abs_fpath,
                        "tags": [],
                        "mtime": current_mtime,
                        "error": str(e)
                    }
                    if self.verbose:
                        self.io.tool_warning(error_msg)

            # Prepare a summary for display (e.g., class and function definitions)
            tags_summary = []
            if tags:
                for tag_obj in tags:
                    if tag_obj.kind == "def":  # We are interested in definitions for tree view
                        # Tag(rel_fname, fname, line, name, kind)
                        tags_summary.append(f"{tag_obj.kind}: {tag_obj.name} (L{tag_obj.line + 1})")

            display_index[rel_fpath] = {
                "abs_fpath": abs_fpath,
                "tags_summary": sorted(set(tags_summary)),  # Unique, sorted
                "error": error_msg
            }

        self.last_index_time = current_scan_time
        self.io.tool_output(
            f"File index refreshed. {updated_files} files updated/added. Total {len(display_index)} files.")
        return display_index

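    # A rough TUI-side usage sketch for the index above (not part of this class;
    # `render_tree_node` is a hypothetical rendering helper):
    #
    #   manager = CodeContextManager("/path/to/repo", verbose=True)
    #   index = manager.get_file_list_for_display()
    #   for rel_path, info in sorted(index.items()):
    #       label = rel_path if not info["error"] else f"{rel_path} [unreadable]"
    #       render_tree_node(label, children=info["tags_summary"])
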
    # --- Goal 2: Generating Context in Aider's Way ---
    def get_aider_context(self, chat_files, other_files, mentioned_fnames=None, mentioned_idents=None):
        """
        Generates a context string using Aider's RepoMap logic.

        Args:
            chat_files (list[str]): List of absolute file paths currently in "chat" or focus.
            other_files (list[str]): List of other absolute file paths in the repo to consider.
            mentioned_fnames (set[str], optional): Set of relative filenames explicitly mentioned.
            mentioned_idents (set[str], optional): Set of identifiers explicitly mentioned.

        Returns:
            str: The context string generated by Aider's RepoMap, or None.
        """
        if self.verbose:
            self.io.tool_output(f"Generating Aider-style context for {len(chat_files)} chat files"
                                f" and {len(other_files)} other files.")

        # Aider's RepoMap methods generally expect absolute paths for chat_fnames and other_fnames,
        # and it handles the rel_path conversion internally.

        # Ensure RepoMap's internal cache is primed or updated if necessary.
        # RepoMap.get_ranked_tags_map handles its own caching and refreshing logic
        # based on its `refresh` setting. We might need to call `get_tags` for all files
        # beforehand if RepoMap relies on that being up-to-date from an external call,
        # but typically its `get_ranked_tags` will call `get_tags` as needed.
        # For safety, tags could be refreshed for `other_files` if get_file_list_for_display
        # has not just been called (redundant right after an index pass, but useful standalone):
        # for f_path in chat_files + other_files:
        #     rel_f_path = self.aider_repo_map.get_rel_fname(f_path)
        #     _ = self.aider_repo_map.get_tags(f_path, rel_f_path)  # Primes cache

        return self.aider_repo_map.get_repo_map(
            chat_files=chat_files,
            other_files=other_files,
            mentioned_fnames=mentioned_fnames,
            mentioned_idents=mentioned_idents,
            # force_refresh=False  # Control this based on TUI actions
        )

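    # A rough usage sketch, assuming the TUI tracks which files are currently "in chat"
    # (the paths, `all_abs_paths`, and `user_question` are placeholders):
    #
    #   chat = [os.path.join(manager.get_repo_root(), "app.py")]
    #   others = [p for p in all_abs_paths if p not in chat]
    #   repo_map = manager.get_aider_context(chat, others, mentioned_idents={"CodeContextManager"})
    #   if repo_map:
    #       prompt = repo_map + "\n\n" + user_question
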
    # --- Goal 3: Generating Context via Simple Concatenation ---
    def get_simple_concatenated_context(self, selected_abs_fpaths, include_headers=True, max_total_size_mb=None):
        """
        Concatenates the full content of selected files with demarcations.

        Args:
            selected_abs_fpaths (list[str]): List of absolute file paths to concatenate.
            include_headers (bool): Whether to include a header for each file.
            max_total_size_mb (float, optional): Maximum total size of concatenated output in MB.

        Returns:
            str: The concatenated content.
        """
        if self.verbose:
            self.io.tool_output(f"Generating simple concatenated context for {len(selected_abs_fpaths)} files.")

        output_parts = []
        current_size_bytes = 0
        limit_bytes = (max_total_size_mb * 1024 * 1024) if max_total_size_mb else float('inf')
        files_included_count = 0

        for abs_fpath in selected_abs_fpaths:
            rel_fpath = self.aider_repo_map.get_rel_fname(abs_fpath)
            try:
                file_size = os.path.getsize(abs_fpath)
                if max_total_size_mb is not None and current_size_bytes + file_size > limit_bytes:
                    self.io.tool_warning(
                        f"Warning: Reached size limit of {max_total_size_mb}MB. Skipping remaining files.")
                    break

                content = self.io.read_text(abs_fpath)
                if content is None:
                    output_parts.append(f"--- ERROR READING FILE: {rel_fpath} ---\n[Content not available]\n\n")
                    continue

                if include_headers:
                    header = f"--- BEGIN FILE: {rel_fpath} ---\n"
                    # Optionally, add some basic info from our index
                    if rel_fpath in self.file_index and self.file_index[rel_fpath].get("tags"):
                        defs = [
                            tag.name
                            for tag in self.file_index[rel_fpath]["tags"]
                            if tag.kind == "def"
                        ]
                        if defs:
                            header += f"Definitions: {', '.join(defs[:5])}{'...' if len(defs) > 5 else ''}\n"
                    header += "---\n"  # Simple separator
                    output_parts.append(header)

                output_parts.append(content)
                # Ensure a newline after content if it doesn't have one, before the end marker
                if not content.endswith('\n'):
                    output_parts.append("\n")

                if include_headers:
                    output_parts.append(f"--- END FILE: {rel_fpath} ---\n\n")
                else:
                    output_parts.append("\n\n")  # Just add some separation

                current_size_bytes += len(content.encode('utf-8'))  # More accurate size based on content read
                files_included_count += 1

            except FileNotFoundError:
                output_parts.append(f"--- FILE NOT FOUND: {rel_fpath} ---\n\n")
            except Exception as e:
                output_parts.append(f"--- ERROR PROCESSING FILE {rel_fpath}: {e} ---\n\n")

        if self.verbose:
            self.io.tool_output(
                f"Concatenated {files_included_count} files. Total size: {current_size_bytes / (1024 * 1024):.2f} MB")
        return "".join(output_parts)

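    # A rough usage sketch combining this with the display index (the 1.0 MB cap
    # is an arbitrary example value):
    #
    #   index = manager.get_file_list_for_display()
    #   selected = [info["abs_fpath"] for info in index.values() if not info["error"]]
    #   blob = manager.get_simple_concatenated_context(selected, max_total_size_mb=1.0)
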
    # --- Utility related to Aider's RepoMap if needed for TUI ---
    def get_repo_root(self):
        return self.repo_root

    def get_aider_map_tokens(self):
        return self.aider_repo_map.max_map_tokens

    def set_aider_map_tokens(self, tokens):
        self.aider_repo_map.max_map_tokens = tokens
        # Aider's RepoMap also has map_mul_no_files, which influences the effective tokens
        # when no chat files are present. You might want to expose/control that too.
        # self.aider_repo_map.map_mul_no_files = new_val

    def clear_aider_map_cache(self):
        """ Clears the cache used by Aider's get_ranked_tags_map. """
        self.aider_repo_map.map_cache = {}
        self.aider_repo_map.last_map = None
        self.io.tool_output("Aider RepoMap internal cache cleared.")

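# A minimal command-line smoke test; "." as the default repo root is just an
# example value for local experimentation.
if __name__ == "__main__":
    import sys

    repo = sys.argv[1] if len(sys.argv) > 1 else "."
    manager = CodeContextManager(repo, aider_map_tokens=1024, verbose=True)

    # Build the display index and print a one-line summary per file.
    index = manager.get_file_list_for_display()
    for rel_path, info in sorted(index.items()):
        status = "ERROR" if info["error"] else f"{len(info['tags_summary'])} defs"
        print(f"{rel_path}: {status}")
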
#
# End of code_mapper.py
########################################################################################################################