Skip to content

Commit f2815a0

Browse files
committed
code mapper from aider, foundations for it for later
1 parent ddb741e commit f2815a0

File tree

11 files changed

+1922
-2
lines changed

11 files changed

+1922
-2
lines changed

pyproject.toml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,12 @@ dependencies = [
4545

4646
# --- Optional Dependencies ---
4747
[project.optional-dependencies]
48+
coding_map = [
49+
"grep_ast",
50+
"pygments",
51+
"tqdm",
52+
53+
]
4854
chunker = [
4955
"langdetect",
5056
"nltk",

tldw_chatbook/Coding/code_mapper.py

Lines changed: 304 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,304 @@
1+
# tldw_chatbook/Coding/code_mapper.py
2+
# Description: Helpers that build code context (a file/definition index, Aider's repo map, and concatenated file contents) for use in a TUI application.
3+
#
4+
# Imports
5+
import os
6+
import time
7+
from pathlib import Path
8+
from collections import defaultdict
9+
#
10+
# Third-Party Imports
11+
from aider.repo_map import RepoMap, find_src_files # Import necessary components
12+
#
13+
########################################################################################################################
14+
#
15+
# You might need to provide stubs or mock objects for RepoMap's dependencies
16+
# if you're not running the full Aider environment, e.g., for `io` and `main_model`.
17+
class SimpleIO:
    """Minimal console-backed IO object handed to Aider's ``RepoMap``.

    Messages are written to stdout with a severity prefix, and files are
    read as UTF-8 text with undecodable bytes dropped.
    """

    def tool_output(self, message):
        """Print an informational message."""
        line = f"[INFO] {message}"
        print(line)

    def tool_warning(self, message):
        """Print a warning message."""
        line = f"[WARNING] {message}"
        print(line)

    def tool_error(self, message):
        """Print an error message."""
        line = f"[ERROR] {message}"
        print(line)

    def read_text(self, fpath):
        """Return the text of ``fpath``, or ``None`` if it cannot be read.

        Any failure is reported through ``tool_error`` rather than raised.
        """
        text = None
        try:
            with open(fpath, mode="r", encoding="utf-8", errors="ignore") as handle:
                text = handle.read()
        except Exception as exc:
            self.tool_error(f"Error reading {fpath}: {exc}")
        return text
34+
35+
36+
class MockModel:
    """Stand-in for a model object that only knows how to count tokens."""

    def token_count(self, text):
        """Approximate the token count of ``text``.

        The count is the number of whitespace-separated words; swap in a
        real tokenizer (e.g. tiktoken) when accuracy matters.
        """
        words = text.split()
        return len(words)
41+
42+
43+
class CodeContextManager:
    """Build code context for a repository on top of Aider's ``RepoMap``.

    Three views of a repository are offered:

    * ``get_file_list_for_display`` -- an mtime-cached index of files and the
      definitions found in them, intended for a tree view.
    * ``get_aider_context`` -- the repo map string produced by ``RepoMap``.
    * ``get_simple_concatenated_context`` -- the raw text of selected files
      joined together with begin/end markers.
    """

    # Files with one of these suffixes are indexed without a language lookup.
    _KNOWN_SOURCE_EXTENSIONS = (
        ".py", ".js", ".ts", ".java", ".c", ".cpp", ".h", ".hpp", ".rs", ".go", ".md",
    )

    def __init__(self, repo_root, aider_map_tokens=1024, verbose=False):
        """Create the manager and the underlying ``RepoMap``.

        Args:
            repo_root: Path to the repository; stored as an absolute path.
            aider_map_tokens: Token budget passed to ``RepoMap`` as ``map_tokens``.
            verbose: When true, progress and diagnostics are sent to ``self.io``.
        """
        self.repo_root = os.path.abspath(repo_root)
        self.verbose = verbose

        # RepoMap needs an IO object and a model exposing token_count();
        # these are lightweight stand-ins that can be swapped for real ones.
        self.io = SimpleIO()
        self.main_model = MockModel()

        self.aider_repo_map = RepoMap(
            map_tokens=aider_map_tokens,
            root=self.repo_root,
            main_model=self.main_model,
            io=self.io,
            verbose=self.verbose,
        )
        # {rel_fpath: {"abs_fpath": str, "tags": list, "mtime": float, "error": str | None}}
        self.file_index = {}
        self.last_index_time = 0

    # --- Goal 1: Indexing for Display and Review ---
    def get_file_list_for_display(self, force_rescan=False):
        """Scan the repository and summarise each file's definitions.

        Tags are re-extracted only for files whose mtime changed since the
        previous scan (or for every file when ``force_rescan`` is true).
        Entries for files that are no longer found are dropped from
        ``self.file_index``.

        Args:
            force_rescan (bool): If True, ignore cached tags and re-extract
                them for every file.

        Returns:
            dict: ``{rel_fpath: {"abs_fpath": str, "tags_summary": list[str],
            "error": str or None}}``. Each summary entry looks like
            ``"def: name (L12)"``; ``error`` is the same message whether the
            failure was just observed or came from the cache.
        """
        # Route through the IO object (not print) like every other message.
        self.io.tool_output("Building file index for display...")
        current_scan_time = time.time()
        updated_files = 0

        all_repo_files = self._discover_source_files()
        total_files = len(all_repo_files)
        display_index = {}
        seen_rel_fpaths = set()

        for processed_files, abs_fpath in enumerate(all_repo_files, start=1):
            if self.verbose and processed_files % 100 == 0:
                self.io.tool_output(f"Scanned {processed_files}/{total_files} files for index...")

            rel_fpath = self.aider_repo_map.get_rel_fname(abs_fpath)
            try:
                current_mtime = os.path.getmtime(abs_fpath)
            except OSError:
                # Vanished or unreadable since discovery; it is not marked as
                # seen, so any cached entry is pruned after the loop.
                continue
            seen_rel_fpaths.add(rel_fpath)

            cached = self.file_index.get(rel_fpath)
            if not force_rescan and cached is not None and cached["mtime"] == current_mtime:
                tags = cached["tags"]
                raw_error = cached["error"]
            else:
                try:
                    tags = list(self.aider_repo_map.get_tags(abs_fpath, rel_fpath))
                    raw_error = None
                    updated_files += 1
                except Exception as e:
                    # Best effort: one unparsable file must not abort the scan.
                    tags = []
                    raw_error = str(e)
                    if self.verbose:
                        self.io.tool_warning(f"Error processing {rel_fpath}: {raw_error}")
                self.file_index[rel_fpath] = {
                    "abs_fpath": abs_fpath,
                    "tags": tags,
                    "mtime": current_mtime,
                    "error": raw_error,
                }

            # Build the display message from the stored raw error so a cache
            # hit reports exactly what the original failure reported.
            error_msg = None
            if raw_error is not None:
                error_msg = f"Error processing {rel_fpath}: {raw_error}"

            display_index[rel_fpath] = {
                "abs_fpath": abs_fpath,
                "tags_summary": self._summarize_definitions(tags),
                "error": error_msg,
            }

        # os.walk never yields deleted files, so stale entries must be
        # removed explicitly.
        for stale_rel_fpath in set(self.file_index) - seen_rel_fpaths:
            del self.file_index[stale_rel_fpath]

        self.last_index_time = current_scan_time
        self.io.tool_output(
            f"File index refreshed. {updated_files} files updated/added. Total {len(display_index)} files.")
        return display_index

    def _discover_source_files(self):
        """Return absolute paths of candidate source files under ``repo_root``."""
        discovered = []
        for root, dirs, files in os.walk(self.repo_root):
            if ".git" in root.split(os.sep):  # Basic .git ignore
                continue
            # Prune in place so os.walk does not descend into .git at all.
            dirs[:] = [d for d in dirs if d != ".git"]
            for file in files:
                abs_fpath = os.path.join(root, file)
                if not abs_fpath.endswith(self._KNOWN_SOURCE_EXTENSIONS):
                    # Unknown suffix: skip files whose repo-relative path
                    # starts with a dot, then fall back to a language lookup.
                    if self.aider_repo_map.get_rel_fname(abs_fpath).startswith('.'):
                        continue
                    if not self._filename_to_lang(abs_fpath):
                        continue
                discovered.append(abs_fpath)
        return discovered

    def _filename_to_lang(self, abs_fpath):
        """Return the language for ``abs_fpath`` or ``None`` when unknown."""
        # NOTE(review): in the aider versions I am aware of, filename_to_lang
        # is a grep_ast function rather than a RepoMap attribute -- confirm.
        # Looking it up defensively avoids an AttributeError aborting the scan;
        # when it is missing the file is treated as "language unknown".
        detector = getattr(self.aider_repo_map, "filename_to_lang", None)
        if detector is None:
            return None
        return detector(abs_fpath)

    @staticmethod
    def _summarize_definitions(tags):
        """Return unique, sorted ``"def: name (L<n>)"`` strings for definition tags."""
        # tag.line is shifted by one for display (assumes 0-based lines -- TODO confirm).
        return sorted({
            f"{tag_obj.kind}: {tag_obj.name} (L{tag_obj.line + 1})"
            for tag_obj in tags
            if tag_obj.kind == "def"
        })

    # --- Goal 2: Generating Context in Aider's Way ---
    def get_aider_context(self, chat_files, other_files, mentioned_fnames=None, mentioned_idents=None):
        """Generate a context string by delegating to ``RepoMap.get_repo_map``.

        Args:
            chat_files (list[str]): Absolute paths of files currently in focus.
            other_files (list[str]): Other absolute paths in the repo to consider.
            mentioned_fnames (set[str], optional): Relative filenames explicitly mentioned.
            mentioned_idents (set[str], optional): Identifiers explicitly mentioned.

        Returns:
            Whatever ``get_repo_map`` returns (a context string, or None).
        """
        if self.verbose:
            self.io.tool_output(f"Generating Aider-style context for {len(chat_files)} chat files"
                                f" and {len(other_files)} other files.")

        # No cache priming is done here; RepoMap is expected to extract tags
        # itself as needed (presumably via its own caching -- verify).
        return self.aider_repo_map.get_repo_map(
            chat_files=chat_files,
            other_files=other_files,
            mentioned_fnames=mentioned_fnames,
            mentioned_idents=mentioned_idents,
        )

    # --- Goal 3: Generating Context via Simple Concatenation ---
    def get_simple_concatenated_context(self, selected_abs_fpaths, include_headers=True, max_total_size_mb=None):
        """Concatenate the full content of the selected files.

        Args:
            selected_abs_fpaths (list[str]): Absolute file paths to concatenate.
            include_headers (bool): Wrap each file in BEGIN/END marker lines
                (with up to five definition names from the index, if any).
            max_total_size_mb (float, optional): Size budget in MB. ``None``
                means unlimited; ``0`` means no non-empty file fits. When the
                next file would exceed the budget, it and all remaining files
                are skipped.

        Returns:
            str: The concatenated content. Unreadable or missing files are
            represented by an inline marker instead of raising.
        """
        if self.verbose:
            self.io.tool_output(f"Generating simple concatenated context for {len(selected_abs_fpaths)} files.")

        output_parts = []
        current_size_bytes = 0
        # Test against None, not truthiness, so an explicit 0 is a real limit.
        if max_total_size_mb is None:
            limit_bytes = float('inf')
        else:
            limit_bytes = max_total_size_mb * 1024 * 1024
        files_included_count = 0

        for abs_fpath in selected_abs_fpaths:
            rel_fpath = self.aider_repo_map.get_rel_fname(abs_fpath)
            try:
                file_size = os.path.getsize(abs_fpath)
                if current_size_bytes + file_size > limit_bytes:
                    self.io.tool_warning(
                        f"Warning: Reached size limit of {max_total_size_mb}MB. Skipping remaining files.")
                    break

                content = self.io.read_text(abs_fpath)
                if content is None:
                    output_parts.append(f"--- ERROR READING FILE: {rel_fpath} ---\n[Content not available]\n\n")
                    continue

                if include_headers:
                    header = f"--- BEGIN FILE: {rel_fpath} ---\n"
                    indexed = self.file_index.get(rel_fpath)
                    if indexed and indexed.get("tags"):
                        defs = [tag.name for tag in indexed["tags"] if tag.kind == "def"]
                        if defs:
                            header += f"Definitions: {', '.join(defs[:5])}{'...' if len(defs) > 5 else ''}\n"
                    header += "---\n"  # Simple separator
                    output_parts.append(header)

                output_parts.append(content)
                # Keep the end marker on its own line.
                if not content.endswith('\n'):
                    output_parts.append("\n")

                if include_headers:
                    output_parts.append(f"--- END FILE: {rel_fpath} ---\n\n")
                else:
                    output_parts.append("\n\n")  # Just add some separation

                # Count what was actually read rather than the on-disk size.
                current_size_bytes += len(content.encode('utf-8'))
                files_included_count += 1

            except FileNotFoundError:
                output_parts.append(f"--- FILE NOT FOUND: {rel_fpath} ---\n\n")
            except Exception as e:
                # Best effort: record the failure inline and keep going.
                output_parts.append(f"--- ERROR PROCESSING FILE {rel_fpath}: {e} ---\n\n")

        if self.verbose:
            self.io.tool_output(
                f"Concatenated {files_included_count} files. Total size: {current_size_bytes / (1024 * 1024):.2f} MB")
        return "".join(output_parts)

    # --- Utility related to Aider's RepoMap if needed for TUI ---
    def get_repo_root(self):
        """Return the absolute repository root."""
        return self.repo_root

    def get_aider_map_tokens(self):
        """Return the ``max_map_tokens`` value currently set on the RepoMap."""
        return self.aider_repo_map.max_map_tokens

    def set_aider_map_tokens(self, tokens):
        """Set ``max_map_tokens`` on the RepoMap."""
        self.aider_repo_map.max_map_tokens = tokens

    def clear_aider_map_cache(self):
        """Reset the RepoMap's ``map_cache`` and ``last_map`` attributes."""
        self.aider_repo_map.map_cache = {}
        self.aider_repo_map.last_map = None
        self.io.tool_output("Aider RepoMap internal cache cleared.")
301+
302+
#
303+
# End of code_mapper.py
304+
########################################################################################################################

tldw_chatbook/Constants.py

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1970,5 +1970,61 @@
19701970

19711971

19721972
#
1973+
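# MLX-LM Server Arguments Help Text (uses [bold]/[italic] console markup tags).
# NOTE(review): confirm the flags and defaults listed below against `mlx_lm.server --help` for the mlx-lm version in use.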
1974+
MLX_LM_SERVER_ARGS_HELP_TEXT = """
1975+
[bold cyan]--- MLX-LM Server Arguments ---[/]
1976+
1977+
[bold]--model MODEL[/]
1978+
Path to the model directory or HuggingFace model ID
1979+
(e.g., [italic]--model mlx-community/Nous-Hermes-2-Mistral-7B-DPO-4bit-MLX[/])
1980+
1981+
[bold]--host HOST[/]
1982+
Host address to bind the server to (default: 127.0.0.1)
1983+
(e.g., [italic]--host 0.0.0.0[/])
1984+
1985+
[bold]--port PORT[/]
1986+
Port to run the server on (default: 8080)
1987+
(e.g., [italic]--port 8000[/])
1988+
1989+
[bold]--max-tokens N[/]
1990+
Maximum number of tokens to generate (default: 100)
1991+
(e.g., [italic]--max-tokens 512[/])
1992+
1993+
[bold]--temperature TEMP[/]
1994+
Sampling temperature (default: 0.8)
1995+
(e.g., [italic]--temperature 0.7[/])
1996+
1997+
[bold]--top-p P[/]
1998+
Top-p sampling (default: 0.9)
1999+
(e.g., [italic]--top-p 0.95[/])
2000+
2001+
[bold]--top-k K[/]
2002+
Top-k sampling (default: 40)
2003+
(e.g., [italic]--top-k 50[/])
2004+
2005+
[bold]--seed SEED[/]
2006+
Random seed for reproducibility (default: None)
2007+
(e.g., [italic]--seed 42[/])
2008+
2009+
[bold]--batch-size N[/]
2010+
Batch size for inference (default: 1)
2011+
(e.g., [italic]--batch-size 4[/])
2012+
2013+
[bold]--quantization {int8,int4,fp16,fp32}[/]
2014+
Quantization method to use (default: None)
2015+
(e.g., [italic]--quantization int4[/])
2016+
2017+
[bold]--device {cpu,gpu}[/]
2018+
Device to run inference on (default: auto-detect)
2019+
(e.g., [italic]--device gpu[/])
2020+
2021+
[bold]--trust-remote-code[/]
2022+
Trust remote code when loading models from HuggingFace
2023+
2024+
[bold]--revision REVISION[/]
2025+
Specific model revision to use from HuggingFace
2026+
(e.g., [italic]--revision main[/])
2027+
"""
2028+
19732029
# End of Constants.py
19742030
########################################################################################################################

0 commit comments

Comments
 (0)