"""Context building utilities for Local Code Assistant.""" import os from pathlib import Path from typing import Optional IGNORED_PATTERNS: set[str] = { "*.pyc", "__pycache__/*", ".git/*", "node_modules/*", "*.min.js", "*.min.css", "*.swp", "*.swo", ".DS_Store", "*.egg-info/*", ".tox/*", ".venv/*", "venv/*", ".env", "*.log", } def should_ignore(path: Path) -> bool: """Check if a path should be ignored. Args: path: Path to check. Returns: True if path should be ignored, False otherwise. """ str_path = str(path) str_path_lower = str_path.lower() for pattern in IGNORED_PATTERNS: if pattern.endswith("/*"): prefix = pattern[:-2] if prefix in str_path_lower: return True elif pattern.startswith("*"): ext = pattern[1:] if str_path_lower.endswith(ext): return True else: if pattern in str_path_lower: return True return False def _is_valid_file(file_path: Path, extensions: Optional[list[str]]) -> bool: """Check if file should be included. Args: file_path: Path to check. extensions: Optional list of allowed extensions. Returns: True if file should be included. """ ext = file_path.suffix.lower() if extensions is not None and ext not in extensions: return False if file_path.stat().st_size > 100000: return False return True def scan_directory( directory: Path, max_files: int = 10, max_chars: int = 50000, extensions: Optional[list[str]] = None, ) -> dict[str, str]: """Scan a directory for source files. Args: directory: Directory to scan. max_files: Maximum number of files to include. max_chars: Maximum total characters to include. extensions: Optional list of file extensions to include. Returns: Dictionary mapping file paths to content. """ files: dict[str, str] = {} total_chars = 0 if not directory.exists() or not directory.is_dir(): return files for root, dirs, filenames in os.walk(directory): root_path = Path(root) dirs[:] = [d for d in dirs if not should_ignore(root_path / d)] for filename in filenames: file_path = root_path / filename if should_ignore(file_path): continue if not _is_valid_file(file_path, extensions): continue try: content = file_path.read_text(encoding="utf-8", errors="ignore") if total_chars + len(content) > max_chars: continue files[str(file_path)] = content total_chars += len(content) if len(files) >= max_files: return files except Exception: continue return files def build_context_string(files: dict[str, str], max_files: int = 5) -> str: """Build a context string from files. Args: files: Dictionary mapping file paths to content. max_files: Maximum number of files to include. Returns: Context string for prompts. """ sorted_files = sorted(files.items(), key=lambda x: x[0]) context_parts = [] count = 0 for file_path, content in sorted_files: if count >= max_files: break context_parts.append(f"\n=== {file_path} ===\n{content}") count += 1 return "\n".join(context_parts) def get_project_info(directory: Path) -> dict[str, str]: """Get information about a project directory. Args: directory: Project directory path. Returns: Dictionary with project information. """ info: dict[str, str] = {} config_files = ["pyproject.toml", "package.json", "go.mod", "Cargo.toml", "Makefile"] for config_file in config_files: config_path = directory / config_file if config_path.exists(): try: if config_file == "pyproject.toml": info["build_system"] = "python" elif config_file == "package.json": info["build_system"] = "javascript" elif config_file == "go.mod": info["build_system"] = "go" elif config_file == "Cargo.toml": info["build_system"] = "rust" elif config_file == "Makefile": info["build_system"] = "make" break except Exception: continue return info