# Recovered from git patch 022ccd5a5b19 ("Add project and language analyzers"):
# src/contextgen/analyzers/project_analyzer.py
"""Project analyzer for directory traversal and file discovery."""

import os
from pathlib import Path
from typing import Any

from contextgen.analyzers.base import BaseAnalyzer

# Names skipped during traversal when the caller supplies no patterns.
_DEFAULT_IGNORE_PATTERNS: tuple[str, ...] = (
    ".git",
    "__pycache__",
    "node_modules",
    ".venv",
    "venv",
    "dist",
    "build",
)

# Well-known configuration files, mapped to a human-readable description.
_CONFIG_FILES: dict[str, str] = {
    "package.json": "npm/package.json",
    "pyproject.toml": "Python project",
    "requirements.txt": "Python dependencies",
    "Cargo.toml": "Rust project",
    "pom.xml": "Maven/Java project",
    "build.gradle": "Gradle/Java project",
    "go.mod": "Go module",
    "Gemfile": "Ruby dependencies",
    "composer.json": "PHP dependencies",
    "setup.py": "Python setup",
    "Makefile": "Build configuration",
    "Dockerfile": "Docker configuration",
    "docker-compose.yml": "Docker Compose",
    ".env.example": "Environment template",
    "tsconfig.json": "TypeScript config",
    "next.config.js": "Next.js config",
    "vue.config.js": "Vue config",
}

# Conventional entry-point files (paths relative to the project root).
_ENTRY_POINT_FILES: dict[str, str] = {
    "main.py": "Python entry point",
    "app.py": "Python application",
    "index.js": "JavaScript entry point",
    "main.js": "JavaScript entry point",
    "index.ts": "TypeScript entry point",
    "main.go": "Go entry point",
    "main.rs": "Rust entry point",
    "index.html": "HTML entry point",
    "src/App.tsx": "React entry point",
    "src/main.tsx": "React entry point",
}


class ProjectAnalyzer(BaseAnalyzer):
    """Analyzes project structure and discovers files.

    Entries whose name matches one of ``ignore_patterns`` are left out of the
    file tree and out of the file/directory totals, together with everything
    beneath an ignored directory.
    """

    def __init__(self, project_path: Path, ignore_patterns: list[str] | None = None):
        """Create an analyzer for *project_path*.

        Args:
            project_path: Root directory of the project; forwarded to
                ``BaseAnalyzer.__init__`` (presumably stored there as
                ``self.project_path``, which ``analyze`` reads -- confirm
                against the base class).
            ignore_patterns: Names to skip. A pattern starting with ``*``
                matches any name ending with the rest of the pattern (e.g.
                ``*.pyc``); any other pattern must equal the name exactly.
                ``None`` or an empty list selects the built-in defaults.
        """
        super().__init__(project_path)
        # ``or`` is deliberate: an empty list also falls back to the defaults.
        self.ignore_patterns = ignore_patterns or list(_DEFAULT_IGNORE_PATTERNS)

    def analyze(self) -> dict[str, Any]:
        """Analyze project structure and return file tree and metadata.

        Returns:
            A dict with the keys ``file_tree``, ``key_files``, ``total_files``
            and ``total_dirs``.
        """
        file_tree = self._build_file_tree(self.project_path)
        key_files = self._find_key_files(self.project_path)
        # One walk yields both totals instead of traversing the tree twice.
        total_files, total_dirs = self._count_entries(self.project_path)

        return {
            "file_tree": file_tree,
            "key_files": key_files,
            "total_files": total_files,
            "total_dirs": total_dirs,
        }

    def _build_file_tree(self, path: Path, prefix: str = "") -> dict[str, Any]:
        """Build a tree representation of the directory structure.

        Args:
            path: Directory to describe.
            prefix: Unused; kept so existing callers keep working.

        Returns:
            ``{"directories": {"<name>/": <subtree>, ...}, "files": [<name>, ...]}``
            where each subtree has the same shape. Ignored entries are omitted
            and a directory that raises ``PermissionError`` contributes
            whatever was collected before the error.
        """
        result: dict[str, Any] = {"directories": {}, "files": []}

        try:
            # Directories sort ahead of files; names break ties.
            entries = sorted(path.iterdir(), key=lambda x: (x.is_file(), x.name))

            for entry in entries:
                if self._should_ignore(entry):
                    continue

                if entry.is_dir():
                    result["directories"][entry.name + "/"] = self._build_file_tree(entry)
                else:
                    result["files"].append(entry.name)
        except PermissionError:
            # Best effort: keep what was gathered so far for this directory.
            pass

        return result

    def _should_ignore(self, path: Path) -> bool:
        """Check if path should be ignored.

        Only the final path component is compared against
        ``self.ignore_patterns``; ``*suffix`` patterns match by suffix, all
        other patterns by exact name.
        """
        name = path.name
        return any(
            name.endswith(pattern[1:]) if pattern.startswith("*") else name == pattern
            for pattern in self.ignore_patterns
        )

    def _find_key_files(self, path: Path) -> dict[str, str]:
        """Identify important configuration and entry point files.

        Returns:
            A mapping of relative file name to description for every known
            configuration or entry-point file that exists under *path*
            (configuration files first, then entry points).
        """
        candidates = {**_CONFIG_FILES, **_ENTRY_POINT_FILES}
        return {
            filename: description
            for filename, description in candidates.items()
            if (path / filename).exists()
        }

    def _count_entries(self, path: Path) -> tuple[int, int]:
        """Return ``(files, directories)`` found anywhere below *path*.

        Ignored directories are pruned, so nothing beneath them is counted;
        this keeps the totals consistent with ``_build_file_tree``. Symlinked
        directories are counted but not descended into, which avoids cycles.
        Directories that cannot be read are skipped.
        """
        files = 0
        dirs = 0
        pending = [path]

        while pending:
            current = pending.pop()
            try:
                for child in current.iterdir():
                    if self._should_ignore(child):
                        continue
                    if child.is_dir():
                        dirs += 1
                        if not child.is_symlink():
                            pending.append(child)
                    elif child.is_file():
                        files += 1
            except OSError:
                # Unreadable or vanished directory: skip it, keep counting.
                continue

        return files, dirs

    def _count_files(self, path: Path) -> int:
        """Count total files in directory, excluding ignored subtrees."""
        return self._count_entries(path)[0]

    def _count_dirs(self, path: Path) -> int:
        """Count total directories in path, excluding ignored subtrees."""
        return self._count_entries(path)[1]