Add project and language analyzers
This commit is contained in:
130
src/contextgen/analyzers/project_analyzer.py
Normal file
130
src/contextgen/analyzers/project_analyzer.py
Normal file
@@ -0,0 +1,130 @@
|
|||||||
|
"""Project analyzer for directory traversal and file discovery."""
|
||||||
|
|
||||||
|
import os
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from contextgen.analyzers.base import BaseAnalyzer
|
||||||
|
|
||||||
|
|
||||||
|
class ProjectAnalyzer(BaseAnalyzer):
    """Analyzes project structure and discovers files.

    Entries are skipped when their *name* matches one of ``ignore_patterns``:
    a pattern starting with ``*`` is treated as a suffix match (``*.pyc``),
    any other pattern must equal the entry name exactly. Ignored directories
    are pruned as a whole, both from the file tree and from the totals.
    """

    # Names skipped when the caller does not supply its own patterns.
    DEFAULT_IGNORE_PATTERNS: tuple[str, ...] = (
        ".git",
        "__pycache__",
        "node_modules",
        ".venv",
        "venv",
        "dist",
        "build",
    )

    # Well-known configuration files, mapped to a human-readable description.
    _CONFIG_FILES: dict[str, str] = {
        "package.json": "npm/package.json",
        "pyproject.toml": "Python project",
        "requirements.txt": "Python dependencies",
        "Cargo.toml": "Rust project",
        "pom.xml": "Maven/Java project",
        "build.gradle": "Gradle/Java project",
        "go.mod": "Go module",
        "Gemfile": "Ruby dependencies",
        "composer.json": "PHP dependencies",
        "setup.py": "Python setup",
        "Makefile": "Build configuration",
        "Dockerfile": "Docker configuration",
        "docker-compose.yml": "Docker Compose",
        ".env.example": "Environment template",
        "tsconfig.json": "TypeScript config",
        "next.config.js": "Next.js config",
        "vue.config.js": "Vue config",
    }

    # Conventional entry-point files, relative to the project root.
    _ENTRY_POINT_FILES: dict[str, str] = {
        "main.py": "Python entry point",
        "app.py": "Python application",
        "index.js": "JavaScript entry point",
        "main.js": "JavaScript entry point",
        "index.ts": "TypeScript entry point",
        "main.go": "Go entry point",
        "main.rs": "Rust entry point",
        "index.html": "HTML entry point",
        "src/App.tsx": "React entry point",
        "src/main.tsx": "React entry point",
    }

    def __init__(self, project_path: Path, ignore_patterns: list[str] | None = None):
        """Create an analyzer for ``project_path``.

        Args:
            project_path: Root directory of the project; forwarded to
                ``BaseAnalyzer.__init__`` (presumably stored as
                ``self.project_path`` -- confirm against the base class).
            ignore_patterns: Entry names (or ``*suffix`` patterns) to skip.
                ``None`` *or an empty list* selects the default patterns.
        """
        super().__init__(project_path)
        self.ignore_patterns = ignore_patterns or list(self.DEFAULT_IGNORE_PATTERNS)

    def analyze(self) -> dict[str, Any]:
        """Analyze project structure and return file tree and metadata.

        Returns:
            A dict with the keys ``file_tree`` (nested tree, see
            ``_build_file_tree``), ``key_files`` (relative path ->
            description), ``total_files`` and ``total_dirs``.
        """
        root = self.project_path
        return {
            "file_tree": self._build_file_tree(root),
            "key_files": self._find_key_files(root),
            "total_files": self._count_files(root),
            "total_dirs": self._count_dirs(root),
        }

    def _build_file_tree(self, path: Path, prefix: str = "") -> dict[str, Any]:
        """Build a tree representation of the directory structure.

        Args:
            path: Directory to list.
            prefix: Unused; kept so existing callers that pass it keep working.

        Returns:
            ``{"directories": {"<name>/": <subtree>, ...}, "files": [<name>, ...]}``
            with ignored entries left out. An unreadable directory yields
            whatever had been collected before the error (possibly nothing).
        """
        result: dict[str, Any] = {"directories": {}, "files": []}

        try:
            # Entries for which is_file() is false (directories) sort first,
            # then everything is ordered by name.
            entries = sorted(path.iterdir(), key=lambda item: (item.is_file(), item.name))

            for entry in entries:
                if self._should_ignore(entry):
                    continue

                if entry.is_dir():
                    result["directories"][entry.name + "/"] = self._build_file_tree(entry)
                else:
                    result["files"].append(entry.name)
        except PermissionError:
            # Best effort: keep the partial listing instead of failing the
            # whole analysis because one directory cannot be read.
            pass

        return result

    def _should_ignore(self, path: Path) -> bool:
        """Check if path should be ignored.

        Only the final path component is compared. A pattern starting with
        ``*`` matches any name ending with the rest of the pattern; every
        other pattern must equal the name exactly.
        """
        name = path.name
        return any(
            name.endswith(pattern[1:]) if pattern.startswith("*") else name == pattern
            for pattern in self.ignore_patterns
        )

    def _find_key_files(self, path: Path) -> dict[str, str]:
        """Identify important configuration and entry point files.

        Returns:
            Mapping of relative file name to description for every known
            file that exists under ``path``; configuration files come first,
            then entry points, each in table order.
        """
        # The two tables share no keys, so merging keeps both orderings intact.
        candidates = {**self._CONFIG_FILES, **self._ENTRY_POINT_FILES}
        return {
            filename: description
            for filename, description in candidates.items()
            if (path / filename).exists()
        }

    def _count_entries(self, path: Path) -> tuple[int, int]:
        """Return ``(file_count, dir_count)`` for everything beneath ``path``.

        Ignored directories are pruned, so nothing inside them is counted;
        this keeps the totals consistent with ``_build_file_tree``. ``path``
        itself is neither counted nor checked against the ignore patterns.
        Symlinked directories are counted but not descended into, and
        directories that cannot be listed are skipped (``os.walk`` defaults).
        """
        file_count = 0
        dir_count = 0

        for current, dirnames, filenames in os.walk(path):
            base = Path(current)

            # Assigning in place tells os.walk not to descend into the
            # directories that were filtered out.
            dirnames[:] = [name for name in dirnames if not self._should_ignore(base / name)]
            dir_count += len(dirnames)

            for name in filenames:
                candidate = base / name
                if self._should_ignore(candidate):
                    continue
                try:
                    # os.walk lists every non-directory here; only regular
                    # files (or links to them) count as files.
                    if candidate.is_file():
                        file_count += 1
                except PermissionError:
                    # Entry cannot be inspected; skip it and keep counting.
                    continue

        return file_count, dir_count

    def _count_files(self, path: Path) -> int:
        """Count files beneath ``path``, excluding ignored names and the
        contents of ignored directories."""
        return self._count_entries(path)[0]

    def _count_dirs(self, path: Path) -> int:
        """Count directories beneath ``path``, excluding ignored directories
        and everything nested inside them."""
        return self._count_entries(path)[1]
|
||||||
Reference in New Issue
Block a user