Add project and language analyzers
This commit is contained in:
130
src/contextgen/analyzers/project_analyzer.py
Normal file
130
src/contextgen/analyzers/project_analyzer.py
Normal file
@@ -0,0 +1,130 @@
|
||||
"""Project analyzer for directory traversal and file discovery."""
|
||||
|
||||
import os
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from contextgen.analyzers.base import BaseAnalyzer
|
||||
|
||||
|
||||
class ProjectAnalyzer(BaseAnalyzer):
    """Analyzes project structure and discovers files.

    The analyzer walks the project directory, skipping any entry whose name
    matches one of ``ignore_patterns``, and reports a nested file tree, the
    well-known configuration / entry-point files found at the project root,
    and total file and directory counts.
    """

    # Names skipped when the caller supplies no (or an empty) pattern list.
    _DEFAULT_IGNORE_PATTERNS = (
        ".git",
        "__pycache__",
        "node_modules",
        ".venv",
        "venv",
        "dist",
        "build",
    )

    # Relative path -> human-readable description. Configuration files come
    # first, then entry points; the order is preserved in the result of
    # ``_find_key_files``.
    _KEY_FILE_DESCRIPTIONS = {
        # Configuration / manifest files.
        "package.json": "npm/package.json",
        "pyproject.toml": "Python project",
        "requirements.txt": "Python dependencies",
        "Cargo.toml": "Rust project",
        "pom.xml": "Maven/Java project",
        "build.gradle": "Gradle/Java project",
        "go.mod": "Go module",
        "Gemfile": "Ruby dependencies",
        "composer.json": "PHP dependencies",
        "setup.py": "Python setup",
        "Makefile": "Build configuration",
        "Dockerfile": "Docker configuration",
        "docker-compose.yml": "Docker Compose",
        ".env.example": "Environment template",
        "tsconfig.json": "TypeScript config",
        "next.config.js": "Next.js config",
        "vue.config.js": "Vue config",
        # Entry points.
        "main.py": "Python entry point",
        "app.py": "Python application",
        "index.js": "JavaScript entry point",
        "main.js": "JavaScript entry point",
        "index.ts": "TypeScript entry point",
        "main.go": "Go entry point",
        "main.rs": "Rust entry point",
        "index.html": "HTML entry point",
        "src/App.tsx": "React entry point",
        "src/main.tsx": "React entry point",
    }

    def __init__(self, project_path: Path, ignore_patterns: list[str] | None = None):
        """Create an analyzer for ``project_path``.

        Args:
            project_path: Root directory of the project to analyze. It is
                forwarded to ``BaseAnalyzer.__init__`` (presumably stored as
                ``self.project_path`` -- confirm against the base class).
            ignore_patterns: Entry names to skip. A pattern starting with
                ``*`` matches any name ending with the rest of the pattern
                (``"*.pyc"``); any other pattern must equal the name exactly.
                ``None`` or an empty list selects the built-in defaults.
        """
        super().__init__(project_path)
        # NOTE: ``or`` (not ``is None``) is kept deliberately so that an empty
        # list keeps falling back to the defaults, as callers may rely on it.
        self.ignore_patterns = ignore_patterns or list(self._DEFAULT_IGNORE_PATTERNS)

    def analyze(self) -> dict[str, Any]:
        """Analyze project structure and return file tree and metadata.

        Returns:
            A dict with the keys ``"file_tree"`` (nested tree, see
            ``_build_file_tree``), ``"key_files"`` (see ``_find_key_files``),
            ``"total_files"`` and ``"total_dirs"`` (integer counts).
        """
        root = self.project_path
        return {
            "file_tree": self._build_file_tree(root),
            "key_files": self._find_key_files(root),
            "total_files": self._count_files(root),
            "total_dirs": self._count_dirs(root),
        }

    def _build_file_tree(self, path: Path, prefix: str = "") -> dict[str, Any]:
        """Build a tree representation of the directory structure.

        Args:
            path: Directory to describe.
            prefix: Unused; retained only for interface compatibility.

        Returns:
            ``{"directories": {"<name>/": <subtree>, ...}, "files": [...]}``
            where directories come before files and both are sorted by name.
            Ignored entries are omitted. If a ``PermissionError`` occurs while
            reading ``path``, whatever was collected so far is returned.
        """
        result: dict[str, Any] = {"directories": {}, "files": []}

        # The whole scan stays inside the ``try``: besides ``iterdir()``, the
        # ``is_file()`` / ``is_dir()`` stat calls can also raise
        # PermissionError (e.g. a directory that is readable but not
        # searchable).
        try:
            entries = sorted(path.iterdir(), key=lambda x: (x.is_file(), x.name))
            for entry in entries:
                if self._should_ignore(entry):
                    continue
                # NOTE: ``is_dir()`` follows symlinks, so symlinked
                # directories are descended into here.
                if entry.is_dir():
                    result["directories"][entry.name + "/"] = self._build_file_tree(entry)
                else:
                    result["files"].append(entry.name)
        except PermissionError:
            # Best effort: an unreadable directory yields a partial subtree.
            pass

        return result

    def _should_ignore(self, path: Path) -> bool:
        """Check if path should be ignored.

        Only the final path component is tested. A pattern beginning with
        ``*`` is a suffix match on the remainder of the pattern; every other
        pattern must equal the name exactly.
        """
        name = path.name
        for pattern in self.ignore_patterns:
            if pattern.startswith("*"):
                if name.endswith(pattern[1:]):
                    return True
            elif name == pattern:
                return True
        return False

    def _find_key_files(self, path: Path) -> dict[str, str]:
        """Identify important configuration and entry point files.

        Args:
            path: Directory in which the well-known relative paths are looked
                up (no recursive search is performed).

        Returns:
            Mapping of each relative path that exists under ``path`` to its
            description, configuration files first, then entry points.
        """
        return {
            filename: description
            for filename, description in self._KEY_FILE_DESCRIPTIONS.items()
            if (path / filename).exists()
        }

    def _count_entries(self, root: Path, *, directories: bool) -> int:
        """Count non-ignored files or directories below ``root``.

        Ignored directories are pruned, so nothing beneath them is counted;
        this keeps the totals consistent with the ignore rules applied by
        ``_build_file_tree``. ``root`` itself is never counted.

        Args:
            root: Directory to walk.
            directories: Count directories when true, regular files otherwise.
        """
        total = 0
        pending = [root]
        while pending:
            current = pending.pop()
            try:
                for child in current.iterdir():
                    if self._should_ignore(child):
                        continue
                    if child.is_dir():
                        if directories:
                            total += 1
                        # Symlinked directories are counted but not entered,
                        # which avoids cycles.
                        if not child.is_symlink():
                            pending.append(child)
                    elif not directories and child.is_file():
                        total += 1
            except OSError:
                # Unreadable or vanished directory: skip it, keep counting
                # everything else.
                continue
        return total

    def _count_files(self, path: Path) -> int:
        """Count total files in directory.

        Files that match an ignore pattern, or that live anywhere beneath an
        ignored directory, are not counted.
        """
        return self._count_entries(path, directories=False)

    def _count_dirs(self, path: Path) -> int:
        """Count total directories in path.

        Directories that match an ignore pattern, or that live anywhere
        beneath an ignored directory, are not counted.
        """
        return self._count_entries(path, directories=True)
|
||||
Reference in New Issue
Block a user