Add naming, style, and documentation analyzers
This commit is contained in:
206
src/contextgen/analyzers/naming_analyzer.py
Normal file
206
src/contextgen/analyzers/naming_analyzer.py
Normal file
@@ -0,0 +1,206 @@
|
||||
"""Naming convention analyzer using pattern detection."""
|
||||
|
||||
import re
|
||||
from collections import Counter
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
|
||||
class NamingConventionAnalyzer:
|
||||
"""Analyzes naming conventions in code files."""
|
||||
|
||||
CAMEL_CASE_PATTERN = re.compile(r"^[a-z]+[A-Z][a-zA-Z0-9]*$")
|
||||
PASCAL_CASE_PATTERN = re.compile(r"^[A-Z][a-zA-Z0-9]*$")
|
||||
SNAKE_CASE_PATTERN = re.compile(r"^[a-z][a-z0-9]*(_[a-z0-9]+)*$")
|
||||
KEBAB_CASE_PATTERN = re.compile(r"^[a-z][a-z0-9]*(-[a-z0-9]+)*$")
|
||||
|
||||
def __init__(self, project_path: Path):
|
||||
self.project_path = project_path
|
||||
|
||||
def analyze(self) -> dict[str, Any]:
|
||||
"""Analyze naming conventions across project files."""
|
||||
functions = self._extract_names("function")
|
||||
classes = self._extract_names("class")
|
||||
variables = self._extract_names("variable")
|
||||
constants = self._extract_names("constant")
|
||||
files = self._extract_file_names()
|
||||
|
||||
return {
|
||||
"functions": self._analyze_naming_patterns(functions, "function"),
|
||||
"classes": self._analyze_naming_patterns(classes, "class"),
|
||||
"variables": self._analyze_naming_patterns(variables, "variable"),
|
||||
"constants": self._analyze_naming_patterns(constants, "constant"),
|
||||
"files": self._analyze_file_naming(files),
|
||||
"dominant_style": self._determine_dominant_style(
|
||||
functions, classes, variables, constants
|
||||
),
|
||||
}
|
||||
|
||||
def _extract_names(self, name_type: str) -> list[str]:
|
||||
"""Extract names of a specific type from project files."""
|
||||
names: list[str] = []
|
||||
|
||||
code_files = self._get_code_files()
|
||||
|
||||
for file_path in code_files[:50]:
|
||||
content = self._safe_read_file(file_path)
|
||||
if content:
|
||||
extracted = self._extract_from_content(content, name_type)
|
||||
names.extend(extracted)
|
||||
|
||||
return names
|
||||
|
||||
def _extract_from_content(self, content: str, name_type: str) -> list[str]:
|
||||
"""Extract names from file content based on type."""
|
||||
names: list[str] = []
|
||||
|
||||
if name_type == "function":
|
||||
patterns = [
|
||||
r"def\s+([a-zA-Z_][a-zA-Z0-9_]*)",
|
||||
r"function\s+([a-zA-Z_][a-zA-Z0-9_]*)",
|
||||
r"const\s+([a-zA-Z_][a-zA-Z0-9_]*)\s*=",
|
||||
r"let\s+([a-zA-Z_][a-zA-Z0-9_]*)\s*=",
|
||||
r"var\s+([a-zA-Z_][a-zA-Z0-9_]*)\s*=",
|
||||
]
|
||||
elif name_type == "class":
|
||||
patterns = [
|
||||
r"class\s+([A-Z][a-zA-Z0-9_]*)",
|
||||
r"struct\s+([A-Z][a-zA-Z0-9_]*)",
|
||||
r"type\s+([A-Z][a-zA-Z0-9_]*)",
|
||||
]
|
||||
elif name_type == "variable":
|
||||
patterns = [
|
||||
r"(?:const|let|var|val|varval)\s+([a-z_][a-zA-Z0-9_]*)\s*=",
|
||||
]
|
||||
elif name_type == "constant":
|
||||
patterns = [
|
||||
r"(?:const|VAL|static\s+const)\s+([A-Z][A-Z0-9_]*)",
|
||||
]
|
||||
else:
|
||||
patterns = []
|
||||
|
||||
for pattern in patterns:
|
||||
matches = re.findall(pattern, content)
|
||||
names.extend(matches)
|
||||
|
||||
return names
|
||||
|
||||
def _extract_file_names(self) -> list[str]:
|
||||
"""Extract file names from project."""
|
||||
files = []
|
||||
try:
|
||||
for f in self.project_path.rglob("*"):
|
||||
if f.is_file() and not self._is_ignored(f):
|
||||
files.append(f.name)
|
||||
except PermissionError:
|
||||
pass
|
||||
return files
|
||||
|
||||
def _get_code_files(self) -> list[Path]:
|
||||
"""Get list of code files to analyze."""
|
||||
extensions = [".py", ".js", ".ts", ".tsx", ".go", ".rs", ".java", ".rb", ".php"]
|
||||
files = []
|
||||
try:
|
||||
for ext in extensions:
|
||||
files.extend(self.project_path.rglob(f"*{ext}"))
|
||||
except PermissionError:
|
||||
pass
|
||||
return sorted(set(files))
|
||||
|
||||
def _is_ignored(self, path: Path) -> bool:
|
||||
"""Check if path should be ignored."""
|
||||
ignore_names = [".git", "__pycache__", "node_modules", ".venv", "venv", "dist", "build"]
|
||||
return any(part in path.parts for part in ignore_names)
|
||||
|
||||
def _safe_read_file(self, path: Path) -> str | None:
|
||||
"""Safely read a file."""
|
||||
try:
|
||||
return path.read_text(encoding="utf-8")
|
||||
except (IOError, UnicodeDecodeError):
|
||||
return None
|
||||
|
||||
def _analyze_naming_patterns(
|
||||
self, names: list[str], name_type: str
|
||||
) -> dict[str, Any]:
|
||||
"""Analyze naming patterns for a list of names."""
|
||||
if not names:
|
||||
return {"count": 0, "patterns": {}}
|
||||
|
||||
pattern_counts: dict[str, int] = {}
|
||||
|
||||
for name in names:
|
||||
pattern = self._classify_name(name)
|
||||
pattern_counts[pattern] = pattern_counts.get(pattern, 0) + 1
|
||||
|
||||
dominant = max(pattern_counts, key=pattern_counts.get)
|
||||
|
||||
return {
|
||||
"count": len(names),
|
||||
"patterns": pattern_counts,
|
||||
"dominant": dominant,
|
||||
"examples": names[:5],
|
||||
}
|
||||
|
||||
def _classify_name(self, name: str) -> str:
|
||||
"""Classify a name into a naming convention."""
|
||||
if self.CAMEL_CASE_PATTERN.match(name):
|
||||
return "camelCase"
|
||||
elif self.PASCAL_CASE_PATTERN.match(name):
|
||||
return "PascalCase"
|
||||
elif self.SNAKE_CASE_PATTERN.match(name):
|
||||
return "snake_case"
|
||||
elif self.KEBAB_CASE_PATTERN.match(name):
|
||||
return "kebab-case"
|
||||
elif name.isupper():
|
||||
return "UPPER_SNAKE_CASE"
|
||||
else:
|
||||
return "unknown"
|
||||
|
||||
def _analyze_file_naming(self, files: list[str]) -> dict[str, Any]:
|
||||
"""Analyze file naming conventions."""
|
||||
if not files:
|
||||
return {"count": 0, "pattern": "unknown"}
|
||||
|
||||
patterns: dict[str, int] = {}
|
||||
|
||||
for filename in files:
|
||||
if filename.replace(".", "_").replace("-", "_") == filename.lower():
|
||||
patterns["lowercase_with_underscores"] = patterns.get(
|
||||
"lowercase_with_underscores", 0
|
||||
) + 1
|
||||
elif filename.replace(".", "_") == filename:
|
||||
patterns["lowercase"] = patterns.get("lowercase", 0) + 1
|
||||
else:
|
||||
patterns["mixed"] = patterns.get("mixed", 0) + 1
|
||||
|
||||
dominant = max(patterns, key=patterns.get) if patterns else "unknown"
|
||||
|
||||
return {"count": len(files), "pattern": dominant, "examples": files[:5]}
|
||||
|
||||
def _determine_dominant_style(
|
||||
self,
|
||||
functions: list[str],
|
||||
classes: list[str],
|
||||
variables: list[str],
|
||||
constants: list[str],
|
||||
) -> str:
|
||||
"""Determine the dominant naming style for the project."""
|
||||
style_scores: dict[str, int] = {}
|
||||
|
||||
all_names = {
|
||||
"function": functions,
|
||||
"class": classes,
|
||||
"variable": variables,
|
||||
"constant": constants,
|
||||
}
|
||||
|
||||
for name_type, names in all_names.items():
|
||||
for name in names:
|
||||
pattern = self._classify_name(name)
|
||||
style_scores[pattern] = style_scores.get(pattern, 0) + 1
|
||||
|
||||
if not style_scores:
|
||||
return "unknown"
|
||||
|
||||
dominant = max(style_scores, key=style_scores.get)
|
||||
return dominant
|
||||
Reference in New Issue
Block a user