Add naming, style, and documentation analyzers
Some checks failed
CI / test (push) Has been cancelled
CI / build (push) Has been cancelled

This commit is contained in:
2026-01-29 13:23:39 +00:00
parent 135be8a628
commit 160df350d7

View File

@@ -0,0 +1,206 @@
"""Naming convention analyzer using pattern detection."""
import re
from collections import Counter
from pathlib import Path
from typing import Any
class NamingConventionAnalyzer:
"""Analyzes naming conventions in code files."""
CAMEL_CASE_PATTERN = re.compile(r"^[a-z]+[A-Z][a-zA-Z0-9]*$")
PASCAL_CASE_PATTERN = re.compile(r"^[A-Z][a-zA-Z0-9]*$")
SNAKE_CASE_PATTERN = re.compile(r"^[a-z][a-z0-9]*(_[a-z0-9]+)*$")
KEBAB_CASE_PATTERN = re.compile(r"^[a-z][a-z0-9]*(-[a-z0-9]+)*$")
def __init__(self, project_path: Path):
self.project_path = project_path
def analyze(self) -> dict[str, Any]:
"""Analyze naming conventions across project files."""
functions = self._extract_names("function")
classes = self._extract_names("class")
variables = self._extract_names("variable")
constants = self._extract_names("constant")
files = self._extract_file_names()
return {
"functions": self._analyze_naming_patterns(functions, "function"),
"classes": self._analyze_naming_patterns(classes, "class"),
"variables": self._analyze_naming_patterns(variables, "variable"),
"constants": self._analyze_naming_patterns(constants, "constant"),
"files": self._analyze_file_naming(files),
"dominant_style": self._determine_dominant_style(
functions, classes, variables, constants
),
}
def _extract_names(self, name_type: str) -> list[str]:
"""Extract names of a specific type from project files."""
names: list[str] = []
code_files = self._get_code_files()
for file_path in code_files[:50]:
content = self._safe_read_file(file_path)
if content:
extracted = self._extract_from_content(content, name_type)
names.extend(extracted)
return names
def _extract_from_content(self, content: str, name_type: str) -> list[str]:
"""Extract names from file content based on type."""
names: list[str] = []
if name_type == "function":
patterns = [
r"def\s+([a-zA-Z_][a-zA-Z0-9_]*)",
r"function\s+([a-zA-Z_][a-zA-Z0-9_]*)",
r"const\s+([a-zA-Z_][a-zA-Z0-9_]*)\s*=",
r"let\s+([a-zA-Z_][a-zA-Z0-9_]*)\s*=",
r"var\s+([a-zA-Z_][a-zA-Z0-9_]*)\s*=",
]
elif name_type == "class":
patterns = [
r"class\s+([A-Z][a-zA-Z0-9_]*)",
r"struct\s+([A-Z][a-zA-Z0-9_]*)",
r"type\s+([A-Z][a-zA-Z0-9_]*)",
]
elif name_type == "variable":
patterns = [
r"(?:const|let|var|val|varval)\s+([a-z_][a-zA-Z0-9_]*)\s*=",
]
elif name_type == "constant":
patterns = [
r"(?:const|VAL|static\s+const)\s+([A-Z][A-Z0-9_]*)",
]
else:
patterns = []
for pattern in patterns:
matches = re.findall(pattern, content)
names.extend(matches)
return names
def _extract_file_names(self) -> list[str]:
"""Extract file names from project."""
files = []
try:
for f in self.project_path.rglob("*"):
if f.is_file() and not self._is_ignored(f):
files.append(f.name)
except PermissionError:
pass
return files
def _get_code_files(self) -> list[Path]:
"""Get list of code files to analyze."""
extensions = [".py", ".js", ".ts", ".tsx", ".go", ".rs", ".java", ".rb", ".php"]
files = []
try:
for ext in extensions:
files.extend(self.project_path.rglob(f"*{ext}"))
except PermissionError:
pass
return sorted(set(files))
def _is_ignored(self, path: Path) -> bool:
"""Check if path should be ignored."""
ignore_names = [".git", "__pycache__", "node_modules", ".venv", "venv", "dist", "build"]
return any(part in path.parts for part in ignore_names)
def _safe_read_file(self, path: Path) -> str | None:
"""Safely read a file."""
try:
return path.read_text(encoding="utf-8")
except (IOError, UnicodeDecodeError):
return None
def _analyze_naming_patterns(
self, names: list[str], name_type: str
) -> dict[str, Any]:
"""Analyze naming patterns for a list of names."""
if not names:
return {"count": 0, "patterns": {}}
pattern_counts: dict[str, int] = {}
for name in names:
pattern = self._classify_name(name)
pattern_counts[pattern] = pattern_counts.get(pattern, 0) + 1
dominant = max(pattern_counts, key=pattern_counts.get)
return {
"count": len(names),
"patterns": pattern_counts,
"dominant": dominant,
"examples": names[:5],
}
def _classify_name(self, name: str) -> str:
"""Classify a name into a naming convention."""
if self.CAMEL_CASE_PATTERN.match(name):
return "camelCase"
elif self.PASCAL_CASE_PATTERN.match(name):
return "PascalCase"
elif self.SNAKE_CASE_PATTERN.match(name):
return "snake_case"
elif self.KEBAB_CASE_PATTERN.match(name):
return "kebab-case"
elif name.isupper():
return "UPPER_SNAKE_CASE"
else:
return "unknown"
def _analyze_file_naming(self, files: list[str]) -> dict[str, Any]:
"""Analyze file naming conventions."""
if not files:
return {"count": 0, "pattern": "unknown"}
patterns: dict[str, int] = {}
for filename in files:
if filename.replace(".", "_").replace("-", "_") == filename.lower():
patterns["lowercase_with_underscores"] = patterns.get(
"lowercase_with_underscores", 0
) + 1
elif filename.replace(".", "_") == filename:
patterns["lowercase"] = patterns.get("lowercase", 0) + 1
else:
patterns["mixed"] = patterns.get("mixed", 0) + 1
dominant = max(patterns, key=patterns.get) if patterns else "unknown"
return {"count": len(files), "pattern": dominant, "examples": files[:5]}
def _determine_dominant_style(
self,
functions: list[str],
classes: list[str],
variables: list[str],
constants: list[str],
) -> str:
"""Determine the dominant naming style for the project."""
style_scores: dict[str, int] = {}
all_names = {
"function": functions,
"class": classes,
"variable": variables,
"constant": constants,
}
for name_type, names in all_names.items():
for name in names:
pattern = self._classify_name(name)
style_scores[pattern] = style_scores.get(pattern, 0) + 1
if not style_scores:
return "unknown"
dominant = max(style_scores, key=style_scores.get)
return dominant