diff --git a/src/contextgen/analyzers/naming_analyzer.py b/src/contextgen/analyzers/naming_analyzer.py new file mode 100644 index 0000000..0bcfc2c --- /dev/null +++ b/src/contextgen/analyzers/naming_analyzer.py @@ -0,0 +1,206 @@ +"""Naming convention analyzer using pattern detection.""" + +import re +from collections import Counter +from pathlib import Path +from typing import Any + + +class NamingConventionAnalyzer: + """Analyzes naming conventions in code files.""" + + CAMEL_CASE_PATTERN = re.compile(r"^[a-z]+[A-Z][a-zA-Z0-9]*$") + PASCAL_CASE_PATTERN = re.compile(r"^[A-Z][a-zA-Z0-9]*$") + SNAKE_CASE_PATTERN = re.compile(r"^[a-z][a-z0-9]*(_[a-z0-9]+)*$") + KEBAB_CASE_PATTERN = re.compile(r"^[a-z][a-z0-9]*(-[a-z0-9]+)*$") + + def __init__(self, project_path: Path): + self.project_path = project_path + + def analyze(self) -> dict[str, Any]: + """Analyze naming conventions across project files.""" + functions = self._extract_names("function") + classes = self._extract_names("class") + variables = self._extract_names("variable") + constants = self._extract_names("constant") + files = self._extract_file_names() + + return { + "functions": self._analyze_naming_patterns(functions, "function"), + "classes": self._analyze_naming_patterns(classes, "class"), + "variables": self._analyze_naming_patterns(variables, "variable"), + "constants": self._analyze_naming_patterns(constants, "constant"), + "files": self._analyze_file_naming(files), + "dominant_style": self._determine_dominant_style( + functions, classes, variables, constants + ), + } + + def _extract_names(self, name_type: str) -> list[str]: + """Extract names of a specific type from project files.""" + names: list[str] = [] + + code_files = self._get_code_files() + + for file_path in code_files[:50]: + content = self._safe_read_file(file_path) + if content: + extracted = self._extract_from_content(content, name_type) + names.extend(extracted) + + return names + + def _extract_from_content(self, content: str, name_type: str) -> list[str]: + """Extract names from file content based on type.""" + names: list[str] = [] + + if name_type == "function": + patterns = [ + r"def\s+([a-zA-Z_][a-zA-Z0-9_]*)", + r"function\s+([a-zA-Z_][a-zA-Z0-9_]*)", + r"const\s+([a-zA-Z_][a-zA-Z0-9_]*)\s*=", + r"let\s+([a-zA-Z_][a-zA-Z0-9_]*)\s*=", + r"var\s+([a-zA-Z_][a-zA-Z0-9_]*)\s*=", + ] + elif name_type == "class": + patterns = [ + r"class\s+([A-Z][a-zA-Z0-9_]*)", + r"struct\s+([A-Z][a-zA-Z0-9_]*)", + r"type\s+([A-Z][a-zA-Z0-9_]*)", + ] + elif name_type == "variable": + patterns = [ + r"(?:const|let|var|val|varval)\s+([a-z_][a-zA-Z0-9_]*)\s*=", + ] + elif name_type == "constant": + patterns = [ + r"(?:const|VAL|static\s+const)\s+([A-Z][A-Z0-9_]*)", + ] + else: + patterns = [] + + for pattern in patterns: + matches = re.findall(pattern, content) + names.extend(matches) + + return names + + def _extract_file_names(self) -> list[str]: + """Extract file names from project.""" + files = [] + try: + for f in self.project_path.rglob("*"): + if f.is_file() and not self._is_ignored(f): + files.append(f.name) + except PermissionError: + pass + return files + + def _get_code_files(self) -> list[Path]: + """Get list of code files to analyze.""" + extensions = [".py", ".js", ".ts", ".tsx", ".go", ".rs", ".java", ".rb", ".php"] + files = [] + try: + for ext in extensions: + files.extend(self.project_path.rglob(f"*{ext}")) + except PermissionError: + pass + return sorted(set(files)) + + def _is_ignored(self, path: Path) -> bool: + """Check if path should be ignored.""" + ignore_names = [".git", "__pycache__", "node_modules", ".venv", "venv", "dist", "build"] + return any(part in path.parts for part in ignore_names) + + def _safe_read_file(self, path: Path) -> str | None: + """Safely read a file.""" + try: + return path.read_text(encoding="utf-8") + except (IOError, UnicodeDecodeError): + return None + + def _analyze_naming_patterns( + self, names: list[str], name_type: str + ) -> dict[str, Any]: + """Analyze naming patterns for a list of names.""" + if not names: + return {"count": 0, "patterns": {}} + + pattern_counts: dict[str, int] = {} + + for name in names: + pattern = self._classify_name(name) + pattern_counts[pattern] = pattern_counts.get(pattern, 0) + 1 + + dominant = max(pattern_counts, key=pattern_counts.get) + + return { + "count": len(names), + "patterns": pattern_counts, + "dominant": dominant, + "examples": names[:5], + } + + def _classify_name(self, name: str) -> str: + """Classify a name into a naming convention.""" + if self.CAMEL_CASE_PATTERN.match(name): + return "camelCase" + elif self.PASCAL_CASE_PATTERN.match(name): + return "PascalCase" + elif self.SNAKE_CASE_PATTERN.match(name): + return "snake_case" + elif self.KEBAB_CASE_PATTERN.match(name): + return "kebab-case" + elif name.isupper(): + return "UPPER_SNAKE_CASE" + else: + return "unknown" + + def _analyze_file_naming(self, files: list[str]) -> dict[str, Any]: + """Analyze file naming conventions.""" + if not files: + return {"count": 0, "pattern": "unknown"} + + patterns: dict[str, int] = {} + + for filename in files: + if filename.replace(".", "_").replace("-", "_") == filename.lower(): + patterns["lowercase_with_underscores"] = patterns.get( + "lowercase_with_underscores", 0 + ) + 1 + elif filename.replace(".", "_") == filename: + patterns["lowercase"] = patterns.get("lowercase", 0) + 1 + else: + patterns["mixed"] = patterns.get("mixed", 0) + 1 + + dominant = max(patterns, key=patterns.get) if patterns else "unknown" + + return {"count": len(files), "pattern": dominant, "examples": files[:5]} + + def _determine_dominant_style( + self, + functions: list[str], + classes: list[str], + variables: list[str], + constants: list[str], + ) -> str: + """Determine the dominant naming style for the project.""" + style_scores: dict[str, int] = {} + + all_names = { + "function": functions, + "class": classes, + "variable": variables, + "constant": constants, + } + + for name_type, names in all_names.items(): + for name in names: + pattern = self._classify_name(name) + style_scores[pattern] = style_scores.get(pattern, 0) + 1 + + if not style_scores: + return "unknown" + + dominant = max(style_scores, key=style_scores.get) + return dominant