"""Style analyzer for indentation, quotes, and formatting patterns.""" import re from pathlib import Path from typing import Any class StyleAnalyzer: """Analyzes code style patterns.""" def __init__(self, project_path: Path): self.project_path = project_path def analyze(self) -> dict[str, Any]: """Analyze style patterns across project files.""" indentation = self._detect_indentation() quote_style = self._detect_quote_style() line_endings = self._detect_line_endings() max_line_length = self._detect_line_length() trailing_newline = self._detect_trailing_newline() return { "indentation": indentation, "quote_style": quote_style, "line_endings": line_endings, "max_line_length": max_line_length, "trailing_newline": trailing_newline, } def _detect_indentation(self) -> dict[str, Any]: """Detect indentation style and width.""" indentations: dict[int, int] = {} code_files = self._get_code_files() for file_path in code_files[:30]: content = self._safe_read_file(file_path) if content: indent_width = self._analyze_indent_width(content) if indent_width: indentations[indent_width] = indentations.get(indent_width, 0) + 1 if not indentations: return {"style": "unknown", "width": None} dominant_width: int = max(indentations.keys(), key=lambda k: indentations[k]) style = "spaces" if dominant_width in [2, 4] else "tabs" return {"style": style, "width": dominant_width} def _analyze_indent_width(self, content: str) -> int | None: """Analyze the indentation width from content.""" lines = content.split("\n") indent_counts: dict[int, int] = {} for line in lines: if not line.strip(): continue leading_spaces = len(line) - len(line.lstrip()) leading_tabs = len(line) - len(line.lstrip("\t")) if leading_spaces > 0 and leading_spaces % 2 == 0: indent_counts[leading_spaces] = indent_counts.get(leading_spaces, 0) + 1 elif leading_tabs > 0: return 1 if indent_counts: return min(indent_counts.keys(), key=lambda k: indent_counts[k]) return None def _detect_quote_style(self) -> dict[str, Any]: """Detect quote style (single vs double).""" single_count = 0 double_count = 0 code_files = self._get_code_files() for file_path in code_files[:30]: content = self._safe_read_file(file_path) if content: content = self._remove_string_literals(content) single_count += content.count("'") - content.count("\\'") double_count += content.count('"') - content.count('\\"') total = single_count + double_count if total == 0: return {"style": "unknown", "ratio": None} single_ratio = single_count / total if single_ratio > 0.6: style = "single" elif double_ratio := 1 - single_ratio > 0.6: style = "double" else: style = "mixed" return {"style": style, "single_ratio": round(single_ratio, 2)} def _remove_string_literals(self, content: str) -> str: """Remove string literals from content to avoid false positives.""" pattern = r'(?:"(?:[^"\\]|\\.)*")|(?:\'(?:[^\'\\]|\\.)*\')' return re.sub(pattern, '""', content) def _detect_line_endings(self) -> dict[str, Any]: """Detect line ending style (LF vs CRLF).""" crlf_count = 0 lf_count = 0 code_files = self._get_code_files() for file_path in code_files[:20]: content = self._safe_read_file(file_path) if content: crlf_count += content.count("\r\n") lf_count += content.count("\n") - crlf_count total = crlf_count + lf_count if total == 0: return {"style": "unknown"} if crlf_count > lf_count: return {"style": "CRLF", "ratio": round(crlf_count / total, 2)} else: return {"style": "LF", "ratio": round(lf_count / total, 2)} def _detect_line_length(self) -> dict[str, Any]: """Detect preferred line length.""" lengths: dict[int, int] = {} code_files = self._get_code_files() for file_path in code_files[:20]: content = self._safe_read_file(file_path) if content: for line in content.split("\n"): line_len = len(line.rstrip()) if line_len > 0: bucket = (line_len // 10) * 10 lengths[bucket] = lengths.get(bucket, 0) + 1 if not lengths: return {"max": None, "preferred": None} max_bucket: int = max(lengths.keys(), key=lambda k: lengths[k]) return {"max": max_bucket + 10, "preferred": max_bucket} def _detect_trailing_newline(self) -> dict[str, Any]: """Detect if files typically have trailing newlines.""" with_newline = 0 without_newline = 0 code_files = self._get_code_files() for file_path in code_files[:20]: content = self._safe_read_file(file_path) if content: if content.endswith("\n"): with_newline += 1 else: without_newline += 1 total = with_newline + without_newline if total == 0: return {"has_trailing_newline": None} return { "has_trailing_newline": with_newline > without_newline, "ratio": round(with_newline / total, 2), } def _get_code_files(self) -> list[Path]: """Get list of code files to analyze.""" extensions = [".py", ".js", ".ts", ".tsx", ".go", ".rs", ".java", ".rb", ".php", ".c", ".cpp"] files = [] try: for ext in extensions: files.extend(self.project_path.rglob(f"*{ext}")) except PermissionError: pass return sorted(set(files)) def _safe_read_file(self, path: Path) -> str | None: """Safely read a file.""" try: return path.read_text(encoding="utf-8") except (IOError, UnicodeDecodeError): return None