Add naming, style, and documentation analyzers
Some checks failed
CI / test (push) Has been cancelled
CI / build (push) Has been cancelled

This commit is contained in:
2026-01-29 13:23:39 +00:00
parent 160df350d7
commit 8a302a291e

View File

@@ -0,0 +1,189 @@
"""Style analyzer for indentation, quotes, and formatting patterns."""
import re
from pathlib import Path
from typing import Any
class StyleAnalyzer:
"""Analyzes code style patterns."""
def __init__(self, project_path: Path):
self.project_path = project_path
def analyze(self) -> dict[str, Any]:
"""Analyze style patterns across project files."""
indentation = self._detect_indentation()
quote_style = self._detect_quote_style()
line_endings = self._detect_line_endings()
max_line_length = self._detect_line_length()
trailing_newline = self._detect_trailing_newline()
return {
"indentation": indentation,
"quote_style": quote_style,
"line_endings": line_endings,
"max_line_length": max_line_length,
"trailing_newline": trailing_newline,
}
def _detect_indentation(self) -> dict[str, Any]:
"""Detect indentation style and width."""
indentations: dict[int, int] = {}
code_files = self._get_code_files()
for file_path in code_files[:30]:
content = self._safe_read_file(file_path)
if content:
indent_width = self._analyze_indent_width(content)
if indent_width:
indentations[indent_width] = indentations.get(indent_width, 0) + 1
if not indentations:
return {"style": "unknown", "width": None}
dominant_width: int = max(indentations.keys(), key=lambda k: indentations[k])
style = "spaces" if dominant_width in [2, 4] else "tabs"
return {"style": style, "width": dominant_width}
def _analyze_indent_width(self, content: str) -> int | None:
"""Analyze the indentation width from content."""
lines = content.split("\n")
indent_counts: dict[int, int] = {}
for line in lines:
if not line.strip():
continue
leading_spaces = len(line) - len(line.lstrip())
leading_tabs = len(line) - len(line.lstrip("\t"))
if leading_spaces > 0 and leading_spaces % 2 == 0:
indent_counts[leading_spaces] = indent_counts.get(leading_spaces, 0) + 1
elif leading_tabs > 0:
return 1
if indent_counts:
return min(indent_counts.keys(), key=lambda k: indent_counts[k])
return None
def _detect_quote_style(self) -> dict[str, Any]:
"""Detect quote style (single vs double)."""
single_count = 0
double_count = 0
code_files = self._get_code_files()
for file_path in code_files[:30]:
content = self._safe_read_file(file_path)
if content:
content = self._remove_string_literals(content)
single_count += content.count("'") - content.count("\\'")
double_count += content.count('"') - content.count('\\"')
total = single_count + double_count
if total == 0:
return {"style": "unknown", "ratio": None}
single_ratio = single_count / total
if single_ratio > 0.6:
style = "single"
elif double_ratio := 1 - single_ratio > 0.6:
style = "double"
else:
style = "mixed"
return {"style": style, "single_ratio": round(single_ratio, 2)}
def _remove_string_literals(self, content: str) -> str:
"""Remove string literals from content to avoid false positives."""
pattern = r'(?:"(?:[^"\\]|\\.)*")|(?:\'(?:[^\'\\]|\\.)*\')'
return re.sub(pattern, '""', content)
def _detect_line_endings(self) -> dict[str, Any]:
"""Detect line ending style (LF vs CRLF)."""
crlf_count = 0
lf_count = 0
code_files = self._get_code_files()
for file_path in code_files[:20]:
content = self._safe_read_file(file_path)
if content:
crlf_count += content.count("\r\n")
lf_count += content.count("\n") - crlf_count
total = crlf_count + lf_count
if total == 0:
return {"style": "unknown"}
if crlf_count > lf_count:
return {"style": "CRLF", "ratio": round(crlf_count / total, 2)}
else:
return {"style": "LF", "ratio": round(lf_count / total, 2)}
def _detect_line_length(self) -> dict[str, Any]:
"""Detect preferred line length."""
lengths: dict[int, int] = {}
code_files = self._get_code_files()
for file_path in code_files[:20]:
content = self._safe_read_file(file_path)
if content:
for line in content.split("\n"):
line_len = len(line.rstrip())
if line_len > 0:
bucket = (line_len // 10) * 10
lengths[bucket] = lengths.get(bucket, 0) + 1
if not lengths:
return {"max": None, "preferred": None}
max_bucket: int = max(lengths.keys(), key=lambda k: lengths[k])
return {"max": max_bucket + 10, "preferred": max_bucket}
def _detect_trailing_newline(self) -> dict[str, Any]:
"""Detect if files typically have trailing newlines."""
with_newline = 0
without_newline = 0
code_files = self._get_code_files()
for file_path in code_files[:20]:
content = self._safe_read_file(file_path)
if content:
if content.endswith("\n"):
with_newline += 1
else:
without_newline += 1
total = with_newline + without_newline
if total == 0:
return {"has_trailing_newline": None}
return {
"has_trailing_newline": with_newline > without_newline,
"ratio": round(with_newline / total, 2),
}
def _get_code_files(self) -> list[Path]:
"""Get list of code files to analyze."""
extensions = [".py", ".js", ".ts", ".tsx", ".go", ".rs", ".java", ".rb", ".php", ".c", ".cpp"]
files = []
try:
for ext in extensions:
files.extend(self.project_path.rglob(f"*{ext}"))
except PermissionError:
pass
return sorted(set(files))
def _safe_read_file(self, path: Path) -> str | None:
"""Safely read a file."""
try:
return path.read_text(encoding="utf-8")
except (IOError, UnicodeDecodeError):
return None