Add naming, style, and documentation analyzers

2026-01-29 13:23:39 +00:00
parent 160df350d7
commit 8a302a291e
1 changed files with 189 additions and 0 deletions
--- a/src/contextgen/analyzers/style_analyzer.py
+++ b/src/contextgen/analyzers/style_analyzer.py
@@ -0,0 +1,189 @@
 """Style analyzer for indentation, quotes, and formatting patterns."""
 import re
 from pathlib import Path
 from typing import Any
 class StyleAnalyzer:
    """Analyzes code style patterns."""
    def __init__(self, project_path: Path):
        self.project_path = project_path
    def analyze(self) -> dict[str, Any]:
        """Analyze style patterns across project files."""
        indentation = self._detect_indentation()
        quote_style = self._detect_quote_style()
        line_endings = self._detect_line_endings()
        max_line_length = self._detect_line_length()
        trailing_newline = self._detect_trailing_newline()
        return {
            "indentation": indentation,
            "quote_style": quote_style,
            "line_endings": line_endings,
            "max_line_length": max_line_length,
            "trailing_newline": trailing_newline,
        }
    def _detect_indentation(self) -> dict[str, Any]:
        """Detect indentation style and width."""
        indentations: dict[int, int] = {}
        code_files = self._get_code_files()
        for file_path in code_files[:30]:
            content = self._safe_read_file(file_path)
            if content:
                indent_width = self._analyze_indent_width(content)
                if indent_width:
                    indentations[indent_width] = indentations.get(indent_width, 0) + 1
        if not indentations:
            return {"style": "unknown", "width": None}
        dominant_width: int = max(indentations.keys(), key=lambda k: indentations[k])
        style = "spaces" if dominant_width in [2, 4] else "tabs"
        return {"style": style, "width": dominant_width}
    def _analyze_indent_width(self, content: str) -> int | None:
        """Analyze the indentation width from content."""
        lines = content.split("\n")
        indent_counts: dict[int, int] = {}
        for line in lines:
            if not line.strip():
                continue
            leading_spaces = len(line) - len(line.lstrip())
            leading_tabs = len(line) - len(line.lstrip("\t"))
            if leading_spaces > 0 and leading_spaces % 2 == 0:
                indent_counts[leading_spaces] = indent_counts.get(leading_spaces, 0) + 1
            elif leading_tabs > 0:
                return 1
        if indent_counts:
            return min(indent_counts.keys(), key=lambda k: indent_counts[k])
        return None
    def _detect_quote_style(self) -> dict[str, Any]:
        """Detect quote style (single vs double)."""
        single_count = 0
        double_count = 0
        code_files = self._get_code_files()
        for file_path in code_files[:30]:
            content = self._safe_read_file(file_path)
            if content:
                content = self._remove_string_literals(content)
                single_count += content.count("'") - content.count("\\'")
                double_count += content.count('"') - content.count('\\"')
        total = single_count + double_count
        if total == 0:
            return {"style": "unknown", "ratio": None}
        single_ratio = single_count / total
        if single_ratio > 0.6:
            style = "single"
        elif double_ratio := 1 - single_ratio > 0.6:
            style = "double"
        else:
            style = "mixed"
        return {"style": style, "single_ratio": round(single_ratio, 2)}
    def _remove_string_literals(self, content: str) -> str:
        """Remove string literals from content to avoid false positives."""
        pattern = r'(?:"(?:[^"\\]|\\.)*")|(?:\'(?:[^\'\\]|\\.)*\')'
        return re.sub(pattern, '""', content)
    def _detect_line_endings(self) -> dict[str, Any]:
        """Detect line ending style (LF vs CRLF)."""
        crlf_count = 0
        lf_count = 0
        code_files = self._get_code_files()
        for file_path in code_files[:20]:
            content = self._safe_read_file(file_path)
            if content:
                crlf_count += content.count("\r\n")
                lf_count += content.count("\n") - crlf_count
        total = crlf_count + lf_count
        if total == 0:
            return {"style": "unknown"}
        if crlf_count > lf_count:
            return {"style": "CRLF", "ratio": round(crlf_count / total, 2)}
        else:
            return {"style": "LF", "ratio": round(lf_count / total, 2)}
    def _detect_line_length(self) -> dict[str, Any]:
        """Detect preferred line length."""
        lengths: dict[int, int] = {}
        code_files = self._get_code_files()
        for file_path in code_files[:20]:
            content = self._safe_read_file(file_path)
            if content:
                for line in content.split("\n"):
                    line_len = len(line.rstrip())
                    if line_len > 0:
                        bucket = (line_len // 10) * 10
                        lengths[bucket] = lengths.get(bucket, 0) + 1
        if not lengths:
            return {"max": None, "preferred": None}
        max_bucket: int = max(lengths.keys(), key=lambda k: lengths[k])
        return {"max": max_bucket + 10, "preferred": max_bucket}
    def _detect_trailing_newline(self) -> dict[str, Any]:
        """Detect if files typically have trailing newlines."""
        with_newline = 0
        without_newline = 0
        code_files = self._get_code_files()
        for file_path in code_files[:20]:
            content = self._safe_read_file(file_path)
            if content:
                if content.endswith("\n"):
                    with_newline += 1
                else:
                    without_newline += 1
        total = with_newline + without_newline
        if total == 0:
            return {"has_trailing_newline": None}
        return {
            "has_trailing_newline": with_newline > without_newline,
            "ratio": round(with_newline / total, 2),
        }
    def _get_code_files(self) -> list[Path]:
        """Get list of code files to analyze."""
        extensions = [".py", ".js", ".ts", ".tsx", ".go", ".rs", ".java", ".rb", ".php", ".c", ".cpp"]
        files = []
        try:
            for ext in extensions:
                files.extend(self.project_path.rglob(f"*{ext}"))
        except PermissionError:
            pass
        return sorted(set(files))
    def _safe_read_file(self, path: Path) -> str | None:
        """Safely read a file."""
        try:
            return path.read_text(encoding="utf-8")
        except (IOError, UnicodeDecodeError):
            return None