Add naming, style, and documentation analyzers
src/contextgen/analyzers/documentation_analyzer.py (190 lines, normal file)
@@ -0,0 +1,190 @@
"""Documentation pattern analyzer for comments and docstrings."""

from pathlib import Path
from typing import Any


class DocumentationPatternAnalyzer:
    """Analyzes documentation and comment patterns."""

    def __init__(self, project_path: Path):
        self.project_path = project_path

    def analyze(self) -> dict[str, Any]:
        """Analyze documentation patterns across project files."""
        docstring_style = self._detect_docstring_style()
        comment_style = self._detect_comment_style()
        documentation_coverage = self._calculate_coverage()

        return {
            "style": docstring_style.get("style", "unknown"),
            "docstring_details": docstring_style,
            "comment_style": comment_style,
            "documentation_coverage": documentation_coverage,
        }

    def _detect_docstring_style(self) -> dict[str, Any]:
        """Detect docstring style (Google, NumPy, Sphinx, etc.)."""
        styles: dict[str, int] = {}

        code_files = self._get_code_files()

        # Sample at most 30 files to keep the scan fast on large projects.
        for file_path in code_files[:30]:
            content = self._safe_read_file(file_path)
            if content:
                style = self._classify_docstring(content)
                if style:
                    styles[style] = styles.get(style, 0) + 1

        if not styles:
            return {"style": "unknown", "patterns_found": {}}

        dominant = max(styles, key=styles.get)

        return {
            "style": dominant,
            "patterns_found": styles,
            "examples": self._extract_docstring_examples(code_files, dominant),
        }

    def _classify_docstring(self, content: str) -> str | None:
        """Classify the docstring style used in content."""
        google_patterns = [
            '"""Args:',
            '"""Returns:',
            '"""Raises:',
            '"""Attributes:',
            '"""Examples:',
        ]

        numpy_patterns = [
            '"""Parameters',
            '"""Returns',
            '"""Examples',
            '"""Notes',
            '"""References',
        ]

        sphinx_patterns = [
            '""".. param',
            '""".. return',
            '""".. raises',
            '""".. attribute',
            ':param ',
            ':type ',
            ':return:',
        ]

        for pattern in google_patterns:
            if pattern in content:
                return "google"

        for pattern in numpy_patterns:
            if pattern in content:
                return "numpy"

        for pattern in sphinx_patterns:
            if pattern in content:
                return "sphinx"

        if '"""' in content:
            return "basic"

        return None

    def _detect_comment_style(self) -> dict[str, Any]:
        """Detect comment style (single-line, multi-line, etc.)."""
        styles: dict[str, int] = {}

        code_files = self._get_code_files()

        for file_path in code_files[:30]:
            content = self._safe_read_file(file_path)
            if content:
                if "# " in content:
                    styles["hash_comments"] = styles.get("hash_comments", 0) + 1
                if "// " in content:
                    styles["double_slash_comments"] = styles.get("double_slash_comments", 0) + 1
                if "/*" in content:
                    styles["c_style_comments"] = styles.get("c_style_comments", 0) + 1
                if "<!--" in content:
                    styles["html_comments"] = styles.get("html_comments", 0) + 1

        return {
            "styles_used": list(styles.keys()),
            "dominant": max(styles, key=styles.get) if styles else "unknown",
        }

    def _calculate_coverage(self) -> dict[str, Any]:
        """Calculate documentation coverage."""
        total_items = 0
        documented_items = 0

        code_files = self._get_code_files()

        for file_path in code_files[:30]:
            content = self._safe_read_file(file_path)
            if content:
                items, documented = self._count_documented_items(content)
                total_items += items
                documented_items += documented

        if total_items == 0:
            return {"ratio": None, "documented": 0, "total": 0}

        return {
            "ratio": round(documented_items / total_items, 2),
            "documented": documented_items,
            "total": total_items,
        }

    def _count_documented_items(self, content: str) -> tuple[int, int]:
        """Count functions/classes and their documentation."""
        import re

        functions = re.findall(r"(?:def\s+\w+|function\s+\w+)", content)
        classes = re.findall(r"class\s+\w+", content)

        total = len(functions) + len(classes)

        docstring_pattern = r'"""[\s\S]*?"""'
        documented = len(re.findall(docstring_pattern, content))

        # Cap at the number of definitions so the coverage ratio never exceeds 1.
        return (total, min(documented, total))

    def _extract_docstring_examples(
        self, files: list[Path], style: str
    ) -> list[str]:
        """Extract example docstrings."""
        examples = []

        for file_path in files[:10]:
            content = self._safe_read_file(file_path)
            # Only pull examples from files that actually use the dominant style.
            if content and self._classify_docstring(content) == style:
                start = content.find('"""')
                if start != -1:
                    end = content.find('"""', start + 3)
                    if end != -1 and end - start < 500:
                        examples.append(content[start : end + 3])
                        if len(examples) >= 3:
                            break

        return examples

    def _get_code_files(self) -> list[Path]:
        """Get list of code files to analyze."""
        extensions = [".py", ".js", ".ts", ".tsx", ".go", ".rs", ".java", ".rb", ".php"]
        files = []
        try:
            for ext in extensions:
                files.extend(self.project_path.rglob(f"*{ext}"))
        except PermissionError:
            pass
        return sorted(set(files))

    def _safe_read_file(self, path: Path) -> str | None:
        """Safely read a file."""
        try:
            return path.read_text(encoding="utf-8")
        except (IOError, UnicodeDecodeError):
            return None
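
Not part of the commit itself, but for orientation: a minimal usage sketch of the new analyzer, assuming the module is importable under the package path implied by the file location above (contextgen.analyzers.documentation_analyzer). The printed keys follow the dict returned by analyze().

from pathlib import Path

from contextgen.analyzers.documentation_analyzer import DocumentationPatternAnalyzer

# Point the analyzer at a project checkout and collect its report.
analyzer = DocumentationPatternAnalyzer(Path("."))
report = analyzer.analyze()

print(report["style"])                            # e.g. "google", "numpy", "sphinx", or "basic"
print(report["comment_style"]["dominant"])        # e.g. "hash_comments"
print(report["documentation_coverage"]["ratio"])  # None when no functions or classes were found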