Add naming, style, and documentation analyzers
src/contextgen/analyzers/documentation_analyzer.py (new file, 190 lines)
@@ -0,0 +1,190 @@
"""Documentation pattern analyzer for comments and docstrings."""

import re
from pathlib import Path
from typing import Any


class DocumentationPatternAnalyzer:
    """Analyzes documentation and comment patterns."""

    def __init__(self, project_path: Path):
        self.project_path = project_path

    def analyze(self) -> dict[str, Any]:
        """Analyze documentation patterns across project files."""
        docstring_style = self._detect_docstring_style()
        comment_style = self._detect_comment_style()
        documentation_coverage = self._calculate_coverage()

        return {
            "style": docstring_style.get("style", "unknown"),
            "docstring_details": docstring_style,
            "comment_style": comment_style,
            "documentation_coverage": documentation_coverage,
        }

    def _detect_docstring_style(self) -> dict[str, Any]:
        """Detect docstring style (Google, NumPy, Sphinx, etc.)."""
        styles: dict[str, int] = {}

        code_files = self._get_code_files()

        for file_path in code_files[:30]:
            content = self._safe_read_file(file_path)
            if content:
                style = self._classify_docstring(content)
                if style:
                    styles[style] = styles.get(style, 0) + 1

        if not styles:
            return {"style": "unknown", "patterns_found": {}}

        dominant = max(styles, key=styles.get)

        return {
            "style": dominant,
            "patterns_found": styles,
            "examples": self._extract_docstring_examples(code_files, dominant),
        }

    def _classify_docstring(self, content: str) -> str | None:
        """Classify the docstring style used in content."""
        google_patterns = [
            '"""Args:',
            '"""Returns:',
            '"""Raises:',
            '"""Attributes:',
            '"""Examples:',
        ]

        numpy_patterns = [
            '"""Parameters',
            '"""Returns',
            '"""Examples',
            '"""Notes',
            '"""References',
        ]

        sphinx_patterns = [
            '""".. param',
            '""".. return',
            '""".. raises',
            '""".. attribute',
            ':param ',
            ':type ',
            ':return:',
        ]

        for pattern in google_patterns:
            if pattern in content:
                return "google"

        for pattern in numpy_patterns:
            if pattern in content:
                return "numpy"

        for pattern in sphinx_patterns:
            if pattern in content:
                return "sphinx"

        if '"""' in content:
            return "basic"

        return None

    def _detect_comment_style(self) -> dict[str, Any]:
        """Detect comment style (single-line, multi-line, etc.)."""
        styles: dict[str, int] = {}

        code_files = self._get_code_files()

        for file_path in code_files[:30]:
            content = self._safe_read_file(file_path)
            if content:
                if "# " in content:
                    styles["hash_comments"] = styles.get("hash_comments", 0) + 1
                if "// " in content:
                    styles["double_slash_comments"] = styles.get("double_slash_comments", 0) + 1
                if "/*" in content:
                    styles["c_style_comments"] = styles.get("c_style_comments", 0) + 1
                if "<!--" in content:
                    styles["html_comments"] = styles.get("html_comments", 0) + 1

        return {
            "styles_used": list(styles.keys()),
            "dominant": max(styles, key=styles.get) if styles else "unknown",
        }

    def _calculate_coverage(self) -> dict[str, Any]:
        """Calculate documentation coverage."""
        total_items = 0
        documented_items = 0

        code_files = self._get_code_files()

        for file_path in code_files[:30]:
            content = self._safe_read_file(file_path)
            if content:
                items, documented = self._count_documented_items(content)
                total_items += items
                documented_items += documented

        if total_items == 0:
            return {"ratio": None, "documented": 0, "total": 0}

        return {
            "ratio": round(documented_items / total_items, 2),
            "documented": documented_items,
            "total": total_items,
        }

    def _count_documented_items(self, content: str) -> tuple[int, int]:
        """Count functions/classes and their documentation."""
        functions = re.findall(r"(?:def\s+\w+|function\s+\w+)", content)
        classes = re.findall(r"class\s+\w+", content)

        total = len(functions) + len(classes)

        # Count balanced triple-quote pairs, then cap at the number of
        # definitions so the coverage ratio never exceeds 1.0.
        docstring_pattern = r'"""[\s\S]*?"""'
        documented = len(re.findall(docstring_pattern, content))

        return (total, min(documented, total))

    def _extract_docstring_examples(
        self, files: list[Path], style: str
    ) -> list[str]:
        """Extract example docstrings."""
        examples = []

        for file_path in files[:10]:
            content = self._safe_read_file(file_path)
            # Only sample files whose detected docstring style matches the
            # dominant style.
            if content and self._classify_docstring(content) == style:
                start = content.find('"""')
                if start != -1:
                    end = content.find('"""', start + 3)
                    if end != -1 and end - start < 500:
                        examples.append(content[start : end + 3])
            if len(examples) >= 3:
                break

        return examples

    def _get_code_files(self) -> list[Path]:
        """Get list of code files to analyze."""
        extensions = [".py", ".js", ".ts", ".tsx", ".go", ".rs", ".java", ".rb", ".php"]
        files = []
        try:
            for ext in extensions:
                files.extend(self.project_path.rglob(f"*{ext}"))
        except PermissionError:
            pass
        return sorted(set(files))

    def _safe_read_file(self, path: Path) -> str | None:
        """Safely read a file."""
        try:
            return path.read_text(encoding="utf-8")
        except (OSError, UnicodeDecodeError):
            return None
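
A minimal usage sketch (not part of this commit; the project path is hypothetical), assuming the package is importable as contextgen per the file path above:

from pathlib import Path

from contextgen.analyzers.documentation_analyzer import DocumentationPatternAnalyzer

# Point the analyzer at a project root (hypothetical path).
analyzer = DocumentationPatternAnalyzer(Path("/path/to/project"))
report = analyzer.analyze()

# report has the shape returned by analyze() above, e.g.:
# {"style": "google", "docstring_details": {...}, "comment_style": {...},
#  "documentation_coverage": {"ratio": 0.75, "documented": 15, "total": 20}}
print(report["style"], report["documentation_coverage"])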