diff --git a/src/contextgen/analyzers/documentation_analyzer.py b/src/contextgen/analyzers/documentation_analyzer.py new file mode 100644 index 0000000..d78038d --- /dev/null +++ b/src/contextgen/analyzers/documentation_analyzer.py @@ -0,0 +1,190 @@ +"""Documentation pattern analyzer for comments and docstrings.""" + +from pathlib import Path +from typing import Any + + +class DocumentationPatternAnalyzer: + """Analyzes documentation and comment patterns.""" + + def __init__(self, project_path: Path): + self.project_path = project_path + + def analyze(self) -> dict[str, Any]: + """Analyze documentation patterns across project files.""" + docstring_style = self._detect_docstring_style() + comment_style = self._detect_comment_style() + documentation_coverage = self._calculate_coverage() + + return { + "style": docstring_style.get("style", "unknown"), + "docstring_details": docstring_style, + "comment_style": comment_style, + "documentation_coverage": documentation_coverage, + } + + def _detect_docstring_style(self) -> dict[str, Any]: + """Detect docstring style (GooglePy, Sphinx,, Num etc.).""" + styles: dict[str, int] = {} + + code_files = self._get_code_files() + + for file_path in code_files[:30]: + content = self._safe_read_file(file_path) + if content: + style = self._classify_docstring(content) + if style: + styles[style] = styles.get(style, 0) + 1 + + if not styles: + return {"style": "unknown", "patterns_found": {}} + + dominant = max(styles, key=styles.get) + + return { + "style": dominant, + "patterns_found": styles, + "examples": self._extract_docstring_examples(code_files, dominant), + } + + def _classify_docstring(self, content: str) -> str | None: + """Classify the docstring style used in content.""" + google_patterns = [ + '"""Args:', + '"""Returns:', + '"""Raises:', + '"""Attributes:', + '"""Examples:', + ] + + numpy_patterns = [ + '"""Parameters', + '"""Returns', + '"""Examples', + '"""Notes', + '"""References', + ] + + sphinx_patterns = [ + '""".. param', + '""".. return', + '""".. raises', + '""".. attribute', + ':param ', + ':type ', + ':return:', + ] + + for pattern in google_patterns: + if pattern in content: + return "google" + + for pattern in numpy_patterns: + if pattern in content: + return "numpy" + + for pattern in sphinx_patterns: + if pattern in content: + return "sphinx" + + if '"""' in content: + return "basic" + + return None + + def _detect_comment_style(self) -> dict[str, Any]: + """Detect comment style (single-line, multi-line, etc.).""" + styles: dict[str, int] = {} + + code_files = self._get_code_files() + + for file_path in code_files[:30]: + content = self._safe_read_file(file_path) + if content: + if "# " in content: + styles["hash_comments"] = styles.get("hash_comments", 0) + 1 + if "// " in content: + styles["double_slash_comments"] = styles.get("double_slash_comments", 0) + 1 + if "/*" in content: + styles["c_style_comments"] = styles.get("c_style_comments", 0) + 1 + if "