Add naming, style, and documentation analyzers
src/contextgen/analyzers/documentation_analyzer.py (190 lines, normal file)
@@ -0,0 +1,190 @@
"""Documentation pattern analyzer for comments and docstrings."""

from pathlib import Path
from typing import Any


class DocumentationPatternAnalyzer:
    """Analyzes documentation and comment patterns."""

    def __init__(self, project_path: Path):
        self.project_path = project_path

    def analyze(self) -> dict[str, Any]:
        """Analyze documentation patterns across project files."""
        docstring_style = self._detect_docstring_style()
        comment_style = self._detect_comment_style()
        documentation_coverage = self._calculate_coverage()

        return {
            "style": docstring_style.get("style", "unknown"),
            "docstring_details": docstring_style,
            "comment_style": comment_style,
            "documentation_coverage": documentation_coverage,
        }

    def _detect_docstring_style(self) -> dict[str, Any]:
        """Detect docstring style (Google, NumPy, Sphinx, etc.)."""
        styles: dict[str, int] = {}

        code_files = self._get_code_files()

        # Sample at most 30 files to keep the scan fast on large projects.
        for file_path in code_files[:30]:
            content = self._safe_read_file(file_path)
            if content:
                style = self._classify_docstring(content)
                if style:
                    styles[style] = styles.get(style, 0) + 1

        if not styles:
            return {"style": "unknown", "patterns_found": {}}

        dominant = max(styles, key=styles.get)

        return {
            "style": dominant,
            "patterns_found": styles,
            "examples": self._extract_docstring_examples(code_files, dominant),
        }

    def _classify_docstring(self, content: str) -> str | None:
        """Classify the docstring style used in content."""
        google_patterns = [
            '"""Args:',
            '"""Returns:',
            '"""Raises:',
            '"""Attributes:',
            '"""Examples:',
        ]

        numpy_patterns = [
            '"""Parameters',
            '"""Returns',
            '"""Examples',
            '"""Notes',
            '"""References',
        ]

        sphinx_patterns = [
            '""".. param',
            '""".. return',
            '""".. raises',
            '""".. attribute',
            ':param ',
            ':type ',
            ':return:',
        ]

        for pattern in google_patterns:
            if pattern in content:
                return "google"

        for pattern in numpy_patterns:
            if pattern in content:
                return "numpy"

        for pattern in sphinx_patterns:
            if pattern in content:
                return "sphinx"

        if '"""' in content:
            return "basic"

        return None

    def _detect_comment_style(self) -> dict[str, Any]:
        """Detect comment style (single-line, multi-line, etc.)."""
        styles: dict[str, int] = {}

        code_files = self._get_code_files()

        for file_path in code_files[:30]:
            content = self._safe_read_file(file_path)
            if content:
                if "# " in content:
                    styles["hash_comments"] = styles.get("hash_comments", 0) + 1
                if "// " in content:
                    styles["double_slash_comments"] = styles.get("double_slash_comments", 0) + 1
                if "/*" in content:
                    styles["c_style_comments"] = styles.get("c_style_comments", 0) + 1
                if "<!--" in content:
                    styles["html_comments"] = styles.get("html_comments", 0) + 1

        return {
            "styles_used": list(styles.keys()),
            "dominant": max(styles, key=styles.get) if styles else "unknown",
        }

    def _calculate_coverage(self) -> dict[str, Any]:
        """Calculate documentation coverage."""
        total_items = 0
        documented_items = 0

        code_files = self._get_code_files()

        for file_path in code_files[:30]:
            content = self._safe_read_file(file_path)
            if content:
                items, documented = self._count_documented_items(content)
                total_items += items
                documented_items += documented

        if total_items == 0:
            return {"ratio": None, "documented": 0, "total": 0}

        return {
            "ratio": round(documented_items / total_items, 2),
            "documented": documented_items,
            "total": total_items,
        }

    def _count_documented_items(self, content: str) -> tuple[int, int]:
        """Count functions/classes and their documentation."""
        import re

        functions = re.findall(r"(?:def\s+\w+|function\s+\w+)", content)
        classes = re.findall(r"class\s+\w+", content)

        total = len(functions) + len(classes)

        docstring_pattern = r'"""[\s\S]*?"""'
        documented = len(re.findall(docstring_pattern, content))

        # Cap at the number of definitions so the coverage ratio never exceeds 1.
        return (total, min(documented, total))

    def _extract_docstring_examples(
        self, files: list[Path], style: str
    ) -> list[str]:
        """Extract example docstrings."""
        examples = []

        for file_path in files[:10]:
            content = self._safe_read_file(file_path)
            # Only pull examples from files that actually use the dominant style.
            if content and self._classify_docstring(content) == style:
                start = content.find('"""')
                if start != -1:
                    end = content.find('"""', start + 3)
                    if end != -1 and end - start < 500:
                        examples.append(content[start : end + 3])
                        if len(examples) >= 3:
                            break

        return examples

    def _get_code_files(self) -> list[Path]:
        """Get list of code files to analyze."""
        extensions = [".py", ".js", ".ts", ".tsx", ".go", ".rs", ".java", ".rb", ".php"]
        files = []
        try:
            for ext in extensions:
                files.extend(self.project_path.rglob(f"*{ext}"))
        except PermissionError:
            pass
        return sorted(set(files))

    def _safe_read_file(self, path: Path) -> str | None:
        """Safely read a file."""
        try:
            return path.read_text(encoding="utf-8")
        except (IOError, UnicodeDecodeError):
            return None
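
Not part of the commit itself, but for orientation: a minimal usage sketch of the new analyzer, assuming the module is importable under the package path implied by the file location above (contextgen.analyzers.documentation_analyzer). The printed keys follow the dict returned by analyze().

from pathlib import Path

from contextgen.analyzers.documentation_analyzer import DocumentationPatternAnalyzer

# Point the analyzer at a project checkout and collect its report.
analyzer = DocumentationPatternAnalyzer(Path("."))
report = analyzer.analyze()

print(report["style"])                            # e.g. "google", "numpy", "sphinx", or "basic"
print(report["comment_style"]["dominant"])        # e.g. "hash_comments"
print(report["documentation_coverage"]["ratio"])  # None when no functions or classes were found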