Add naming, style, and documentation analyzers
src/contextgen/analyzers/documentation_analyzer.py (new file, 190 lines)
@@ -0,0 +1,190 @@
"""Documentation pattern analyzer for comments and docstrings."""

import re
from pathlib import Path
from typing import Any


class DocumentationPatternAnalyzer:
    """Analyzes documentation and comment patterns."""

    def __init__(self, project_path: Path):
        self.project_path = project_path

    def analyze(self) -> dict[str, Any]:
        """Analyze documentation patterns across project files."""
        docstring_style = self._detect_docstring_style()
        comment_style = self._detect_comment_style()
        documentation_coverage = self._calculate_coverage()

        return {
            "style": docstring_style.get("style", "unknown"),
            "docstring_details": docstring_style,
            "comment_style": comment_style,
            "documentation_coverage": documentation_coverage,
        }

    def _detect_docstring_style(self) -> dict[str, Any]:
        """Detect docstring style (Google, NumPy, Sphinx, etc.)."""
        styles: dict[str, int] = {}

        code_files = self._get_code_files()

        for file_path in code_files[:30]:
            content = self._safe_read_file(file_path)
            if content:
                style = self._classify_docstring(content)
                if style:
                    styles[style] = styles.get(style, 0) + 1

        if not styles:
            return {"style": "unknown", "patterns_found": {}}

        dominant = max(styles, key=styles.get)

        return {
            "style": dominant,
            "patterns_found": styles,
            "examples": self._extract_docstring_examples(code_files, dominant),
        }

    def _classify_docstring(self, content: str) -> str | None:
        """Classify the docstring style used in content."""
        google_patterns = [
            '"""Args:',
            '"""Returns:',
            '"""Raises:',
            '"""Attributes:',
            '"""Examples:',
        ]

        numpy_patterns = [
            '"""Parameters',
            '"""Returns',
            '"""Examples',
            '"""Notes',
            '"""References',
        ]

        sphinx_patterns = [
            '""".. param',
            '""".. return',
            '""".. raises',
            '""".. attribute',
            ':param ',
            ':type ',
            ':return:',
        ]

        for pattern in google_patterns:
            if pattern in content:
                return "google"

        for pattern in numpy_patterns:
            if pattern in content:
                return "numpy"

        for pattern in sphinx_patterns:
            if pattern in content:
                return "sphinx"

        if '"""' in content:
            return "basic"

        return None

    def _detect_comment_style(self) -> dict[str, Any]:
        """Detect comment style (single-line, multi-line, etc.)."""
        styles: dict[str, int] = {}

        code_files = self._get_code_files()

        for file_path in code_files[:30]:
            content = self._safe_read_file(file_path)
            if content:
                if "# " in content:
                    styles["hash_comments"] = styles.get("hash_comments", 0) + 1
                if "// " in content:
                    styles["double_slash_comments"] = styles.get("double_slash_comments", 0) + 1
                if "/*" in content:
                    styles["c_style_comments"] = styles.get("c_style_comments", 0) + 1
                if "<!--" in content:
                    styles["html_comments"] = styles.get("html_comments", 0) + 1

        return {
            "styles_used": list(styles.keys()),
            "dominant": max(styles, key=styles.get) if styles else "unknown",
        }

    def _calculate_coverage(self) -> dict[str, Any]:
        """Calculate documentation coverage."""
        total_items = 0
        documented_items = 0

        code_files = self._get_code_files()

        for file_path in code_files[:30]:
            content = self._safe_read_file(file_path)
            if content:
                items, documented = self._count_documented_items(content)
                total_items += items
                documented_items += documented

        if total_items == 0:
            return {"ratio": None, "documented": 0, "total": 0}

        return {
            "ratio": round(documented_items / total_items, 2),
            "documented": documented_items,
            "total": total_items,
        }

    def _count_documented_items(self, content: str) -> tuple[int, int]:
        """Count functions/classes and their documentation."""
        functions = re.findall(r"(?:def\s+\w+|function\s+\w+)", content)
        classes = re.findall(r"class\s+\w+", content)

        total = len(functions) + len(classes)

        # Count balanced triple-quote pairs, then cap at the number of
        # definitions so the coverage ratio never exceeds 1.0.
        docstring_pattern = r'"""[\s\S]*?"""'
        documented = len(re.findall(docstring_pattern, content))

        return (total, min(documented, total))

    def _extract_docstring_examples(
        self, files: list[Path], style: str
    ) -> list[str]:
        """Extract example docstrings."""
        examples = []

        for file_path in files[:10]:
            content = self._safe_read_file(file_path)
            # Only sample files whose detected docstring style matches the
            # dominant style.
            if content and self._classify_docstring(content) == style:
                start = content.find('"""')
                if start != -1:
                    end = content.find('"""', start + 3)
                    if end != -1 and end - start < 500:
                        examples.append(content[start : end + 3])
            if len(examples) >= 3:
                break

        return examples

    def _get_code_files(self) -> list[Path]:
        """Get list of code files to analyze."""
        extensions = [".py", ".js", ".ts", ".tsx", ".go", ".rs", ".java", ".rb", ".php"]
        files = []
        try:
            for ext in extensions:
                files.extend(self.project_path.rglob(f"*{ext}"))
        except PermissionError:
            pass
        return sorted(set(files))

    def _safe_read_file(self, path: Path) -> str | None:
        """Safely read a file."""
        try:
            return path.read_text(encoding="utf-8")
        except (OSError, UnicodeDecodeError):
            return None
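
A minimal usage sketch (not part of this commit; the project path is hypothetical), assuming the package is importable as contextgen per the file path above:

from pathlib import Path

from contextgen.analyzers.documentation_analyzer import DocumentationPatternAnalyzer

# Point the analyzer at a project root (hypothetical path).
analyzer = DocumentationPatternAnalyzer(Path("/path/to/project"))
report = analyzer.analyze()

# report has the shape returned by analyze() above, e.g.:
# {"style": "google", "docstring_details": {...}, "comment_style": {...},
#  "documentation_coverage": {"ratio": 0.75, "documented": 15, "total": 20}}
print(report["style"], report["documentation_coverage"])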