Add naming, style, and documentation analyzers
Some checks failed
CI / test (push) Failing after 8s
CI / build (push) Has been skipped

This commit is contained in:
2026-01-29 13:23:39 +00:00
parent 8a302a291e
commit b05945ced4

View File

@@ -0,0 +1,190 @@
"""Documentation pattern analyzer for comments and docstrings."""
import ast
import re
import warnings
from pathlib import Path
from typing import Any
class DocumentationPatternAnalyzer:
"""Analyzes documentation and comment patterns."""
def __init__(self, project_path: Path):
self.project_path = project_path
def analyze(self) -> dict[str, Any]:
"""Analyze documentation patterns across project files."""
docstring_style = self._detect_docstring_style()
comment_style = self._detect_comment_style()
documentation_coverage = self._calculate_coverage()
return {
"style": docstring_style.get("style", "unknown"),
"docstring_details": docstring_style,
"comment_style": comment_style,
"documentation_coverage": documentation_coverage,
}
def _detect_docstring_style(self) -> dict[str, Any]:
"""Detect docstring style (GooglePy, Sphinx,, Num etc.)."""
styles: dict[str, int] = {}
code_files = self._get_code_files()
for file_path in code_files[:30]:
content = self._safe_read_file(file_path)
if content:
style = self._classify_docstring(content)
if style:
styles[style] = styles.get(style, 0) + 1
if not styles:
return {"style": "unknown", "patterns_found": {}}
dominant = max(styles, key=styles.get)
return {
"style": dominant,
"patterns_found": styles,
"examples": self._extract_docstring_examples(code_files, dominant),
}
def _classify_docstring(self, content: str) -> str | None:
"""Classify the docstring style used in content."""
google_patterns = [
'"""Args:',
'"""Returns:',
'"""Raises:',
'"""Attributes:',
'"""Examples:',
]
numpy_patterns = [
'"""Parameters',
'"""Returns',
'"""Examples',
'"""Notes',
'"""References',
]
sphinx_patterns = [
'""".. param',
'""".. return',
'""".. raises',
'""".. attribute',
':param ',
':type ',
':return:',
]
for pattern in google_patterns:
if pattern in content:
return "google"
for pattern in numpy_patterns:
if pattern in content:
return "numpy"
for pattern in sphinx_patterns:
if pattern in content:
return "sphinx"
if '"""' in content:
return "basic"
return None
def _detect_comment_style(self) -> dict[str, Any]:
"""Detect comment style (single-line, multi-line, etc.)."""
styles: dict[str, int] = {}
code_files = self._get_code_files()
for file_path in code_files[:30]:
content = self._safe_read_file(file_path)
if content:
if "# " in content:
styles["hash_comments"] = styles.get("hash_comments", 0) + 1
if "// " in content:
styles["double_slash_comments"] = styles.get("double_slash_comments", 0) + 1
if "/*" in content:
styles["c_style_comments"] = styles.get("c_style_comments", 0) + 1
if "<!--" in content:
styles["html_comments"] = styles.get("html_comments", 0) + 1
return {
"styles_used": list(styles.keys()),
"dominant": max(styles, key=styles.get) if styles else "unknown",
}
def _calculate_coverage(self) -> dict[str, Any]:
"""Calculate documentation coverage."""
total_items = 0
documented_items = 0
code_files = self._get_code_files()
for file_path in code_files[:30]:
content = self._safe_read_file(file_path)
if content:
items, documented = self._count_documented_items(content)
total_items += items
documented_items += documented
if total_items == 0:
return {"ratio": None, "documented": 0, "total": 0}
return {
"ratio": round(documented_items / total_items, 2),
"documented": documented_items,
"total": total_items,
}
def _count_documented_items(self, content: str) -> tuple[int, int]:
"""Count functions/classes and their documentation."""
import re
functions = re.findall(r"(?:def\s+\w+|function\s+\w+)", content)
classes = re.findall(r"class\s+\w+", content)
total = len(functions) + len(classes)
docstring_pattern = r'"""[\s\S]*?"""'
documented = len(re.findall(docstring_pattern, content))
return (total, min(documented, total))
def _extract_docstring_examples(
self, files: list[Path], style: str
) -> list[str]:
"""Extract example docstrings."""
examples = []
for file_path in files[:10]:
content = self._safe_read_file(file_path)
if content and style in content:
start = content.find('"""')
if start != -1:
end = content.find('"""', start + 3)
if end != -1 and end - start < 500:
examples.append(content[start : end + 3])
if len(examples) >= 3:
break
return examples
def _get_code_files(self) -> list[Path]:
"""Get list of code files to analyze."""
extensions = [".py", ".js", ".ts", ".tsx", ".go", ".rs", ".java", ".rb", ".php"]
files = []
try:
for ext in extensions:
files.extend(self.project_path.rglob(f"*{ext}"))
except PermissionError:
pass
return sorted(set(files))
def _safe_read_file(self, path: Path) -> str | None:
"""Safely read a file."""
try:
return path.read_text(encoding="utf-8")
except (IOError, UnicodeDecodeError):
return None