fix: resolve CI mypy type checking issues
Some checks failed
CI / test (3.10) (push) Has been cancelled
CI / test (3.11) (push) Has been cancelled
CI / test (3.12) (push) Has been cancelled
CI / build (push) Has been cancelled

This commit is contained in:
2026-02-02 23:24:22 +00:00
parent 3b0c3ac01d
commit 6c468e67a4

View File

@@ -0,0 +1,253 @@
import re
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any
import tree_sitter
from tree_sitter_languages import get_language
from patternforge.config import Config
@dataclass
class NamingPattern:
    """One naming convention together with sample identifiers that follow it."""

    # Label of the convention, e.g. "snake_case" or "PascalCase".
    convention: str
    # Common identifier prefixes; empty unless a caller fills it in.
    prefixes: list[str] = field(default_factory=list)
    # Common identifier suffixes; empty unless a caller fills it in.
    suffixes: list[str] = field(default_factory=list)
    # Sample identifiers that were matched for this convention.
    examples: list[str] = field(default_factory=list)
@dataclass
class CodeStructure:
    """Structural elements (classes, functions, imports, types) found in source text."""

    # One dict per detected class definition.
    class_patterns: list[dict[str, Any]] = field(default_factory=list)
    # One dict per detected function or method definition.
    function_patterns: list[dict[str, Any]] = field(default_factory=list)
    # Import / include statements captured as plain strings.
    import_patterns: list[str] = field(default_factory=list)
    # Type definitions as string-to-string dicts; empty by default.
    type_definitions: list[dict[str, str]] = field(default_factory=list)
@dataclass
class StylePattern:
    """Formatting characteristics of a piece of source text, with defaults."""

    # Indentation character family: "space" (default) or "tab".
    indent_style: str = "space"
    # Width of one indentation level.
    indent_size: int = 4
    # Line terminator style: "lf" (default) or "crlf".
    line_endings: str = "lf"
    # Brace placement label; defaults to "same-line".
    bracket_style: str = "same-line"
class CodeAnalyzer:
    """Summarise naming, structure and formatting habits of source files.

    All analysis performed by the methods of this class is regex/heuristic
    based.  A tree-sitter parser is created on a best-effort basis in
    ``__init__`` but none of the methods below consult it.
    """

    LANGUAGE_MAP = {
        "python": "python",
        "javascript": "javascript",
        "typescript": "typescript",
        "java": "java",
        "cpp": "cpp",
        "c": "c",
        "rust": "rust",
        "go": "go",
        "ruby": "ruby",
    }

    # File suffixes recognised for each supported language.
    _EXTENSIONS: dict[str, tuple[str, ...]] = {
        "python": (".py", ".pyi"),
        "javascript": (".js", ".mjs"),
        "typescript": (".ts", ".tsx"),
        "java": (".java",),
        "cpp": (".cpp", ".cc", ".cxx", ".hpp"),
        "c": (".c", ".h"),
        "rust": (".rs",),
        "go": (".go",),
        "ruby": (".rb",),
    }

    # A whole identifier token; conventions are matched against full tokens
    # so that fragments such as "ooBar" (from "FooBar") are never reported.
    _IDENTIFIER_RE = re.compile(r"\b[^\W\d]\w*")
    _NAMING_PATTERNS = {
        "camelCase": re.compile(r"[a-z][a-zA-Z0-9]*"),
        "PascalCase": re.compile(r"[A-Z][a-zA-Z0-9]*"),
        "snake_case": re.compile(r"[a-z][a-z0-9_]*"),
        "SCREAMING_SNAKE_CASE": re.compile(r"[A-Z][A-Z0-9_]*"),
    }

    # "\b" keeps "subclass X" / "#undef X" from being reported as definitions.
    _CLASS_RE = re.compile(r"\bclass\s+(\w+)")
    # The "public" alternative skips common modifiers and requires "(" after
    # the name, so "public static void main(" yields "main" and
    # "public class Foo" / public fields are not counted as functions.
    _FUNC_RE = re.compile(
        r"\bdef\s+(\w+)"
        r"|\bfunction\s+(\w+)"
        r"|\bpublic\s+(?:(?:static|final|abstract|synchronized|native|default)\s+)*"
        r"[\w.<>\[\],?]+\s+(\w+)\s*\("
    )
    _IMPORT_RE = re.compile(r"^import\s+.*|^from\s+.*|^#include\s+.*", re.MULTILINE)

    # Only the head of a file is inspected when guessing its formatting.
    _STYLE_SAMPLE_CHARS = 1000

    def __init__(self, language: str, config: Config) -> None:
        """Store the target language and configuration, then try to set up a parser.

        Args:
            language: Language name. Names listed in ``LANGUAGE_MAP`` are
                matched case-insensitively; any other value is kept as given.
            config: Project configuration object; stored on the instance.
        """
        self.language = self.LANGUAGE_MAP.get(language.lower(), language)
        self.config = config
        self._parser: tree_sitter.Parser | None = None
        self._language: Any = None
        self._try_init_language()

    def _try_init_language(self) -> None:
        """Create a tree-sitter parser for ``self.language`` if possible.

        Any failure leaves ``_parser`` and ``_language`` set to ``None``.
        """
        try:
            parser = tree_sitter.Parser()
            grammar = get_language(self.language)
            parser.language = grammar
        except Exception:
            # Deliberate best-effort: the regex-based analysis below does not
            # need the parser, so a missing or incompatible grammar is not fatal.
            self._parser = None
            self._language = None
        else:
            self._parser = parser
            self._language = grammar

    def _file_extensions(self) -> set[str]:
        """Return the file suffixes for ``self.language``.

        Unknown languages fall back to a single suffix ``.<language>``.
        """
        return set(self._EXTENSIONS.get(self.language, (f".{self.language}",)))

    def _is_code_file(self, path: Path) -> bool:
        """Return True if *path* has one of this language's suffixes (case-insensitive)."""
        suffix = path.suffix.lower()
        return any(suffix == ext.lower() for ext in self._file_extensions())

    def _collect_files(self, path: Path, recursive: bool) -> list[Path]:
        """Return the matching source files at or below *path*, sorted.

        Args:
            path: A single file or a directory.
            recursive: When *path* is a directory, descend into sub-directories.

        Returns:
            Matching files in sorted order (so results are reproducible), or
            an empty list when nothing matches.
        """
        if path.is_file():
            return [path] if self._is_code_file(path) else []
        pattern = "**/*" if recursive else "*"
        return sorted(
            candidate
            for candidate in path.glob(pattern)
            if self._is_code_file(candidate) and candidate.is_file()
        )

    def _extract_naming_conventions(self, content: str) -> dict[str, NamingPattern]:
        """Group the identifiers in *content* by the naming convention they match.

        Each whole identifier is tested against every convention pattern
        (leading underscores are ignored for the test), so an ambiguous name
        such as ``foo`` is listed under both camelCase and snake_case.

        Returns:
            Mapping of convention name to a ``NamingPattern`` holding up to 10
            distinct examples in order of first appearance.  Conventions with
            no match are omitted.
        """
        # dict keys give de-duplication while preserving first-seen order.
        seen: dict[str, dict[str, None]] = {name: {} for name in self._NAMING_PATTERNS}
        for token in self._IDENTIFIER_RE.findall(content):
            core = token.lstrip("_")
            if not core:
                continue
            for name, pattern in self._NAMING_PATTERNS.items():
                if pattern.fullmatch(core):
                    seen[name][token] = None
        return {
            name: NamingPattern(convention=name, examples=list(tokens)[:10])
            for name, tokens in seen.items()
            if tokens
        }

    def _extract_structure(self, content: str) -> CodeStructure:
        """Find class, function and import statements in *content* via regex.

        Returns:
            A ``CodeStructure`` whose class/function entries are ``{"name": ...}``
            dicts and whose imports are the matched lines (at most 20).
        """
        structure = CodeStructure()
        structure.class_patterns = [
            {"name": m.group(1)} for m in self._CLASS_RE.finditer(content)
        ]
        for m in self._FUNC_RE.finditer(content):
            name = m.group(1) or m.group(2) or m.group(3)
            if name:
                structure.function_patterns.append({"name": name})
        structure.import_patterns = self._IMPORT_RE.findall(content)[:20]
        return structure

    def _detect_style(self, content: str) -> StylePattern:
        """Guess indentation and line-ending style from the head of *content*.

        Only leading whitespace of non-blank lines is considered, so tabs or
        runs of spaces inside code or strings do not influence the result.
        The indent size is derived from the shallowest indentation of at
        least two spaces: below four spaces it is reported as 2, otherwise 4.
        Tab indentation is reported with size 1.
        """
        sample = content[: self._STYLE_SAMPLE_CHARS]
        tab_lines = 0
        space_widths: list[int] = []
        for line in sample.splitlines():
            if not line.strip():
                continue
            if line[0] == "\t":
                tab_lines += 1
            elif line[0] == " ":
                width = len(line) - len(line.lstrip(" "))
                # Width 1 is typically comment alignment (" * ..."), not indentation.
                if width >= 2:
                    space_widths.append(width)

        style = StylePattern()
        if tab_lines and tab_lines >= len(space_widths):
            style.indent_style = "tab"
            style.indent_size = 1
        elif space_widths:
            style.indent_size = 2 if min(space_widths) < 4 else 4
        style.line_endings = "crlf" if "\r\n" in sample else "lf"
        return style

    def _analyze_file(self, path: Path) -> dict[str, Any]:
        """Read *path* and return its per-file analysis.

        Returns:
            A dict with keys ``path``, ``naming_conventions``, ``structure``,
            ``style``, ``size`` and ``lines``; an empty dict if the file
            cannot be read.
        """
        try:
            # newline="" disables newline translation; without it "\r\n" is
            # turned into "\n" on read and CRLF files can never be detected.
            with open(path, encoding="utf-8", errors="ignore", newline="") as f:
                raw = f.read()
        except (OSError, ValueError):
            return {}

        # Everything except style detection works on newline-normalised text.
        content = raw.replace("\r\n", "\n").replace("\r", "\n")
        structure = self._extract_structure(content)
        line_count = content.count("\n")
        if content and not content.endswith("\n"):
            line_count += 1  # final line without a trailing newline
        return {
            "path": str(path),
            "naming_conventions": self._extract_naming_conventions(content),
            "structure": {
                "classes": structure.class_patterns,
                "functions": structure.function_patterns,
                "imports": structure.import_patterns,
            },
            "style": dict(vars(self._detect_style(raw))),
            "size": len(content),
            "lines": line_count,
        }

    def analyze(self, path: str, recursive: bool = True) -> dict[str, Any]:
        """Analyse every matching file under *path* and aggregate the results.

        Args:
            path: File or directory to analyse.
            recursive: Descend into sub-directories when *path* is a directory.

        Returns:
            An aggregate report dict, or ``{"error": ..., "language": ...}``
            when no matching file is found.
        """
        files = self._collect_files(Path(path), recursive)
        if not files:
            return {"error": "No matching files found", "language": self.language}

        file_analyses: list[dict[str, Any]] = []
        all_naming: dict[str, set[str]] = {}
        class_count = 0
        function_count = 0
        import_count = 0
        style_votes: dict[str, int] = {"space": 0, "tab": 0}
        # Indent-size votes are kept per indent style so that tab files
        # (size 1) cannot decide the size reported for a space-indented project.
        indent_votes: dict[str, dict[int, int]] = {}

        for source_file in files:
            analysis = self._analyze_file(source_file)
            if not analysis:
                continue
            file_analyses.append(analysis)

            for nc in analysis["naming_conventions"].values():
                if nc.examples:
                    all_naming.setdefault(nc.convention, set()).update(nc.examples)

            structure = analysis["structure"]
            class_count += len(structure["classes"])
            function_count += len(structure["functions"])
            import_count += len(structure["imports"])

            style = analysis["style"]
            indent_style = style.get("indent_style")
            if indent_style:
                style_votes[indent_style] = style_votes.get(indent_style, 0) + 1
                indent_size = style.get("indent_size", 0)
                if indent_size > 0:
                    per_style = indent_votes.setdefault(indent_style, {})
                    per_style[indent_size] = per_style.get(indent_size, 0) + 1

        dominant_style = "space" if style_votes["space"] >= style_votes["tab"] else "tab"
        dominant_indent = max(
            indent_votes.get(dominant_style, {}).items(),
            key=lambda item: item[1],
            default=(4, 0),
        )[0]

        return {
            "language": self.language,
            "files_analyzed": len(file_analyses),
            "file_details": file_analyses[:5],
            "naming_conventions": {
                # sorted() makes the 20-example selection reproducible.
                convention: sorted(examples)[:20]
                for convention, examples in all_naming.items()
            },
            "entity_counts": {
                "classes": class_count,
                "functions": function_count,
                "imports": import_count,
            },
            "style": {
                "indent_style": dominant_style,
                "indent_size": dominant_indent,
            },
            "summary": {
                "files": len(file_analyses),
                "classes": class_count,
                "functions": function_count,
                # NOTE: this is the first convention encountered, not
                # necessarily the most frequent one.
                "primary_naming": next(iter(all_naming), "unknown"),
            },
        }

    def save_patterns(self, output_path: str, patterns: dict[str, Any]) -> None:
        """Write *patterns* to *output_path* as YAML, creating parent directories.

        Objects carrying a ``__dict__`` (such as the dataclasses in this
        module) are converted to plain dicts of their public attributes first.
        """
        import yaml

        def convert_dataclass(obj: Any) -> Any:
            # Containers are checked first: dict subclasses that also carry
            # an instance __dict__ (e.g. OrderedDict) must keep their items.
            if isinstance(obj, dict):
                return {k: convert_dataclass(v) for k, v in obj.items()}
            if isinstance(obj, list):
                return [convert_dataclass(i) for i in obj]
            if hasattr(obj, "__dict__"):
                return {
                    k: convert_dataclass(v)
                    for k, v in obj.__dict__.items()
                    if not k.startswith("_")
                }
            return obj

        path = Path(output_path)
        path.parent.mkdir(parents=True, exist_ok=True)
        clean_patterns = convert_dataclass(patterns)
        with open(path, "w", encoding="utf-8") as f:
            yaml.dump(clean_patterns, f, default_flow_style=False, indent=2)