"""Static code analyzer for PatternForge.

Scans source files of a single language and summarizes recurring patterns:
identifier-naming conventions, class/function/import structure, and
whitespace style. Results feed the boilerplate generator.
"""

import re
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any

import tree_sitter
from tree_sitter_languages import get_language

from patternforge.config import Config


@dataclass
class NamingPattern:
    """One observed identifier-naming convention with sample identifiers."""

    convention: str
    prefixes: list[str] = field(default_factory=list)
    suffixes: list[str] = field(default_factory=list)
    examples: list[str] = field(default_factory=list)


@dataclass
class CodeStructure:
    """Class/function/import/type declarations extracted from one file."""

    class_patterns: list[dict[str, Any]] = field(default_factory=list)
    function_patterns: list[dict[str, Any]] = field(default_factory=list)
    import_patterns: list[str] = field(default_factory=list)
    type_definitions: list[dict[str, str]] = field(default_factory=list)


@dataclass
class StylePattern:
    """Detected whitespace and line-ending style of a file."""

    indent_style: str = "space"
    indent_size: int = 4
    line_endings: str = "lf"
    bracket_style: str = "same-line"


class CodeAnalyzer:
    """Analyzes source files of one language and aggregates style patterns.

    Regex-based heuristics do the actual extraction; the tree-sitter parser
    is initialized opportunistically and the analyzer degrades gracefully
    (``self._parser is None``) when the grammar is unavailable.
    """

    # Maps user-facing language aliases onto tree-sitter grammar names.
    LANGUAGE_MAP = {
        "python": "python",
        "javascript": "javascript",
        "typescript": "typescript",
        "java": "java",
        "cpp": "cpp",
        "c": "c",
        "rust": "rust",
        "go": "go",
        "ruby": "ruby",
    }

    def __init__(self, language: str, config: Config) -> None:
        """Create an analyzer for *language* using project *config*."""
        # Unknown languages pass through unchanged so extension lookup
        # can still fall back to ".<language>".
        self.language = self.LANGUAGE_MAP.get(language, language)
        self.config = config
        self._try_init_language()

    def _try_init_language(self) -> None:
        """Best-effort tree-sitter setup; leaves parser as None on failure.

        Any failure (missing grammar wheel, unsupported language) is
        deliberately swallowed: regex-based analysis below does not need
        the parser.
        """
        try:
            self._parser = tree_sitter.Parser()
            self._language = get_language(self.language)
            self._parser.set_language(self._language)
        except Exception:
            self._parser = None
            self._language = None

    def _file_extensions(self) -> set[str]:
        """Return the file extensions considered source code for this language.

        Fixed: previously returned a ``list`` despite the ``set[str]``
        annotation; membership testing in ``_is_code_file`` is unchanged.
        """
        extensions: dict[str, set[str]] = {
            "python": {".py", ".pyi"},
            "javascript": {".js", ".mjs"},
            "typescript": {".ts", ".tsx"},
            "java": {".java"},
            "cpp": {".cpp", ".cc", ".cxx", ".hpp"},
            "c": {".c", ".h"},
            "rust": {".rs"},
            "go": {".go"},
            "ruby": {".rb"},
        }
        # Unknown languages fall back to ".<language>" (e.g. "kotlin" -> ".kotlin").
        return extensions.get(self.language, {f".{self.language}"})

    def _is_code_file(self, path: Path) -> bool:
        """Return True if *path* has a source extension for this language."""
        return path.suffix in self._file_extensions()

    def _collect_files(self, path: Path, recursive: bool) -> list[Path]:
        """Collect analyzable files under *path* (a file or a directory)."""
        files: list[Path] = []
        if path.is_file():
            if self._is_code_file(path):
                files.append(path)
            return files
        pattern = "**/*" if recursive else "*"
        for f in path.glob(pattern):
            if f.is_file() and self._is_code_file(f):
                files.append(f)
        return files

    def _extract_naming_conventions(self, content: str) -> dict[str, NamingPattern]:
        """Detect naming conventions present in *content* via regex heuristics.

        NOTE(review): the patterns overlap (any snake_case word also matches
        the camelCase pattern), so several conventions usually co-occur;
        ``analyze`` treats them as votes rather than exclusive categories.
        """
        conventions: dict[str, NamingPattern] = {}
        patterns = {
            "camelCase": r"[a-z][a-zA-Z0-9]*",
            "PascalCase": r"[A-Z][a-zA-Z0-9]*",
            "snake_case": r"[a-z][a-z0-9_]*",
            "SCREAMING_SNAKE_CASE": r"[A-Z][A-Z0-9_]*",
        }
        for name, pattern in patterns.items():
            matches = re.findall(pattern, content)
            if matches:
                # dict.fromkeys dedups while preserving first-seen order,
                # making the sampled examples deterministic (set() was not).
                conventions[name] = NamingPattern(
                    convention=name,
                    examples=list(dict.fromkeys(matches))[:10],
                )
        return conventions

    def _extract_structure(self, content: str) -> CodeStructure:
        """Extract class names, function names, and import lines from *content*."""
        structure = CodeStructure()
        class_pattern = r"class\s+(\w+)"
        # Three alternatives cover Python defs, JS functions, and Java-style
        # "public <type> <name>" method declarations.
        func_pattern = r"def\s+(\w+)|function\s+(\w+)|public\s+\w+\s+(\w+)"
        import_pattern = r"^import\s+.*|^from\s+.*|^#include\s+.*"

        for match in re.finditer(class_pattern, content):
            structure.class_patterns.append({"name": match.group(1)})

        for match in re.finditer(func_pattern, content):
            # Only one alternative group matches per hit; take the first non-None.
            name = match.group(1) or match.group(2) or match.group(3)
            if name:
                structure.function_patterns.append({"name": name})

        # Cap at 20 import lines to keep per-file reports small.
        structure.import_patterns = re.findall(import_pattern, content, re.MULTILINE)[:20]

        return structure

    def _detect_style(self, content: str) -> StylePattern:
        """Sniff indent style/size and line endings from the first 1000 chars.

        Only a prefix is inspected: style is assumed consistent within a file
        and this keeps the check cheap on large files.
        """
        style = StylePattern()
        head = content[:1000]
        if "\t" in head:
            style.indent_style = "tab"
            style.indent_size = 1
        elif " " * 4 in head:
            style.indent_size = 4
        elif " " * 2 in head:
            style.indent_size = 2

        style.line_endings = "crlf" if "\r\n" in head else "lf"

        return style

    def _analyze_file(self, path: Path) -> dict[str, Any]:
        """Analyze one file; returns {} if the file cannot be read."""
        try:
            with open(path, encoding="utf-8", errors="ignore") as f:
                content = f.read()
        except Exception:
            # Unreadable files (permissions, races) are skipped silently;
            # callers treat {} as "no data".
            return {}

        # Fixed: extract once instead of re-running all regexes three times.
        structure = self._extract_structure(content)
        return {
            "path": str(path),
            "naming_conventions": self._extract_naming_conventions(content),
            "structure": {
                "classes": structure.class_patterns,
                "functions": structure.function_patterns,
                "imports": structure.import_patterns,
            },
            "style": self._detect_style(content).__dict__,
            "size": len(content),
            "lines": content.count("\n"),
        }

    def analyze(self, path: str, recursive: bool = True) -> dict[str, Any]:
        """Analyze *path* (file or directory) and aggregate patterns.

        Returns a report dict with per-file details (first 5 files), merged
        naming-convention examples, entity counts, the dominant indentation
        style, and a summary. Returns an ``{"error": ...}`` dict when no
        matching files are found.
        """
        target = Path(path)
        files = self._collect_files(target, recursive)
        if not files:
            return {"error": "No matching files found", "language": self.language}

        file_analyses = []
        all_naming: dict[str, set[str]] = {}
        all_classes: list[str] = []
        all_functions: list[str] = []
        all_imports: list[str] = []
        style_votes = {"space": 0, "tab": 0}
        indent_sizes: dict[int, int] = {}

        for f in files:
            analysis = self._analyze_file(f)
            if not analysis:
                continue
            file_analyses.append(analysis)
            for nc in analysis.get("naming_conventions", {}).values():
                all_naming.setdefault(nc.convention, set()).update(nc.examples)

            for cls in analysis.get("structure", {}).get("classes", []):
                all_classes.append(cls.get("name", ""))

            for func in analysis.get("structure", {}).get("functions", []):
                all_functions.append(func.get("name", ""))

            all_imports.extend(analysis.get("structure", {}).get("imports", []))

            style = analysis.get("style", {})
            if style.get("indent_style"):
                style_votes[style["indent_style"]] += 1
            indent = style.get("indent_size", 0)
            if indent > 0:
                indent_sizes[indent] = indent_sizes.get(indent, 0) + 1

        # Majority vote; ties go to spaces (the more common convention).
        dominant_style = "space" if style_votes["space"] >= style_votes["tab"] else "tab"
        dominant_indent = max(indent_sizes, key=indent_sizes.get, default=4)

        return {
            "language": self.language,
            "files_analyzed": len(file_analyses),
            "file_details": file_analyses[:5],
            # sorted() makes the sampled examples deterministic across runs
            # (iterating the set was not).
            "naming_conventions": {
                k: sorted(v)[:20] for k, v in all_naming.items()
            },
            "entity_counts": {
                "classes": len(all_classes),
                "functions": len(all_functions),
                "imports": len(all_imports),
            },
            "style": {
                "indent_style": dominant_style,
                "indent_size": dominant_indent,
            },
            "summary": {
                "files": len(file_analyses),
                "classes": len(all_classes),
                "functions": len(all_functions),
                "primary_naming": next(iter(all_naming), "unknown"),
            },
        }

    def save_patterns(self, output_path: str, patterns: dict[str, Any]) -> None:
        """Serialize *patterns* to YAML at *output_path*, creating parent dirs.

        Dataclass instances anywhere in the structure are recursively
        converted to plain dicts (private ``_``-prefixed attributes dropped)
        so PyYAML can emit them without custom representers.
        """
        import yaml

        def convert_dataclass(obj: Any) -> Any:
            # hasattr(__dict__) catches dataclass/plain-object instances;
            # builtin containers and scalars fall through to the later cases.
            if hasattr(obj, "__dict__"):
                return {
                    k: convert_dataclass(v)
                    for k, v in obj.__dict__.items()
                    if not k.startswith("_")
                }
            elif isinstance(obj, dict):
                return {k: convert_dataclass(v) for k, v in obj.items()}
            elif isinstance(obj, list):
                return [convert_dataclass(i) for i in obj]
            return obj

        path = Path(output_path)
        path.parent.mkdir(parents=True, exist_ok=True)
        clean_patterns = convert_dataclass(patterns)
        # Explicit encoding: YAML output must not depend on the locale.
        with open(path, "w", encoding="utf-8") as f:
            yaml.dump(clean_patterns, f, default_flow_style=False, indent=2)