This commit is contained in:
263
src/pattern_matcher.py
Normal file
263
src/pattern_matcher.py
Normal file
@@ -0,0 +1,263 @@
|
|||||||
|
"""Pattern matching engine for Code Pattern Search CLI."""
|
||||||
|
|
||||||
|
import re
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
from .models import MatchLocation
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class FileMatch:
    """Represents matches found in a single file.

    This is a plain data holder: nothing is validated or derived when an
    instance is created, so ``match_count`` is whatever the caller passes.
    """

    # Path of the file the matches belong to.
    file_path: str
    # Match records collected for that file.
    matches: list[MatchLocation]
    # Number of matches; supplied by the caller, not computed here.
    match_count: int
|
||||||
|
|
||||||
|
|
||||||
|
# Language name -> file suffixes associated with it. Suffixes include the
# leading dot; the single exception is "Dockerfile", which is a bare file
# name rather than a suffix.
LANGUAGE_EXTENSIONS: dict[str, list[str]] = {
    "python": [".py", ".pyw", ".pyx"],
    "javascript": [".js", ".jsx", ".mjs"],
    "typescript": [".ts", ".tsx"],
    "java": [".java"],
    "kotlin": [".kt", ".kts"],
    "go": [".go"],
    "rust": [".rs"],
    "cpp": [".cpp", ".cc", ".cxx", ".hpp", ".h", ".hxx"],
    "c": [".c", ".h"],
    "c#": [".cs"],
    "ruby": [".rb", ".erb"],
    "php": [".php", ".phtml"],
    "swift": [".swift"],
    "objective-c": [".m", ".mm"],
    "scala": [".scala"],
    "html": [".html", ".htm"],
    "css": [".css", ".scss", ".sass", ".less"],
    "json": [".json"],
    "yaml": [".yaml", ".yml"],
    "xml": [".xml"],
    "markdown": [".md", ".markdown"],
    "shell": [".sh", ".bash", ".zsh"],
    "powershell": [".ps1"],
    "dockerfile": ["Dockerfile"],
    "sql": [".sql"],
    "vue": [".vue"],
    "svelte": [".svelte"],
}


# Reverse lookup: suffix (or bare file name) -> language name.
# When a suffix is listed under several languages the later entry overwrites
# the earlier one, so ".h" (listed under both "cpp" and "c") resolves to "c".
# Built with a comprehension so no loop variables leak into the module
# namespace.
EXTENSION_TO_LANGUAGE: dict[str, str] = {
    ext: lang
    for lang, extensions in LANGUAGE_EXTENSIONS.items()
    for ext in extensions
}
|
||||||
|
|
||||||
|
|
||||||
|
class PatternLibrary:
    """Library of built-in code patterns.

    ``PRESETS`` maps a preset name of the form ``<prefix>-<topic>`` to the
    source of a regular expression. The prefix (the text before the first
    hyphen) decides which category the preset is listed under.
    """

    PRESETS: dict[str, str] = {
        # Accepts an arrow callback (``useEffect(() => {``, optionally
        # ``async``) as well as the function-expression form
        # (``useEffect(function () {``) that the second alternative covers.
        "react-useeffect": (
            r"useEffect\s*\("
            r"(?:\s*(?:async\s*)?\([^)]*\)\s*=>|[^)]*\))"
            r"\s*\{"
        ),
        "react-useeffect-deps": r"useEffect\s*\([^)]*,\s*\[[^\]]*\]\s*\)",
        # The two patterns below may span a line break, so they need a
        # whole-content search rather than a per-line one to match across
        # lines.
        "python-dataclass": r"@dataclass\s*\n?class\s+\w+",
        "python-decorator": r"@\w+\s*\n?def\s+\w+\s*\(",
        "python-async": r"async\s+def\s+\w+",
        "python-typed-dict": r"class\s+\w+\s*\(\s*TypedDict\s*\)",
        "go-error-handling": r"if\s+err\s*!=\s*nil",
        "go-defer": r"defer\s+\w+\.",
        "go-goroutine": r"go\s+\w+\(",
        "go-error-wrap": r"fmt\.Errorf\s*\(\s*[\"']",
        "ts-interface": r"interface\s+\w+\s*\{",
        "ts-type-alias": r"type\s+\w+\s*=\s*",
        "ts-generic": r"<[A-Z]\w*>",
        # The identifier between ``async`` and the parameter list is
        # optional so that real arrow functions (``async (x) => await f(x)``)
        # match; a mandatory identifier would only match invalid syntax.
        "js-async-await": r"async(?:\s+\w+)?\s*\([^)]*\)\s*=>\s*await",
        "js-fetch": r"fetch\s*\(\s*[\"']",
        "rust-match": r"match\s+\w+\s*\{",
        "rust-trait": r"impl\s+\w+\s+for\s+\w+",
        "rust-lifetime": r"'[a-z]",
        "java-stream": r"\.stream\s*\(\)",
        "java-optional": r"Optional\.",
        "ruby-on-rails": r"class\s+\w+\s*<\s*ApplicationController",
        "docker-cmd": r"CMD\s+\[",
        "docker-entrypoint": r"ENTRYPOINT\s+\[",
        "sql-select": r"SELECT\s+\*?\s+FROM\s+\w+",
        "sql-join": r"JOIN\s+\w+\s+ON",
    }

    # Preset-name prefix -> display category. Looked up by the exact prefix
    # so that names merely sharing leading characters (for example
    # "javascript-..." versus "java-...") cannot land in the wrong category.
    _CATEGORY_BY_PREFIX: dict[str, str] = {
        "python": "Python",
        "react": "JavaScript/TypeScript",
        "js": "JavaScript/TypeScript",
        "ts": "JavaScript/TypeScript",
        "javascript": "JavaScript/TypeScript",
        "typescript": "JavaScript/TypeScript",
        "go": "Go",
        "rust": "Rust",
        "java": "Java",
        "ruby": "Ruby",
        "docker": "Docker",
        "sql": "SQL",
    }

    @classmethod
    def get_pattern(cls, name: str) -> Optional[str]:
        """Return the regex source for preset *name*, or ``None`` if unknown."""
        return cls.PRESETS.get(name)

    @classmethod
    def list_presets(cls) -> list[str]:
        """Return all preset names in sorted order."""
        return sorted(cls.PRESETS)

    @classmethod
    def get_presets_by_category(cls) -> dict[str, list[str]]:
        """Group preset names by category.

        The category is chosen from the text before the first hyphen of the
        preset name; unrecognised prefixes are filed under ``"Other"``.

        Returns:
            A dict mapping category name to preset names. Categories appear
            in the order their first preset occurs in ``PRESETS`` and names
            keep their ``PRESETS`` order within each category.
        """
        categories: dict[str, list[str]] = {}

        for name in cls.PRESETS:
            prefix = name.partition("-")[0]
            category = cls._CATEGORY_BY_PREFIX.get(prefix, "Other")
            categories.setdefault(category, []).append(name)

        return categories
|
||||||
|
|
||||||
|
|
||||||
|
class PatternMatcher:
    """Engine for matching a regular expression against code content.

    The pattern is compiled lazily on first use and cached on the instance.
    """

    def __init__(
        self,
        pattern: str,
        flags: re.RegexFlag = re.MULTILINE,
    ) -> None:
        """Store the pattern source and flags; compilation is deferred.

        Args:
            pattern: Regular-expression source.
            flags: Flags passed to ``re.compile``.
        """
        self.pattern = pattern
        self.flags = flags
        self._compiled: Optional[re.Pattern[str]] = None

    @property
    def compiled(self) -> re.Pattern[str]:
        """Return the compiled pattern, compiling it on first access.

        Raises:
            ValueError: If the pattern is not a valid regular expression.
                The original ``re.error`` is attached as ``__cause__``.
        """
        if self._compiled is None:
            try:
                self._compiled = re.compile(self.pattern, self.flags)
            except re.error as e:
                raise ValueError(f"Invalid regex pattern: {e}") from e
        return self._compiled

    def matches_extension(self, file_path: str) -> bool:
        """Return True if *file_path* maps to a known language."""
        return self.get_language(file_path) is not None

    def get_language(self, file_path: str) -> Optional[str]:
        """Return the language for *file_path*, or ``None`` if unknown.

        The lower-cased suffix is looked up first. A path without a suffix
        is looked up by its bare file name, which is how extension-less
        entries such as ``Dockerfile`` are keyed in the table.
        """
        path = Path(file_path)
        suffix = path.suffix.lower()
        if suffix:
            return EXTENSION_TO_LANGUAGE.get(suffix)

        name = path.name
        # A dotfile such as ".py" has no suffix; do not let its name be
        # mistaken for a suffix key of the table.
        if name.startswith("."):
            return None
        return EXTENSION_TO_LANGUAGE.get(name)

    def find_matches(
        self,
        content: str,
        file_path: str,
    ) -> list[MatchLocation]:
        """Find all matches, searching each line of *content* separately.

        Because every line is searched on its own, a match can never span
        a line break. Lines are numbered from 1 and ``match_start`` /
        ``match_end`` are offsets within ``line_content``.

        Raises:
            ValueError: If the pattern is not a valid regular expression.
        """
        pattern = self.compiled
        return [
            MatchLocation(
                file_path=file_path,
                line_number=line_num,
                line_content=line,
                match_start=match.start(),
                match_end=match.end(),
            )
            for line_num, line in enumerate(content.split("\n"), start=1)
            for match in pattern.finditer(line)
        ]

    def find_multiline_matches(
        self,
        content: str,
        file_path: str,
    ) -> list[MatchLocation]:
        """Find matches over the whole of *content*, so they may span lines.

        One record is produced per match, located at the line on which the
        match starts. ``line_content`` is that line, ``match_start`` is the
        column of the match within it, and ``match_end`` is clamped to the
        end of that line when the match continues onto following lines.

        Raises:
            ValueError: If the pattern is not a valid regular expression.
        """
        matches: list[MatchLocation] = []

        line_number = 1
        scanned_to = 0
        for match in self.compiled.finditer(content):
            start = match.start()
            # Matches arrive in increasing start order, so only the newly
            # covered stretch has to be scanned for line breaks.
            line_number += content.count("\n", scanned_to, start)
            scanned_to = start

            line_start = content.rfind("\n", 0, start) + 1
            line_end = content.find("\n", start)
            if line_end == -1:
                line_end = len(content)
            line = content[line_start:line_end]

            matches.append(
                MatchLocation(
                    file_path=file_path,
                    line_number=line_number,
                    line_content=line,
                    match_start=start - line_start,
                    match_end=min(match.end() - line_start, len(line)),
                )
            )

        return matches

    def count_matches(self, content: str) -> int:
        """Count non-overlapping matches in the whole of *content*."""
        return sum(1 for _ in self.compiled.finditer(content))

    def has_matches(self, content: str) -> bool:
        """Return True if *content* contains at least one match."""
        return self.compiled.search(content) is not None

    def get_match_context(
        self,
        content: str,
        file_path: str,
        context_lines: int = 2,
    ) -> list[FileMatch]:
        """Collect the per-line matches of *content* into ``FileMatch`` records.

        Consecutive matches that report the same ``file_path`` are grouped
        into one record. An empty list is returned when nothing matches.

        Args:
            content: Text to search.
            file_path: Path passed through to the match records.
            context_lines: Accepted for interface compatibility; this
                implementation does not use it and returns no surrounding
                lines.
        """
        grouped: list[tuple[str, list[MatchLocation]]] = []

        for match in self.find_matches(content, file_path):
            if not grouped or grouped[-1][0] != match.file_path:
                grouped.append((match.file_path, []))
            grouped[-1][1].append(match)

        return [
            FileMatch(file_path=path, matches=found, match_count=len(found))
            for path, found in grouped
        ]
|
||||||
Reference in New Issue
Block a user