"""Match text against rule patterns and fill in fix templates."""

import re
from dataclasses import dataclass, field
from functools import lru_cache
from typing import Dict, List, Optional

from errorfix.rules import Rule


def _substitute(template: str, variables: Dict[str, str]) -> str:
    """Replace ``{name}`` and ``${name}`` placeholders in *template*.

    All placeholders are resolved in a single pass. This matters for two
    reasons:

    * ``${name}`` is consumed as a whole. Replacing ``{name}`` first would
      turn ``${name}`` into ``$value`` and leave a stray ``$`` behind.
    * Substituted values are never re-scanned, so a value that happens to
      contain ``{other}`` is inserted literally instead of being expanded.

    Placeholders whose name is not in *variables* are left untouched.
    """
    if not variables:
        return template
    lookup = {str(name): value for name, value in variables.items()}
    names = "|".join(re.escape(name) for name in lookup)
    placeholder = re.compile(r"\$?\{(" + names + r")\}")
    return placeholder.sub(lambda m: lookup[m.group(1)], template)


def _named_groups(match: "re.Match") -> Dict[str, str]:
    """Return the named groups of *match* that actually participated."""
    return {
        name: value
        for name, value in match.groupdict().items()
        if value is not None
    }


@dataclass
class MatchResult:
    """Outcome of successfully matching one rule against a piece of text."""

    rule: Rule
    # Full text matched by the rule's pattern (regex group 0).
    matched_text: str
    # Named capture groups that participated in the match.
    groups: Dict[str, str] = field(default_factory=dict)
    # Offsets of the match within the searched text.
    start_pos: int = 0
    end_pos: int = 0
    confidence: float = 1.0

    def apply_fix(self, base_text: Optional[str] = None) -> str:
        """Return ``rule.fix`` with its placeholders filled from ``groups``.

        Both ``{name}`` and ``${name}`` placeholders are supported.

        Args:
            base_text: Accepted for interface compatibility; not used.
        """
        return _substitute(self.rule.fix, self.groups)


class PatternMatcher:
    """Search text for rule patterns and build ``MatchResult`` objects.

    Patterns are compiled with ``re.MULTILINE | re.IGNORECASE``.
    """

    def __init__(self, cache_size: int = 256):
        """Create a matcher.

        Args:
            cache_size: Stored on the instance. The compiled-pattern cache
                itself is shared by all matchers and capped at 256 entries.
        """
        self._compiled_patterns: Dict[str, re.Pattern] = {}
        self._cache_size = cache_size

    # A static method keeps ``self`` out of the cache key; caching a bound
    # method would keep every matcher instance alive for the cache's lifetime.
    @staticmethod
    @lru_cache(maxsize=256)
    def _compile_pattern(pattern: str) -> re.Pattern:
        """Compile *pattern*, reusing a previously compiled object if cached.

        Raises:
            re.error: If *pattern* is not a valid regular expression.
        """
        return re.compile(pattern, re.MULTILINE | re.IGNORECASE)

    def match(self, text: str, rule: Rule) -> Optional[MatchResult]:
        """Search *text* for the first occurrence of ``rule.pattern``.

        Returns:
            A ``MatchResult`` for the first match, or ``None`` when the
            pattern does not match. An invalid pattern prints a warning and
            also yields ``None``.
        """
        try:
            compiled = self._compile_pattern(rule.pattern)
        except re.error as e:
            print(f"Warning: Invalid pattern '{rule.pattern}': {e}")
            return None

        found = compiled.search(text)
        if found is None:
            return None
        return MatchResult(
            rule=rule,
            matched_text=found.group(0),
            groups=_named_groups(found),
            start_pos=found.start(),
            end_pos=found.end(),
        )

    def match_all(
        self,
        text: str,
        rules: List[Rule],
        limit: Optional[int] = None,
    ) -> List[MatchResult]:
        """Match *text* against each rule in order and collect the hits.

        Args:
            limit: Stop once this many results have been collected.
                ``None`` or ``0`` means no limit.
        """
        results: List[MatchResult] = []
        for rule in rules:
            result = self.match(text, rule)
            if result is None:
                continue
            results.append(result)
            if limit and len(results) >= limit:
                break
        return results

    def match_with_priority(
        self, text: str, rules: List[Rule]
    ) -> List[MatchResult]:
        """Return all matches, highest ``rule.priority`` first.

        Rules with equal priority keep their relative input order.
        """
        sorted_rules = sorted(rules, key=lambda r: r.priority, reverse=True)
        return self.match_all(text, sorted_rules)

    def find_best_match(
        self, text: str, rules: List[Rule]
    ) -> Optional[MatchResult]:
        """Return the match from the highest-priority matching rule.

        Returns ``None`` when no rule matches.
        """
        sorted_rules = sorted(rules, key=lambda r: r.priority, reverse=True)
        # Stop at the first hit; lower-priority rules cannot change the answer.
        for rule in sorted_rules:
            result = self.match(text, rule)
            if result is not None:
                return result
        return None

    def extract_variables(self, text: str, pattern: str) -> Dict[str, str]:
        """Return the named groups captured by *pattern* in *text*.

        Returns an empty dict when the pattern is invalid or does not match.
        """
        try:
            compiled = self._compile_pattern(pattern)
        except re.error:
            # Best effort: an unusable pattern simply yields no variables.
            return {}
        found = compiled.search(text)
        return _named_groups(found) if found is not None else {}

    def replace_variables(
        self, template: str, variables: Dict[str, str]
    ) -> str:
        """Fill ``{name}`` and ``${name}`` placeholders in *template*."""
        return _substitute(template, variables)

    def clear_cache(self) -> None:
        """Drop all cached compiled patterns.

        The compile cache is shared, so this affects every matcher instance.
        """
        self._compile_pattern.cache_clear()
        self._compiled_patterns.clear()