"""Registry of regex flavors plus heuristics for cross-flavor compatibility.

The module keeps a table of known regex dialects (``Flavor``) and a table of
textual detectors (``FEATURE_PATTERNS``) that spot dialect-specific syntax in
a pattern's source text.  Detection is a plain regex scan over the pattern
string: it does not parse escapes or character classes, so results are
heuristic.
"""

import re
from dataclasses import dataclass, field
from typing import Dict, List, Optional, Set


@dataclass
class Flavor:
    """Static description of one regex dialect.

    Attributes:
        name: Identifier of the flavor; the registry uses its lower-cased
            form as the lookup key.
        display_name: Human-readable name used in generated messages.
        description: Free-form description of the dialect.
        supported_features: Feature names the dialect accepts.  Nothing in
            this module reads this set; it is informational here.
        unsupported_features: Feature names the dialect rejects; consulted by
            ``check_feature_support``.
        quirks: Feature name -> explanatory message.  When an unsupported
            feature has an entry here, ``get_compatibility_warnings`` reports
            that message instead of the generic one.
    """

    name: str
    display_name: str
    description: str
    supported_features: Set[str] = field(default_factory=set)
    unsupported_features: Set[str] = field(default_factory=set)
    quirks: Dict[str, str] = field(default_factory=dict)


@dataclass
class FlavorWarning:
    """One compatibility finding for a pattern.

    Attributes:
        feature: Name of the feature the finding is about.
        message: Text to show to the user.
        severity: ``"warning"`` (default) or ``"error"`` as produced by
            ``get_compatibility_warnings``.
    """

    feature: str
    message: str
    severity: str = "warning"


class FlavorRegistry:
    """Case-insensitive lookup table of ``Flavor`` objects."""

    def __init__(self) -> None:
        """Create a registry pre-populated with the built-in flavors."""
        self._flavors: Dict[str, Flavor] = {}
        self._register_default_flavors()

    def _register_default_flavors(self) -> None:
        """Register the built-in PCRE, JavaScript, Python and Go flavors."""
        common_features = {
            "literals",
            "character_classes",
            "quantifiers_basic",
            "anchors_basic",
            "groups_capturing",
            "groups_non_capturing",
            "alternation",
            "escaping",
            "dot_any",
            "word_boundary",
        }

        pcre = Flavor(
            name="pcre",
            display_name="PCRE",
            description="Perl Compatible Regular Expressions",
            supported_features=common_features | {
                "lookahead",
                "lookbehind",
                "possessive_quantifiers",
                "atomic_groups",
                "named_groups",
                "recursive_patterns",
                "conditional_patterns",
                "comment_syntax",
                "callouts",
            },
            quirks={
                "possessive_quantifiers": "Use ++, *+, ?+ syntax",
                "dot_all": "Use (?s) modifier for dot to match newlines",
            },
        )

        # NOTE(review): lookbehind has been part of ECMAScript since ES2018;
        # confirm whether this table is meant to target older engines.
        javascript = Flavor(
            name="javascript",
            display_name="JavaScript",
            description="JavaScript RegExp",
            supported_features=common_features | {
                "lookahead",
                "named_groups",
                "dot_all",
            },
            unsupported_features={
                "lookbehind",
                "possessive_quantifiers",
                "atomic_groups",
                "recursive_patterns",
                "conditional_patterns",
                "callouts",
            },
            quirks={
                "lookbehind": "Not supported in JavaScript",
                "possessive_quantifiers": "Not supported in JavaScript",
            },
        )

        # NOTE(review): Python's ``re`` accepts conditional groups
        # ``(?(id)yes|no)`` and, from 3.11, atomic groups and possessive
        # quantifiers.  The sets and the quirk text below are kept as found;
        # confirm the intended target version before changing them.
        python = Flavor(
            name="python",
            display_name="Python",
            description="Python re module",
            supported_features=common_features | {
                "lookahead",
                "lookbehind",
                "named_groups",
                "dot_all",
            },
            unsupported_features={
                "possessive_quantifiers",
                "atomic_groups",
                "recursive_patterns",
                "conditional_patterns",
                "callouts",
            },
            quirks={
                "possessive_quantifiers": "Use atomic groups or possessive++ equivalent not available",
            },
        )

        # RE2 guarantees linear-time matching and therefore has neither
        # lookaround nor backreferences of any kind.
        go = Flavor(
            name="go",
            display_name="Go",
            description="Go regexp (RE2)",
            supported_features=common_features | {
                "named_groups",
            },
            unsupported_features={
                "lookahead",
                "lookbehind",
                "possessive_quantifiers",
                "atomic_groups",
                "recursive_patterns",
                "conditional_patterns",
                "callouts",
                "backreferences_general",
                "named_backreferences",
            },
            quirks={
                "lookahead": "Not supported in Go's RE2 engine",
                "lookbehind": "Not supported in Go's RE2 engine",
                # Keyed by the feature name that FEATURE_PATTERNS reports so
                # the message is actually surfaced.
                "backreferences_general": "Backreferences are not supported in Go's RE2 engine",
            },
        )

        for flavor in (pcre, javascript, python, go):
            self.register(flavor)

    def register(self, flavor: Flavor) -> None:
        """Add *flavor*, replacing any flavor with the same name.

        The key is lower-cased so that ``get`` (which lower-cases its
        argument) can find flavors registered with mixed-case names.
        """
        self._flavors[flavor.name.lower()] = flavor

    def get(self, name: str) -> Optional[Flavor]:
        """Return the flavor called *name* (case-insensitive), or ``None``."""
        if not isinstance(name, str):
            return None
        return self._flavors.get(name.lower())

    def list_all(self) -> List[Flavor]:
        """Return all registered flavors in registration order."""
        return list(self._flavors.values())

    def list_names(self) -> List[str]:
        """Return the lookup keys of all registered flavors."""
        return list(self._flavors)


_registry = FlavorRegistry()


def get_flavor(name: str) -> Optional[Flavor]:
    """Look up *name* in the module-level registry; ``None`` if unknown."""
    return _registry.get(name)


def get_supported_flavors() -> List[str]:
    """Return the names of every flavor in the module-level registry."""
    return _registry.list_names()


def validate_flavor(name: str) -> bool:
    """Return ``True`` when *name* is a registered flavor."""
    return _registry.get(name) is not None


# Feature name -> regexes that are searched in the *source text* of a
# pattern.  A feature counts as used when any of its regexes matches.
#
# NOTE(review): the entries from "lookahead" through "atomic_groups" were
# unreadable in the reviewed text and are reconstructions; confirm them
# against the original table.
FEATURE_PATTERNS: Dict[str, List[str]] = {
    "lookahead": [r"\(\?[=!]"],
    "lookbehind": [r"\(\?<[=!]"],
    "named_groups": [r"\(\?P?<[A-Za-z_]"],
    "named_backreferences": [r"\\k[<{']", r"\(\?P="],
    # The lookbehind skips an escaped quantifier character such as ``\++``.
    "possessive_quantifiers": [r"(?<!\\)[*+?}]\+"],
    "atomic_groups": [r"\(\?>"],
    # (?R), numbered/relative calls (?1) (?-1) (?+1), named calls (?&n) (?P>n).
    # Flag groups such as (?-i:...) must not match here.
    "recursive_patterns": [r"\(\?R\)", r"\(\?[+-]?\d+\)", r"\(\?&", r"\(\?P>"],
    # ``(?|`` is PCRE's branch-reset group; it is kept under this key as in
    # the original table.
    "conditional_patterns": [r"\(\?\(", r"\(\?\|"],
    # ``(?#`` is comment syntax, not a callout, so it is listed only below.
    "callouts": [r"\(\?C"],
    "comment_syntax": [r"\(\?#"],
    # An inline flag group that switches ``s`` on: (?s), (?is), (?s:...).
    "dot_all": [r"\(\?[a-zA-Z]*s"],
    # ``\0`` is the NUL escape rather than a backreference.
    "backreferences_general": [r"\\[1-9]\d*"],
}


def detect_flavor(pattern: str) -> str:
    """Guess which registered flavor *pattern* was written for.

    NOTE(review): only the first two conditions of this function (the
    ``\\k<`` and ``(?<`` substring tests) were readable in the reviewed text.
    Everything else here is a reconstruction and must be checked against the
    original implementation.

    Returns:
        ``"javascript"`` for ``(?<name>...)`` / ``\\k<name>`` patterns that
        use nothing JavaScript lacks, ``"python"`` for ``(?P<...>`` /
        ``(?P=...)`` patterns that use nothing Python lacks, otherwise
        ``"pcre"``.
    """
    guess: Optional[str] = None
    if r"\k<" in pattern:
        if r"(?<" in pattern:
            guess = "javascript"
    if guess is None and ("(?P<" in pattern or "(?P=" in pattern):
        guess = "python"

    # Only trust the guess when the pattern uses nothing that flavor rejects.
    if guess is not None and not check_feature_support(pattern, guess):
        return guess
    return "pcre"


def check_feature_support(pattern: str, flavor: str) -> List[str]:
    """List the features used by *pattern* that *flavor* does not support.

    Args:
        pattern: Source text of the regular expression to inspect.
        flavor: Name of a registered flavor (case-insensitive).

    Returns:
        Feature names in ``FEATURE_PATTERNS`` order, each at most once.  An
        unknown flavor yields an empty list.
    """
    flavor_obj = get_flavor(flavor)
    if flavor_obj is None:
        return []

    rejected = flavor_obj.unsupported_features
    return [
        feature
        for feature, regexes in FEATURE_PATTERNS.items()
        # Cheap set test first so supported features cost no regex scan.
        if feature in rejected
        and any(re.search(regex, pattern) for regex in regexes)
    ]


def get_compatibility_warnings(pattern: str, flavor: str) -> List[FlavorWarning]:
    """Describe every incompatibility between *pattern* and *flavor*.

    Args:
        pattern: Source text of the regular expression to inspect.
        flavor: Name of a registered flavor (case-insensitive).

    Returns:
        One ``FlavorWarning`` per unsupported feature found.  A feature with
        an entry in the flavor's ``quirks`` is reported with that text at
        severity ``"warning"``; any other is reported with a generic message
        at severity ``"error"``.  An unknown flavor yields a single
        ``"unknown_flavor"`` error.
    """
    flavor_obj = get_flavor(flavor)
    if flavor_obj is None:
        return [FlavorWarning(
            feature="unknown_flavor",
            message=f"Unknown flavor: {flavor}",
            severity="error",
        )]

    warnings: List[FlavorWarning] = []
    for feature in check_feature_support(pattern, flavor):
        if feature in flavor_obj.quirks:
            warnings.append(FlavorWarning(
                feature=feature,
                message=flavor_obj.quirks[feature],
                severity="warning",
            ))
        else:
            warnings.append(FlavorWarning(
                feature=feature,
                message=f"Feature '{feature}' is not supported in {flavor_obj.display_name}",
                severity="error",
            ))
    return warnings