Add converter, examples, and flavors modules
This commit is contained in:
254
regex_humanizer/flavors/registry.py
Normal file
254
regex_humanizer/flavors/registry.py
Normal file
@@ -0,0 +1,254 @@
|
||||
import re
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Dict, List, Optional, Set
|
||||
|
||||
|
||||
@dataclass
class Flavor:
    """Describe one regex dialect and the features it does or does not offer.

    The three text fields are required. The two feature sets and the quirks
    mapping are optional and default to a fresh, empty container for every
    instance, so instances never share mutable state.
    """

    # Identifier for the dialect; the built-in entries use lowercase names.
    name: str
    # Human-readable name, used when building user-facing messages.
    display_name: str
    # Short free-text description of the dialect.
    description: str
    # Feature identifiers the dialect accepts.
    supported_features: Set[str] = field(default_factory=set)
    # Feature identifiers the dialect rejects.
    unsupported_features: Set[str] = field(default_factory=set)
    # Free-text notes keyed by feature identifier.
    quirks: Dict[str, str] = field(default_factory=dict)
|
||||
|
||||
|
||||
@dataclass
class FlavorWarning:
    """A single compatibility finding about a pattern for a given flavor."""

    # Identifier of the feature the finding is about.
    feature: str
    # Human-readable explanation of the finding.
    message: str
    # Severity label; this module emits "warning" and "error".
    severity: str = "warning"
|
||||
|
||||
|
||||
class FlavorRegistry:
    """Name-indexed collection of ``Flavor`` definitions.

    A new registry starts out holding the built-in ``pcre``, ``javascript``,
    ``python`` and ``go`` flavors. Names are stored and looked up in
    lowercase, so registration and retrieval are case-insensitive.
    """

    def __init__(self):
        self._flavors: Dict[str, "Flavor"] = {}
        self._register_default_flavors()

    def _register_default_flavors(self):
        """Build the built-in flavor descriptions and register them."""
        # Features every built-in flavor supports; each flavor extends a copy.
        common_features = {
            "literals",
            "character_classes",
            "quantifiers_basic",
            "anchors_basic",
            "groups_capturing",
            "groups_non_capturing",
            "alternation",
            "escaping",
            "dot_any",
            "word_boundary",
        }

        pcre = Flavor(
            name="pcre",
            display_name="PCRE",
            description="Perl Compatible Regular Expressions",
            supported_features=common_features | {
                "lookahead",
                "lookbehind",
                "possessive_quantifiers",
                "atomic_groups",
                "named_groups",
                "recursive_patterns",
                "conditional_patterns",
                "comment_syntax",
                "callouts",
            },
            quirks={
                "possessive_quantifiers": "Use ++, *+, ?+ syntax",
                "dot_all": "Use (?s) modifier for dot to match newlines",
            },
        )

        # NOTE(review): ECMAScript 2018 added lookbehind assertions; confirm
        # whether this table deliberately targets older engines.
        javascript = Flavor(
            name="javascript",
            display_name="JavaScript",
            description="JavaScript RegExp",
            supported_features=common_features | {
                "lookahead",
                "named_groups",
                "dot_all",
            },
            unsupported_features={
                "lookbehind",
                "possessive_quantifiers",
                "atomic_groups",
                "recursive_patterns",
                "conditional_patterns",
                "callouts",
            },
            quirks={
                "lookbehind": "Not supported in JavaScript",
                "possessive_quantifiers": "Not supported in JavaScript",
            },
        )

        # NOTE(review): Python 3.11 added possessive quantifiers and atomic
        # groups, and ``re`` accepts (?(id)yes|no) conditionals; confirm the
        # Python version this table is meant to describe.
        python = Flavor(
            name="python",
            display_name="Python",
            description="Python re module",
            supported_features=common_features | {
                "lookahead",
                "lookbehind",
                "named_groups",
                "dot_all",
            },
            unsupported_features={
                "possessive_quantifiers",
                "atomic_groups",
                "recursive_patterns",
                "conditional_patterns",
                "callouts",
            },
            quirks={
                "possessive_quantifiers": "Use atomic groups or possessive++ equivalent not available",
            },
        )

        # RE2 (Go's engine) has no lookaround of either direction and no
        # backreferences, so lookahead belongs with the unsupported features.
        go = Flavor(
            name="go",
            display_name="Go",
            description="Go regexp (RE2)",
            supported_features=common_features | {
                "named_groups",
            },
            unsupported_features={
                "lookahead",
                "lookbehind",
                "possessive_quantifiers",
                "atomic_groups",
                "recursive_patterns",
                "conditional_patterns",
                "callouts",
                "backreferences_general",
                "named_backreferences",
            },
            quirks={
                "lookahead": "Not supported in Go's RE2 engine",
                "lookbehind": "Not supported in Go's RE2 engine",
                "backreferences": "Backreferences are not supported in Go's RE2 engine",
            },
        )

        for flavor in (pcre, javascript, python, go):
            self.register(flavor)

    def register(self, flavor: "Flavor"):
        """Add *flavor*, replacing any existing entry with the same name.

        The key is lowercased because ``get`` lowercases the name it is
        given; storing the raw name would make mixed-case flavors
        unreachable.
        """
        self._flavors[flavor.name.lower()] = flavor

    def get(self, name: str) -> Optional["Flavor"]:
        """Return the flavor registered under *name* (case-insensitive), or None."""
        return self._flavors.get(name.lower())

    def list_all(self) -> List["Flavor"]:
        """Return the registered flavors in registration order."""
        return list(self._flavors.values())

    def list_names(self) -> List[str]:
        """Return the (lowercase) registered names in registration order."""
        return list(self._flavors.keys())
|
||||
|
||||
|
||||
# Shared module-level registry instance used by the lookup helpers in this
# module; constructing it registers the built-in flavors.
_registry = FlavorRegistry()
|
||||
|
||||
|
||||
def get_flavor(name: str) -> Optional[Flavor]:
    """Look up *name* in the module-level registry.

    Returns whatever ``_registry.get`` yields for *name*: the matching
    ``Flavor``, or ``None`` when nothing is registered under that name.
    """
    flavor = _registry.get(name)
    return flavor
|
||||
|
||||
|
||||
def get_supported_flavors() -> List[str]:
    """Return the names of all flavors held by the module-level registry."""
    names = _registry.list_names()
    return names
|
||||
|
||||
|
||||
def validate_flavor(name: str) -> bool:
    """Report whether the module-level registry has a flavor called *name*."""
    found = _registry.get(name)
    return found is not None
|
||||
|
||||
|
||||
def detect_flavor(pattern: str) -> str:
    """Guess which regex flavor *pattern* was written for.

    This is a heuristic based on tell-tale syntax in the pattern text; it
    does not parse the pattern, so constructs inside character classes can
    be misread. The checks run in priority order and the first hit wins.

    Args:
        pattern: Source text of the regular expression.

    Returns:
        One of ``"pcre"``, ``"python"`` or ``"javascript"``; ``"pcre"`` is
        the fallback when nothing distinctive is found.
    """
    # \k<name> backreference: Python's ``re`` rejects this escape, so the
    # answer is PCRE whether or not a lookbehind is also present.
    if r"\k<" in pattern:
        return "pcre"

    # Match every lookbehind, not only those immediately followed by "[".
    has_lookbehind = "(?<=" in pattern or "(?<!" in pattern

    if "(?P<" in pattern:
        return "pcre" if has_lookbehind else "python"

    if has_lookbehind:
        return "python"

    # Possessive quantifiers are an *unescaped* +, * or ? followed by "+".
    # The prefix skips escaped backslash pairs so that "\++" (a quantified
    # literal plus) and "\+\+" (two literal pluses) are not counted.
    if re.search(r"(?<!\\)(?:\\\\)*[+*?]\+", pattern):
        return "pcre"

    if "(?=" in pattern or "(?!" in pattern:
        return "javascript"

    # NOTE(review): JavaScript RegExp has no inline "(?s)" group (dotAll is
    # the /s flag); this answer is kept as-is, confirm the intended flavor.
    if "(?s)" in pattern:
        return "javascript"

    return "pcre"
|
||||
|
||||
|
||||
# Maps a feature identifier to the regexes whose presence in a pattern's
# source text (tested with re.search) indicates that the feature is used.
# Every "(?" prefix escapes both characters; an unescaped "?" would make the
# parenthesis optional and let a bare "=", "!" or "<=" trigger a match.
FEATURE_PATTERNS = {
    "lookahead": [r"\(\?=", r"\(\?!"],
    "lookbehind": [r"\(\?<=", r"\(\?<!"],
    # "(?<name>" form: the lookahead keeps "(?<=" / "(?<!" out of this entry.
    "named_groups": [r"\(\?P<", r"\(\?<(?![=!])"],
    "named_backreferences": [r"\\k<"],
    "possessive_quantifiers": [r"\+\+", r"\*\+", r"\?\+"],
    "atomic_groups": [r"\(\?>"],
    # "(?R" whole-pattern recursion plus numbered/relative calls such as
    # "(?1)", "(?+1)" and "(?-1)"; requiring digits avoids flag groups
    # like "(?-i)".
    "recursive_patterns": [r"\(\?R", r"\(\?[+-]?\d+\)"],
    # NOTE(review): "(?|" is PCRE branch-reset syntax rather than a
    # conditional; it is left here so it keeps being reported. Consider
    # giving it its own feature identifier.
    "conditional_patterns": [r"\(\?\(", r"\(\?\|"],
    # "(?#" is comment syntax and is covered by "comment_syntax" only.
    "callouts": [r"\(\?C"],
    "comment_syntax": [r"\(\?#"],
    # Inline flag group that switches "s" on: "(?s)", "(?is)", "(?s:...)".
    "dot_all": [r"\(\?[a-zA-Z]*s[a-zA-Z]*[-:)]"],
    "backreferences_general": [r"\\\d+"],
}
|
||||
|
||||
|
||||
def check_feature_support(pattern: str, flavor: str) -> List[str]:
    """List the features used by *pattern* that *flavor* does not support.

    Args:
        pattern: Source text of the regular expression to inspect.
        flavor: Name of the flavor to check against.

    Returns:
        Feature identifiers, in ``FEATURE_PATTERNS`` order, for which at
        least one detection regex matches *pattern* and which appear in the
        flavor's ``unsupported_features``. An unknown flavor yields an empty
        list.
    """
    flavor_obj = get_flavor(flavor)
    if not flavor_obj:
        return []

    rejected = flavor_obj.unsupported_features
    return [
        feature
        for feature, probes in FEATURE_PATTERNS.items()
        if any(re.search(probe, pattern) for probe in probes)
        and feature in rejected
    ]
|
||||
|
||||
|
||||
def get_compatibility_warnings(pattern: str, flavor: str) -> List[FlavorWarning]:
    """Describe every compatibility problem *pattern* has under *flavor*.

    Args:
        pattern: Source text of the regular expression to inspect.
        flavor: Name of the flavor to check against.

    Returns:
        One ``FlavorWarning`` per unsupported feature reported by
        ``check_feature_support``. When the flavor has a quirk note for the
        feature, that note is the message and the severity is "warning";
        otherwise a generic message is used with severity "error". An
        unknown flavor yields a single "unknown_flavor" error entry.
    """
    flavor_obj = get_flavor(flavor)
    if not flavor_obj:
        unknown = FlavorWarning(
            feature="unknown_flavor",
            message=f"Unknown flavor: {flavor}",
            severity="error",
        )
        return [unknown]

    findings = []
    for feature in check_feature_support(pattern, flavor):
        try:
            # A documented quirk downgrades the finding to a warning.
            message, severity = flavor_obj.quirks[feature], "warning"
        except KeyError:
            message = f"Feature '{feature}' is not supported in {flavor_obj.display_name}"
            severity = "error"
        findings.append(
            FlavorWarning(feature=feature, message=message, severity=severity)
        )
    return findings
|
||||
Reference in New Issue
Block a user