Add converter, examples, and flavors modules
This commit is contained in:
254
regex_humanizer/flavors/registry.py
Normal file
254
regex_humanizer/flavors/registry.py
Normal file
@@ -0,0 +1,254 @@
|
|||||||
|
import re
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
from typing import Dict, List, Optional, Set
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class Flavor:
    """Description of one regular-expression dialect.

    The three leading fields are required; the feature sets and the quirks
    mapping each default to a fresh empty container per instance.
    """

    # Registry key for the flavor (e.g. "pcre").
    name: str
    # Human-readable label used when composing messages.
    display_name: str
    # Short free-text summary of the dialect.
    description: str
    # Feature identifiers the dialect is declared to handle.
    supported_features: Set[str] = field(default_factory=set)
    # Feature identifiers the dialect is declared to reject.
    unsupported_features: Set[str] = field(default_factory=set)
    # Feature identifier -> explanatory note specific to this dialect.
    quirks: Dict[str, str] = field(default_factory=dict)
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class FlavorWarning:
    """A single compatibility finding for a pattern under some flavor."""

    # Identifier of the feature (or pseudo-feature) the finding is about.
    feature: str
    # Text explaining the finding.
    message: str
    # Severity label; defaults to "warning" when not given.
    severity: str = "warning"
|
||||||
|
|
||||||
|
|
||||||
|
class FlavorRegistry:
    """Name-indexed collection of :class:`Flavor` definitions.

    A new registry is pre-populated with the built-in ``pcre``,
    ``javascript``, ``python`` and ``go`` flavors.  Flavor names are treated
    case-insensitively: they are lower-cased both when a flavor is registered
    and when it is looked up.
    """

    def __init__(self):
        self._flavors: Dict[str, Flavor] = {}
        self._register_default_flavors()

    def _register_default_flavors(self):
        """Create and register the four built-in flavors."""
        # Features every built-in flavor is declared to support.
        common_features = {
            "literals",
            "character_classes",
            "quantifiers_basic",
            "anchors_basic",
            "groups_capturing",
            "groups_non_capturing",
            "alternation",
            "escaping",
            "dot_any",
            "word_boundary",
        }

        pcre = Flavor(
            name="pcre",
            display_name="PCRE",
            description="Perl Compatible Regular Expressions",
            supported_features=common_features | {
                "lookahead",
                "lookbehind",
                "possessive_quantifiers",
                "atomic_groups",
                "named_groups",
                "recursive_patterns",
                "conditional_patterns",
                "comment_syntax",
                "callouts",
            },
            quirks={
                "possessive_quantifiers": "Use ++, *+, ?+ syntax",
                "dot_all": "Use (?s) modifier for dot to match newlines",
            },
        )

        # NOTE(review): lookbehind is part of JavaScript since ES2018; it is
        # still listed as unsupported here -- confirm the targeted baseline.
        javascript = Flavor(
            name="javascript",
            display_name="JavaScript",
            description="JavaScript RegExp",
            supported_features=common_features | {
                "lookahead",
                "named_groups",
                "dot_all",
            },
            unsupported_features={
                "lookbehind",
                "possessive_quantifiers",
                "atomic_groups",
                "recursive_patterns",
                "conditional_patterns",
                "callouts",
            },
            quirks={
                "lookbehind": "Not supported in JavaScript",
                "possessive_quantifiers": "Not supported in JavaScript",
            },
        )

        # NOTE(review): Python's ``re`` accepts ``(?(id)yes|no)`` conditionals,
        # and 3.11+ accepts possessive quantifiers and atomic groups; they are
        # listed as unsupported here -- confirm the intended Python baseline.
        python = Flavor(
            name="python",
            display_name="Python",
            description="Python re module",
            supported_features=common_features | {
                "lookahead",
                "lookbehind",
                "named_groups",
                "dot_all",
            },
            unsupported_features={
                "possessive_quantifiers",
                "atomic_groups",
                "recursive_patterns",
                "conditional_patterns",
                "callouts",
            },
            quirks={
                "possessive_quantifiers": "Use atomic groups or possessive++ equivalent not available",
            },
        )

        go = Flavor(
            name="go",
            display_name="Go",
            description="Go regexp (RE2)",
            # RE2 has no lookaround at all, so lookahead is not advertised as
            # a supported feature.
            supported_features=common_features | {
                "named_groups",
            },
            unsupported_features={
                "lookbehind",
                "possessive_quantifiers",
                "atomic_groups",
                "recursive_patterns",
                "conditional_patterns",
                "callouts",
                "backreferences_general",
                "named_backreferences",
            },
            quirks={
                "lookahead": "Not supported in Go's RE2 engine",
                "lookbehind": "Not supported in Go's RE2 engine",
                # RE2 rejects numbered and named backreferences alike, which
                # matches the unsupported set above.
                "backreferences": "Not supported in Go's RE2 engine",
            },
        )

        for flavor in (pcre, javascript, python, go):
            self.register(flavor)

    def register(self, flavor: Flavor):
        """Add *flavor* to the registry, replacing any same-named entry.

        The key is the lower-cased ``flavor.name`` so that :meth:`get`, which
        lower-cases its argument, can always find what was registered.
        """
        self._flavors[flavor.name.lower()] = flavor

    def get(self, name: str) -> Optional[Flavor]:
        """Return the flavor registered under *name* (case-insensitive), or ``None``."""
        return self._flavors.get(name.lower())

    def list_all(self) -> List[Flavor]:
        """Return all registered flavors in registration order."""
        return list(self._flavors.values())

    def list_names(self) -> List[str]:
        """Return the (lower-cased) registry keys in registration order."""
        return list(self._flavors.keys())
|
||||||
|
|
||||||
|
|
||||||
|
# Shared module-level registry; the module-level helper functions
# (get_flavor, get_supported_flavors, validate_flavor) all read from it.
_registry = FlavorRegistry()
|
||||||
|
|
||||||
|
|
||||||
|
def get_flavor(name: str) -> Optional[Flavor]:
    """Look *name* up in the module-level registry.

    Returns the matching ``Flavor``, or ``None`` when the registry has no
    entry for it.
    """
    match = _registry.get(name)
    return match
|
||||||
|
|
||||||
|
|
||||||
|
def get_supported_flavors() -> List[str]:
    """Return the names of every flavor held by the module-level registry."""
    names = _registry.list_names()
    return names
|
||||||
|
|
||||||
|
|
||||||
|
def validate_flavor(name: str) -> bool:
    """Return ``True`` when the module-level registry knows a flavor called *name*."""
    found = _registry.get(name)
    return found is not None
|
||||||
|
|
||||||
|
|
||||||
|
def _has_possessive_quantifier(pattern: str) -> bool:
    """Return True if *pattern* contains an unescaped ``++``, ``*+`` or ``?+``."""
    # Replace each escape sequence (backslash + one character) with a neutral
    # placeholder, so an escaped ``\+``, ``\*`` or ``\?`` cannot pair up with a
    # following ``+`` and be mistaken for a possessive quantifier.
    neutral = re.sub(r"\\.", "x", pattern, flags=re.DOTALL)
    return any(token in neutral for token in ("++", "*+", "?+"))


def detect_flavor(pattern: str) -> str:
    """Guess which regex flavor *pattern* was most likely written for.

    This is a heuristic based on substring checks, evaluated in this order:

    1. ``\\k<`` (named backreference)            -> ``"pcre"``
    2. ``(?P<`` (Python-style named group)        -> ``"pcre"`` if a lookbehind
       that starts with a character class is also present, else ``"python"``
    3. lookbehind starting with a character class -> ``"python"``
    4. lookahead ``(?=`` / ``(?!``                -> ``"pcre"`` if the pattern
       also uses a possessive quantifier or the inline ``(?s)`` flag (neither
       is valid JavaScript), else ``"javascript"``
    5. anything else                              -> ``"pcre"``

    Args:
        pattern: The regular expression source text.

    Returns:
        One of ``"pcre"``, ``"python"`` or ``"javascript"``.
    """
    # NOTE(review): only lookbehinds immediately followed by "[" are
    # recognised, as in the original heuristic -- confirm whether plain
    # "(?<=" / "(?<!" should count as well.
    has_class_lookbehind = "(?<![" in pattern or "(?<=[" in pattern

    # Python's re module rejects \k<name> (it spells it (?P=name)), so a
    # pattern using it is never reported as Python.
    if r"\k<" in pattern:
        return "pcre"

    if "(?P<" in pattern:
        return "pcre" if has_class_lookbehind else "python"

    if has_class_lookbehind:
        return "python"

    if "(?!" in pattern or "(?=" in pattern:
        # JavaScript has neither possessive quantifiers nor a bare (?s) flag
        # group, so either one rules it out.
        if _has_possessive_quantifier(pattern) or "(?s)" in pattern:
            return "pcre"
        return "javascript"

    # Possessive quantifiers, inline (?s) and plain patterns all fall back to
    # PCRE, the most permissive of the known flavors.
    return "pcre"
|
||||||
|
|
||||||
|
|
||||||
|
# Regex prefix that lets a construct match only when its first character is
# not backslash-escaped: the match position must not follow a backslash, and
# any backslashes consumed here come in escaped pairs (``\\``).
_UNESCAPED = r"(?<!\\)(?:\\\\)*"

# Maps each feature name to the regexes (applied with ``re.search`` to the raw
# pattern text) whose match indicates that the feature is used.
FEATURE_PATTERNS = {
    feature: [_UNESCAPED + construct for construct in constructs]
    for feature, constructs in {
        "lookahead": [r"\(\?=", r"\(\?!"],
        "lookbehind": [r"\(\?<=", r"\(\?<!"],
        # "(?<" must not be followed by "=" / "!" -- that would be lookbehind.
        "named_groups": [r"\(\?P<", r"\(\?<(?![=!])"],
        "named_backreferences": [r"\\k<"],
        "possessive_quantifiers": [r"\+\+", r"\*\+", r"\?\+"],
        "atomic_groups": [r"\(\?>"],
        # (?R), numbered / relative calls such as (?1) (?-1) (?+1), and the
        # named subroutine calls (?&name) / (?P>name).
        "recursive_patterns": [r"\(\?R", r"\(\?[+-]?\d+\)", r"\(\?&", r"\(\?P>"],
        # NOTE(review): "(?|" is PCRE's branch-reset group rather than a
        # conditional; it is kept under this feature as in the original table.
        "conditional_patterns": [r"\(\?\(", r"\(\?\|"],
        # "(?#" is a comment, not a callout, so it is listed only under
        # comment_syntax.
        "callouts": [r"\(\?C"],
        "comment_syntax": [r"\(\?#"],
        # An inline flag group that switches "s" on: (?s), (?is), (?s:...),
        # (?s-i) ...
        "dot_all": [r"\(\?[a-zA-Z]*s[a-zA-Z]*[-:)]"],
        # \1, \2, ... ; "\0" is not a backreference.
        "backreferences_general": [r"\\[1-9]\d*"],
    }.items()
}
|
||||||
|
|
||||||
|
|
||||||
|
def check_feature_support(pattern: str, flavor: str) -> List[str]:
    """List the features used by *pattern* that *flavor* declares unsupported.

    Args:
        pattern: Regular expression source text to inspect.
        flavor: Name of the flavor to check against.

    Returns:
        Feature names in ``FEATURE_PATTERNS`` order, each at most once.  An
        unknown flavor yields an empty list.
    """
    target = get_flavor(flavor)
    if not target:
        return []

    rejected = target.unsupported_features
    return [
        feature
        for feature, detectors in FEATURE_PATTERNS.items()
        # A feature counts as used as soon as any one of its detectors hits.
        if any(re.search(detector, pattern) for detector in detectors)
        and feature in rejected
    ]
|
||||||
|
|
||||||
|
|
||||||
|
def get_compatibility_warnings(pattern: str, flavor: str) -> List[FlavorWarning]:
    """Build compatibility findings for *pattern* under *flavor*.

    Args:
        pattern: Regular expression source text to inspect.
        flavor: Name of the flavor to check against.

    Returns:
        One ``FlavorWarning`` per unsupported feature reported by
        ``check_feature_support``.  A feature with an entry in the flavor's
        quirks carries that text at severity "warning"; any other feature gets
        a generic message at severity "error".  An unknown flavor produces a
        single "unknown_flavor" error.
    """
    target = get_flavor(flavor)
    if not target:
        unknown = FlavorWarning(
            feature="unknown_flavor",
            message=f"Unknown flavor: {flavor}",
            severity="error",
        )
        return [unknown]

    def _finding_for(feature: str) -> FlavorWarning:
        # No flavor-specific note available: fall back to the generic error.
        if feature not in target.quirks:
            return FlavorWarning(
                feature=feature,
                message=f"Feature '{feature}' is not supported in {target.display_name}",
                severity="error",
            )
        return FlavorWarning(
            feature=feature,
            message=target.quirks[feature],
            severity="warning",
        )

    return [_finding_for(feature) for feature in check_feature_support(pattern, flavor)]
|
||||||
Reference in New Issue
Block a user