Files
7000pctAUTO e315750b2f
Some checks failed
CI / test (push) Failing after 12s
CI / build (push) Has been skipped
Add converter, examples, and flavors modules
2026-02-02 06:26:52 +00:00

255 lines
7.2 KiB
Python

import re
from dataclasses import dataclass, field
from typing import Dict, List, Optional, Set
@dataclass
class Flavor:
    """One regex dialect together with the features it offers or lacks."""

    # Identifier of the flavor; the registry uses it as the storage key.
    name: str
    # Human-readable label, used when building user-facing messages.
    display_name: str
    # Short free-text description of the dialect.
    description: str
    # Feature names this dialect is declared to support.
    supported_features: Set[str] = field(default_factory=set)
    # Feature names this dialect is declared NOT to support.
    unsupported_features: Set[str] = field(default_factory=set)
    # Feature name -> explanatory note shown instead of a generic message.
    quirks: Dict[str, str] = field(default_factory=dict)
@dataclass
class FlavorWarning:
    """A single compatibility finding about a pattern for a given flavor."""

    # Name of the feature the finding is about.
    feature: str
    # Text explaining the problem to the user.
    message: str
    # Severity label; defaults to "warning".
    severity: str = "warning"
class FlavorRegistry:
    """Case-insensitive registry of regex flavors, pre-populated with defaults.

    Flavors are stored under the lower-cased form of their ``name`` and
    ``get`` lower-cases the requested name as well, so a flavor registered as
    ``"MyFlavor"`` can be retrieved as ``"myflavor"``, ``"MYFLAVOR"``, etc.
    """

    def __init__(self):
        """Create the registry and register the built-in flavors."""
        self._flavors: Dict[str, Flavor] = {}
        self._register_default_flavors()

    def _register_default_flavors(self):
        """Build and register the built-in PCRE, JavaScript, Python and Go flavors."""
        # Features shared by every built-in flavor.
        common_features = {
            "literals",
            "character_classes",
            "quantifiers_basic",
            "anchors_basic",
            "groups_capturing",
            "groups_non_capturing",
            "alternation",
            "escaping",
            "dot_any",
            "word_boundary",
        }
        pcre = Flavor(
            name="pcre",
            display_name="PCRE",
            description="Perl Compatible Regular Expressions",
            supported_features=common_features | {
                "lookahead",
                "lookbehind",
                "possessive_quantifiers",
                "atomic_groups",
                "named_groups",
                "recursive_patterns",
                "conditional_patterns",
                "comment_syntax",
                "callouts",
            },
            quirks={
                "possessive_quantifiers": "Use ++, *+, ?+ syntax",
                "dot_all": "Use (?s) modifier for dot to match newlines",
            },
        )
        javascript = Flavor(
            name="javascript",
            display_name="JavaScript",
            description="JavaScript RegExp",
            supported_features=common_features | {
                "lookahead",
                "named_groups",
                "dot_all",
            },
            unsupported_features={
                "lookbehind",
                "possessive_quantifiers",
                "atomic_groups",
                "recursive_patterns",
                "conditional_patterns",
                "callouts",
            },
            quirks={
                "lookbehind": "Not supported in JavaScript",
                "possessive_quantifiers": "Not supported in JavaScript",
            },
        )
        # NOTE(review): Python 3.11 added possessive quantifiers and atomic
        # groups to ``re``; they stay listed as unsupported here so that
        # patterns remain portable to older interpreters.
        python = Flavor(
            name="python",
            display_name="Python",
            description="Python re module",
            supported_features=common_features | {
                "lookahead",
                "lookbehind",
                "named_groups",
                "dot_all",
            },
            unsupported_features={
                "possessive_quantifiers",
                "atomic_groups",
                "recursive_patterns",
                "conditional_patterns",
                "callouts",
            },
            quirks={
                "possessive_quantifiers": "Not supported by Python's re module before 3.11",
            },
        )
        # Go's regexp package implements RE2 syntax, which has no lookaround
        # (neither lookahead nor lookbehind) and no backreferences at all.
        go = Flavor(
            name="go",
            display_name="Go",
            description="Go regexp (RE2)",
            supported_features=common_features | {
                "named_groups",
            },
            unsupported_features={
                "lookahead",
                "lookbehind",
                "possessive_quantifiers",
                "atomic_groups",
                "recursive_patterns",
                "conditional_patterns",
                "callouts",
                "backreferences_general",
                "named_backreferences",
            },
            quirks={
                "lookahead": "Not supported in Go's RE2 engine",
                "lookbehind": "Not supported in Go's RE2 engine",
                "backreferences": "Backreferences are not supported in Go's RE2 engine",
            },
        )
        for flavor in (pcre, javascript, python, go):
            self.register(flavor)

    def register(self, flavor: Flavor):
        """Add *flavor* to the registry, replacing any flavor with the same name.

        The storage key is lower-cased so that it agrees with ``get``, which
        lower-cases the name it is asked for.
        """
        self._flavors[flavor.name.lower()] = flavor

    def get(self, name: str) -> Optional[Flavor]:
        """Return the flavor registered under *name* (case-insensitive), or ``None``."""
        return self._flavors.get(name.lower())

    def list_all(self) -> List[Flavor]:
        """Return all registered flavors in registration order."""
        return list(self._flavors.values())

    def list_names(self) -> List[str]:
        """Return the ``name`` of every registered flavor in registration order."""
        # Report names as the flavors declare them rather than the internal
        # lower-cased keys, so callers see what was registered.
        return [flavor.name for flavor in self._flavors.values()]
# Module-level registry instance used by the lookup helpers in this module.
_registry = FlavorRegistry()
def get_flavor(name: str) -> Optional[Flavor]:
    """Look up *name* in the module-level registry.

    Returns the matching ``Flavor``, or ``None`` when nothing is registered
    under that name.
    """
    flavor = _registry.get(name)
    return flavor
def get_supported_flavors() -> List[str]:
    """Return the names of all flavors held by the module-level registry."""
    names = _registry.list_names()
    return names
def validate_flavor(name: str) -> bool:
    """Report whether *name* resolves to a flavor in the module-level registry."""
    flavor = _registry.get(name)
    return flavor is not None
def _has_possessive_quantifier(pattern: str) -> bool:
    """Return True if *pattern* appears to use ``++``, ``*+`` or ``?+``."""
    # Mask escape sequences and character classes with a placeholder so that
    # text such as r"\++" (escaped plus followed by a greedy "+"), r"a+\d+"
    # or "[*+]" is not mistaken for a possessive quantifier.
    masked = re.sub(r"\\.", "_", pattern, flags=re.DOTALL)
    masked = re.sub(r"\[[^\]]*\]", "_", masked)
    return any(token in masked for token in ("++", "*+", "?+"))


def detect_flavor(pattern: str) -> str:
    """Guess which regex flavor *pattern* was written for.

    This is a best-effort heuristic based on plain substring probes; it does
    not parse the pattern. The rules are applied in this order:

    1. ``\\k<name>`` backreference          -> ``"pcre"`` (Python's ``re`` has
       no ``\\k<...>`` syntax, so it is never reported as Python).
    2. ``(?P<name>`` group                   -> ``"pcre"`` if a lookbehind is
       also present, otherwise ``"python"``.
    3. lookbehind ``(?<=`` / ``(?<!``        -> ``"python"``.
    4. possessive quantifier ``++ *+ ?+``    -> ``"pcre"``.
    5. lookahead ``(?=`` / ``(?!``           -> ``"javascript"``.
    6. anything else (including ``(?s)``)    -> ``"pcre"``.

    Args:
        pattern: The regular expression source text.

    Returns:
        One of ``"pcre"``, ``"python"`` or ``"javascript"``.
    """
    has_lookbehind = "(?<=" in pattern or "(?<!" in pattern
    has_lookahead = "(?=" in pattern or "(?!" in pattern

    if r"\k<" in pattern:
        return "pcre"
    if "(?P<" in pattern:
        return "pcre" if has_lookbehind else "python"
    if has_lookbehind:
        return "python"
    if _has_possessive_quantifier(pattern):
        return "pcre"
    if has_lookahead:
        return "javascript"
    # Inline flags such as "(?s)" are not valid JavaScript, so they fall
    # through to the PCRE default together with everything else.
    return "pcre"
# Maps a feature name to the regexes that reveal its use. Each regex is
# searched (``re.search``) in the *source text* of a user-supplied pattern, so
# every metacharacter of the construct being detected must be escaped
# (``\(\?=`` finds the literal text "(?=").
FEATURE_PATTERNS = {
    "lookahead": [r"\(\?=", r"\(\?!"],
    "lookbehind": [r"\(\?<=", r"\(\?<!"],
    # "(?<name>" must not be confused with the lookbehinds "(?<=" / "(?<!".
    "named_groups": [r"\(\?P<", r"\(\?<(?![=!])"],
    "named_backreferences": [r"\\k<", r"\(\?P="],
    "possessive_quantifiers": [r"\+\+", r"\*\+", r"\?\+"],
    "atomic_groups": [r"\(\?>"],
    # (?R), numbered/relative calls such as (?1) (?-1) (?+1), (?&name), (?P>name).
    "recursive_patterns": [r"\(\?R\)", r"\(\?[+-]?\d+\)", r"\(\?&", r"\(\?P>"],
    # "(?(" opens a conditional; "(?|" is a branch-reset group, kept under
    # this feature name as in the original table.
    "conditional_patterns": [r"\(\?\(", r"\(\?\|"],
    "callouts": [r"\(\?C"],
    "comment_syntax": [r"\(\?#"],
    # Inline flag group that switches "s" on: (?s), (?is), (?s:...), (?s-i).
    "dot_all": [r"\(\?[a-zA-Z]*s[a-zA-Z]*[-:)]"],
    # A backslash that is not itself escaped, followed by a non-zero number
    # ("\0" is a NUL/octal escape, not a backreference).
    "backreferences_general": [r"(?<!\\)(?:\\\\)*\\[1-9]\d*"],
}
def check_feature_support(pattern: str, flavor: str) -> List[str]:
    """List the features used by *pattern* that *flavor* declares unsupported.

    Args:
        pattern: Regular expression source text to inspect.
        flavor: Name of the target flavor, resolved through ``get_flavor``.

    Returns:
        Feature names, in ``FEATURE_PATTERNS`` order, for which at least one
        detection regex matches *pattern* and which appear in the flavor's
        ``unsupported_features``. An unknown flavor yields an empty list.
    """
    target = get_flavor(flavor)
    if not target:
        return []
    # The regex probe is evaluated before the membership test, mirroring the
    # order in which a feature is first detected and then checked.
    return [
        feature
        for feature, detectors in FEATURE_PATTERNS.items()
        if any(re.search(detector, pattern) for detector in detectors)
        and feature in target.unsupported_features
    ]
def get_compatibility_warnings(pattern: str, flavor: str) -> List[FlavorWarning]:
    """Turn the unsupported features of *pattern* into ``FlavorWarning`` objects.

    Args:
        pattern: Regular expression source text to inspect.
        flavor: Name of the target flavor.

    Returns:
        One entry per feature reported by ``check_feature_support``. A feature
        with an entry in the flavor's ``quirks`` gets that text at severity
        ``"warning"``; any other feature gets a generic message at severity
        ``"error"``. An unknown flavor yields a single ``"unknown_flavor"``
        error entry.
    """
    target = get_flavor(flavor)
    if not target:
        unknown = FlavorWarning(
            feature="unknown_flavor",
            message=f"Unknown flavor: {flavor}",
            severity="error",
        )
        return [unknown]

    def _to_warning(feature: str) -> FlavorWarning:
        # A flavor-specific quirk note takes precedence over the generic text.
        if feature in target.quirks:
            return FlavorWarning(
                feature=feature,
                message=target.quirks[feature],
                severity="warning",
            )
        return FlavorWarning(
            feature=feature,
            message=f"Feature '{feature}' is not supported in {target.display_name}",
            severity="error",
        )

    return [_to_warning(feature) for feature in check_feature_support(pattern, flavor)]