208 lines
5.8 KiB
Python
208 lines
5.8 KiB
Python
"""Flavor support system for different regex flavors."""
|
|
|
|
from abc import ABC, abstractmethod
|
|
from typing import Optional
|
|
import re
|
|
|
|
|
|
class RegexFlavor(ABC):
    """Abstract interface shared by every regex flavor.

    A concrete flavor exposes a ``name`` and a ``description`` and
    implements pattern normalization, a flag lookup, and a
    feature-support query.
    """

    @property
    @abstractmethod
    def name(self) -> str:
        """Identifier of the flavor."""

    @property
    @abstractmethod
    def description(self) -> str:
        """Human-readable summary of the flavor."""

    @abstractmethod
    def normalize(self, pattern: str) -> tuple[str, list[str]]:
        """Rewrite *pattern* for this flavor.

        Returns:
            A ``(normalized_pattern, warnings)`` pair.
        """

    @abstractmethod
    def get_flags(self) -> int:
        """Regex flags associated with this flavor, as an integer bitmask."""

    @abstractmethod
    def supports_feature(self, feature: str) -> bool:
        """Tell whether the feature called *feature* is supported."""
|
|
|
|
|
|
class PCREFlavor(RegexFlavor):
    """Flavor for PCRE (Perl Compatible Regular Expressions)."""

    @property
    def name(self) -> str:
        """Identifier of this flavor."""
        return "pcre"

    @property
    def description(self) -> str:
        """Human-readable summary of this flavor."""
        return "PCRE - Full feature set with possessive quantifiers, lookbehinds, and all Perl extensions"

    def normalize(self, pattern: str) -> tuple[str, list[str]]:
        """Return *pattern* untouched, paired with a new empty warning list."""
        return pattern, []

    def get_flags(self) -> int:
        """Return the flags used for this flavor (``re.MULTILINE``)."""
        return re.MULTILINE

    def supports_feature(self, feature: str) -> bool:
        """Report whether *feature* is supported; unknown names yield False."""
        # Every feature listed here is marked as supported.
        feature_table = dict.fromkeys(
            (
                "lookahead",
                "lookbehind",
                "named_groups",
                "non_capturing_groups",
                "possessive_quantifiers",
                "atomic_groups",
                "comment_syntax",
                "inline_flags",
                "recursion",
                "subroutine_references",
            ),
            True,
        )
        return feature_table.get(feature, False)
|
|
|
|
|
|
class JavaScriptFlavor(RegexFlavor):
    """JavaScript (ECMAScript) regex flavor."""

    # Matches a Python-style named backreference such as ``(?P=year)``.
    _PYTHON_NAMED_BACKREF = re.compile(r"\(\?P=(\w+)\)")

    @property
    def name(self) -> str:
        """Identifier of this flavor."""
        return "javascript"

    @property
    def description(self) -> str:
        """Human-readable summary of this flavor."""
        return "JavaScript/ECMAScript - Limited lookbehind support, dotAll flag needed for . matching newlines"

    def normalize(self, pattern: str) -> tuple[str, list[str]]:
        """Rewrite Python-style named-group syntax into the ECMAScript form.

        ``(?P<name>`` becomes ``(?<name>`` and ``(?P=name)`` becomes
        ``\\k<name>``. An existing ``\\k<name>`` backreference is already
        valid ECMAScript and is left unchanged.

        The rewrite is purely textual: it does not parse the pattern, so
        the same character sequences inside a character class or after an
        escaping backslash are rewritten as well.

        Args:
            pattern: The pattern to rewrite.

        Returns:
            A ``(normalized_pattern, warnings)`` pair. The warning list
            always contains one general compatibility note.
        """
        warnings = []

        # Named group opener: (?P<name>...) -> (?<name>...)
        normalized = pattern.replace("(?P<", "(?<")

        # Named backreference: (?P=name) -> \k<name>
        normalized = self._PYTHON_NAMED_BACKREF.sub(r"\\k<\1>", normalized)

        warnings.append("Note: Some PCRE features may not work in JavaScript")

        return normalized, warnings

    def get_flags(self) -> int:
        """Return the flags used for this flavor (none, i.e. ``0``)."""
        return 0

    def supports_feature(self, feature: str) -> bool:
        """Report whether *feature* is supported; unknown names yield False."""
        supported = {
            "lookahead": True,
            "lookbehind": True,
            "named_groups": True,
            "non_capturing_groups": True,
            "possessive_quantifiers": False,
            "atomic_groups": False,
            "comment_syntax": False,
            "inline_flags": False,
            "recursion": False,
            "subroutine_references": False,
        }
        return supported.get(feature, False)
|
|
|
|
|
|
class PythonFlavor(RegexFlavor):
    """Python ``re`` module regex flavor."""

    # ``(?<name>`` group opener. The lookahead excludes the lookbehind
    # assertions ``(?<=`` and ``(?<!``, which must stay as they are.
    _ANGLE_NAMED_GROUP = re.compile(r"\(\?<(?![=!])")

    # ``\k<name>`` named backreference, which ``re`` does not accept.
    _ANGLE_NAMED_BACKREF = re.compile(r"\\k<(\w+)>")

    @property
    def name(self) -> str:
        """Identifier of this flavor."""
        return "python"

    @property
    def description(self) -> str:
        """Human-readable summary of this flavor."""
        return "Python re module - Full Unicode support, named groups, and most PCRE features"

    def normalize(self, pattern: str) -> tuple[str, list[str]]:
        """Rewrite named-group syntax into the form Python's ``re`` accepts.

        ``(?<name>`` becomes ``(?P<name>`` and ``\\k<name>`` becomes
        ``(?P=name)``. Patterns already using the ``(?P<name>`` /
        ``(?P=name)`` spelling, and the lookbehinds ``(?<=`` / ``(?<!``,
        pass through unchanged.

        The rewrite is purely textual: it does not parse the pattern, so
        the same character sequences inside a character class or after an
        escaping backslash are rewritten as well.

        Args:
            pattern: The pattern to rewrite.

        Returns:
            A ``(normalized_pattern, warnings)`` pair; the warning list is
            always empty.
        """
        warnings = []

        # Named group opener: (?<name>...) -> (?P<name>...)
        normalized = self._ANGLE_NAMED_GROUP.sub("(?P<", pattern)

        # Named backreference: \k<name> -> (?P=name)
        normalized = self._ANGLE_NAMED_BACKREF.sub(r"(?P=\1)", normalized)

        return normalized, warnings

    def get_flags(self) -> int:
        """Return the flags used for this flavor (``re.MULTILINE | re.UNICODE``)."""
        return re.MULTILINE | re.UNICODE

    def supports_feature(self, feature: str) -> bool:
        """Report whether *feature* is supported; unknown names yield False."""
        # NOTE(review): Python 3.11+ accepts possessive quantifiers and
        # atomic groups in ``re``; the table is kept as-is so existing
        # callers see the same answers. Confirm the minimum supported
        # Python version before flipping these.
        supported = {
            "lookahead": True,
            "lookbehind": True,
            "named_groups": True,
            "non_capturing_groups": True,
            "possessive_quantifiers": False,
            "atomic_groups": False,
            "comment_syntax": True,
            "inline_flags": True,
            "recursion": False,
            "subroutine_references": False,
        }
        return supported.get(feature, False)
|
|
|
|
|
|
class FlavorManager:
    """Registry of regex flavors, keyed by flavor name, with pattern conversion."""

    def __init__(self):
        """Start with an empty registry and fill it with the built-in flavors."""
        self._flavors: dict[str, RegexFlavor] = {}
        self._register_default_flavors()

    def _register_default_flavors(self):
        """Instantiate and register each built-in flavor, in a fixed order."""
        for flavor_cls in (PCREFlavor, JavaScriptFlavor, PythonFlavor):
            self.register_flavor(flavor_cls())

    def register_flavor(self, flavor: RegexFlavor):
        """Store *flavor* under its ``name``, replacing any previous entry."""
        key = flavor.name
        self._flavors[key] = flavor

    def get_flavor(self, name: str) -> Optional[RegexFlavor]:
        """Look up a flavor by *name*; return None when it is not registered."""
        try:
            return self._flavors[name]
        except KeyError:
            return None

    def list_flavors(self) -> list[tuple[str, str]]:
        """Return ``(name, description)`` pairs for every registered flavor."""
        listing = []
        for key, registered in self._flavors.items():
            listing.append((key, registered.description))
        return listing

    def convert(
        self,
        pattern: str,
        from_flavor: str,
        to_flavor: str
    ) -> tuple[str, list[str]]:
        """Convert *pattern* between two registered flavors.

        The pattern is run through the source flavor's ``normalize`` and
        the result through the target flavor's ``normalize``.

        Returns:
            ``(converted_pattern, warnings)``. When either flavor name is
            unknown, the pattern is returned unchanged with a single
            message naming the unknown flavor.
        """
        source = self.get_flavor(from_flavor)
        target = self.get_flavor(to_flavor)

        # Guard clauses: the source is reported first when both are unknown.
        if not source:
            return pattern, [f"Unknown source flavor: {from_flavor}"]
        if not target:
            return pattern, [f"Unknown target flavor: {to_flavor}"]

        intermediate, source_warnings = source.normalize(pattern)
        converted, target_warnings = target.normalize(intermediate)
        return converted, source_warnings + target_warnings
|
|
|
|
|
|
def get_flavor_manager() -> FlavorManager:
    """Return the shared, process-wide flavor manager.

    The manager is created on the first call and reused afterwards, so
    flavors registered through it remain visible to later callers.
    """
    # The instance is cached on the function object so that this block
    # needs no module-level state of its own.
    manager = getattr(get_flavor_manager, "_instance", None)
    if manager is None:
        manager = FlavorManager()
        get_flavor_manager._instance = manager
    return manager
|
|
|
|
|
|
def get_available_flavors() -> list[str]:
    """Return the names of the built-in flavors as a new list."""
    builtin_names = ("pcre", "javascript", "python")
    return list(builtin_names)
|