fix: add type annotations to converter files
This commit is contained in:
@@ -1,5 +1,7 @@
|
||||
"""Bidirectional conversion from English descriptions to regex patterns."""
|
||||
|
||||
import re
|
||||
from typing import Dict, List, Optional, Tuple
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
from ..parser import parse_regex
|
||||
|
||||
@@ -100,18 +102,117 @@ PATTERN_TEMPLATES = {
|
||||
],
|
||||
"builder": lambda m: r"\B",
|
||||
},
|
||||
"character_class_any": {
|
||||
"patterns": [
|
||||
r"any\s+(?:of\s+)?(character|in)\s+([a-zA-Z])[-–—]([a-zA-Z])",
|
||||
r"(?:characters?|in)\s+range\s+([a-zA-Z])[-–—]([a-zA-Z])",
|
||||
],
|
||||
"builder": lambda m: f"[{m.group(1)}-{m.group(2)}]",
|
||||
},
|
||||
"character_class_specific": {
|
||||
"patterns": [
|
||||
r"any\s+(?:of\s+)?['\"]?([a-zA-Z0-9])['\"]?",
|
||||
],
|
||||
"builder": lambda m: f"[{m.group(1)}]",
|
||||
},
|
||||
"optional": {
|
||||
"patterns": [
|
||||
r"(?:optionally|optional|zero\s+or\s+one)\s+(.*)",
|
||||
],
|
||||
"builder": lambda m: f"(?:{m.group(1)})?",
|
||||
},
|
||||
"zero_or_more": {
|
||||
"patterns": [
|
||||
r"(?:zero\s+or\s+more|star|asterisk)\s+(.*)",
|
||||
],
|
||||
"builder": lambda m: f"(?:{m.group(1)})*",
|
||||
},
|
||||
"one_or_more": {
|
||||
"patterns": [
|
||||
r"(?:one\s+or\s+more|plus)\s+(.*)",
|
||||
],
|
||||
"builder": lambda m: f"(?:{m.group(1)})+",
|
||||
},
|
||||
"exactly": {
|
||||
"patterns": [
|
||||
r"exactly\s+(\d+)\s+(?:times?)?\s+(.*)",
|
||||
],
|
||||
"builder": lambda m: f"(?:{m.group(2)}){{{m.group(1)}}}",
|
||||
},
|
||||
"between": {
|
||||
"patterns": [
|
||||
r"between\s+(\d+)\s+and\s+(\d+)\s+(?:times?)?\s+(.*)",
|
||||
],
|
||||
"builder": lambda m: f"(?:{m.group(3)}){{{m.group(1)},{m.group(2)}}}",
|
||||
},
|
||||
"at_least": {
|
||||
"patterns": [
|
||||
r"at\s+least\s+(\d+)\s+(?:times?)?\s+(.*)",
|
||||
],
|
||||
"builder": lambda m: f"(?:{m.group(2)}){{{m.group(1)},}}",
|
||||
},
|
||||
"group": {
|
||||
"patterns": [
|
||||
r"(?:a\s+)?(?:capturing\s+)?group\s+(?:containing|with)\s+(.*)",
|
||||
],
|
||||
"builder": lambda m: f"({m.group(1)})",
|
||||
},
|
||||
"non_capturing_group": {
|
||||
"patterns": [
|
||||
r"(?:a\s+)?non-?capturing\s+group\s+(?:containing|with)\s+(.*)",
|
||||
],
|
||||
"builder": lambda m: f"(?:{m.group(1)})",
|
||||
},
|
||||
"named_group": {
|
||||
"patterns": [
|
||||
r"(?:a\s+)?(?:named\s+)?group\s+(?:named|called)\s+'([^']+)'\s+(?:containing|with)\s+(.*)",
|
||||
],
|
||||
"builder": lambda m: f"(?P<{m.group(1)}>{m.group(2)})",
|
||||
},
|
||||
"or": {
|
||||
"patterns": [
|
||||
r"(.*?)\s+or\s+(.*)",
|
||||
],
|
||||
"builder": lambda m: f"{m.group(1)}|{m.group(2)}",
|
||||
},
|
||||
"alternation": {
|
||||
"patterns": [
|
||||
r"(?:either\s+)?(.+?)\s+(?:or|\/\/)\s+(.+)",
|
||||
],
|
||||
"builder": lambda m: f"{m.group(1)}|{m.group(2)}",
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
def parse_english(description: str) -> str:
    """Normalize the whitespace of an English pattern description.

    Every run of whitespace characters is collapsed to a single space and
    leading/trailing whitespace is removed. No other transformation is
    applied to the text by this function.

    Args:
        description: The English description of the pattern.

    Returns:
        The description with whitespace collapsed and the ends trimmed.
    """
    # Collapse first, then trim: a leading/trailing run becomes one space,
    # which strip() then removes.
    collapsed = re.sub(r"\s+", " ", description)
    return collapsed.strip()
|
||||
|
||||
|
||||
def english_to_regex(description: str, flavor: str = "pcre") -> Tuple[str, List[str]]:
|
||||
"""Convert an English description to a regex pattern.
|
||||
|
||||
Args:
|
||||
description: The English description of the pattern.
|
||||
flavor: The target regex flavor.
|
||||
|
||||
Returns:
|
||||
A tuple of (regex_pattern, warnings).
|
||||
"""
|
||||
pattern = description.lower()
|
||||
warnings = []
|
||||
|
||||
warnings: List[str] = []
|
||||
|
||||
replacements = []
|
||||
|
||||
@@ -162,12 +263,21 @@ def english_to_regex(description: str, flavor: str = "pcre") -> Tuple[str, List[
|
||||
|
||||
result = re.sub(r"\s+", "", result)
|
||||
|
||||
result = re.sub(r"\[\^?([a-z])-([a-z])\]", lambda m: f"[{m.group(1)}-{m.group(2)}]", result, flags=re.IGNORECASE)
|
||||
result = re.sub(r"\[^?([a-z])-([a-z])\]", lambda m: f"[{m.group(1)}-{m.group(2)}]", result, flags=re.IGNORECASE)
|
||||
|
||||
return result, warnings
|
||||
|
||||
|
||||
def validate_roundtrip(original: str, converted: str) -> Tuple[bool, Optional[str]]:
|
||||
"""Validate that converting from regex to English and back produces a valid pattern.
|
||||
|
||||
Args:
|
||||
original: The original regex pattern.
|
||||
converted: The pattern converted from English.
|
||||
|
||||
Returns:
|
||||
A tuple of (is_valid, error_message).
|
||||
"""
|
||||
try:
|
||||
parse_regex(converted)
|
||||
return True, None
|
||||
@@ -175,10 +285,20 @@ def validate_roundtrip(original: str, converted: str) -> Tuple[bool, Optional[st
|
||||
return False, str(e)
|
||||
|
||||
|
||||
def convert_english_to_regex(description: str, flavor: str = "pcre", validate: bool = True) -> Dict:
|
||||
def convert_english_to_regex(description: str, flavor: str = "pcre", validate: bool = True) -> Dict[str, Any]:
|
||||
"""Convert English description to regex with full context.
|
||||
|
||||
Args:
|
||||
description: The English description of the pattern.
|
||||
flavor: The target regex flavor.
|
||||
validate: Whether to validate the result.
|
||||
|
||||
Returns:
|
||||
A dictionary with conversion results.
|
||||
"""
|
||||
pattern, warnings = english_to_regex(description, flavor)
|
||||
|
||||
result = {
|
||||
result: Dict[str, Any] = {
|
||||
"input": description,
|
||||
"output": pattern,
|
||||
"flavor": flavor,
|
||||
|
||||
Reference in New Issue
Block a user