fix: add type annotations to converter files
Some checks failed
CI / test (push) Failing after 11s
CI / build (push) Has been skipped

This commit is contained in:
2026-02-02 07:01:40 +00:00
parent 5ef0b3cb72
commit 681f2b7e4e

View File

@@ -1,5 +1,7 @@
"""Bidirectional conversion from English descriptions to regex patterns."""
import re import re
from typing import Dict, List, Optional, Tuple from typing import Any, Dict, List, Optional, Tuple
from ..parser import parse_regex from ..parser import parse_regex
@@ -100,18 +102,117 @@ PATTERN_TEMPLATES = {
], ],
"builder": lambda m: r"\B", "builder": lambda m: r"\B",
}, },
"character_class_any": {
"patterns": [
r"any\s+(?:of\s+)?(character|in)\s+([a-zA-Z])[-–—]([a-zA-Z])",
r"(?:characters?|in)\s+range\s+([a-zA-Z])[-–—]([a-zA-Z])",
],
"builder": lambda m: f"[{m.group(1)}-{m.group(2)}]",
},
"character_class_specific": {
"patterns": [
r"any\s+(?:of\s+)?['\"]?([a-zA-Z0-9])['\"]?",
],
"builder": lambda m: f"[{m.group(1)}]",
},
"optional": {
"patterns": [
r"(?:optionally|optional|zero\s+or\s+one)\s+(.*)",
],
"builder": lambda m: f"(?:{m.group(1)})?",
},
"zero_or_more": {
"patterns": [
r"(?:zero\s+or\s+more|star|asterisk)\s+(.*)",
],
"builder": lambda m: f"(?:{m.group(1)})*",
},
"one_or_more": {
"patterns": [
r"(?:one\s+or\s+more|plus)\s+(.*)",
],
"builder": lambda m: f"(?:{m.group(1)})+",
},
"exactly": {
"patterns": [
r"exactly\s+(\d+)\s+(?:times?)?\s+(.*)",
],
"builder": lambda m: f"(?:{m.group(2)}){{{m.group(1)}}}",
},
"between": {
"patterns": [
r"between\s+(\d+)\s+and\s+(\d+)\s+(?:times?)?\s+(.*)",
],
"builder": lambda m: f"(?:{m.group(3)}){{{m.group(1)},{m.group(2)}}}",
},
"at_least": {
"patterns": [
r"at\s+least\s+(\d+)\s+(?:times?)?\s+(.*)",
],
"builder": lambda m: f"(?:{m.group(2)}){{{m.group(1)},}}",
},
"group": {
"patterns": [
r"(?:a\s+)?(?:capturing\s+)?group\s+(?:containing|with)\s+(.*)",
],
"builder": lambda m: f"({m.group(1)})",
},
"non_capturing_group": {
"patterns": [
r"(?:a\s+)?non-?capturing\s+group\s+(?:containing|with)\s+(.*)",
],
"builder": lambda m: f"(?:{m.group(1)})",
},
"named_group": {
"patterns": [
r"(?:a\s+)?(?:named\s+)?group\s+(?:named|called)\s+'([^']+)'\s+(?:containing|with)\s+(.*)",
],
"builder": lambda m: f"(?P<{m.group(1)}>{m.group(2)})",
},
"or": {
"patterns": [
r"(.*?)\s+or\s+(.*)",
],
"builder": lambda m: f"{m.group(1)}|{m.group(2)}",
},
"alternation": {
"patterns": [
r"(?:either\s+)?(.+?)\s+(?:or|\/\/)\s+(.+)",
],
"builder": lambda m: f"{m.group(1)}|{m.group(2)}",
},
} }
def parse_english(description: str) -> str:
    """Normalize the whitespace of an English pattern description.

    Every run of whitespace (spaces, tabs, newlines) is collapsed into a
    single space, and leading/trailing whitespace is removed. No translation
    to regex syntax happens here; the text is otherwise returned unchanged.

    Args:
        description: The English description of the pattern.

    Returns:
        The whitespace-normalized description.
    """
    # Collapse first, then strip, so a whitespace-only input becomes "".
    return re.sub(r"\s+", " ", description).strip()
def english_to_regex(description: str, flavor: str = "pcre") -> Tuple[str, List[str]]: def english_to_regex(description: str, flavor: str = "pcre") -> Tuple[str, List[str]]:
"""Convert an English description to a regex pattern.
Args:
description: The English description of the pattern.
flavor: The target regex flavor.
Returns:
A tuple of (regex_pattern, warnings).
"""
pattern = description.lower() pattern = description.lower()
warnings = []
warnings: List[str] = []
replacements = [] replacements = []
@@ -162,12 +263,21 @@ def english_to_regex(description: str, flavor: str = "pcre") -> Tuple[str, List[
result = re.sub(r"\s+", "", result) result = re.sub(r"\s+", "", result)
result = re.sub(r"\[\^?([a-z])-([a-z])\]", lambda m: f"[{m.group(1)}-{m.group(2)}]", result, flags=re.IGNORECASE) result = re.sub(r"\[^?([a-z])-([a-z])\]", lambda m: f"[{m.group(1)}-{m.group(2)}]", result, flags=re.IGNORECASE)
return result, warnings return result, warnings
def validate_roundtrip(original: str, converted: str) -> Tuple[bool, Optional[str]]: def validate_roundtrip(original: str, converted: str) -> Tuple[bool, Optional[str]]:
"""Validate that converting from regex to English and back produces a valid pattern.
Args:
original: The original regex pattern.
converted: The pattern converted from English.
Returns:
A tuple of (is_valid, error_message).
"""
try: try:
parse_regex(converted) parse_regex(converted)
return True, None return True, None
@@ -175,10 +285,20 @@ def validate_roundtrip(original: str, converted: str) -> Tuple[bool, Optional[st
return False, str(e) return False, str(e)
def convert_english_to_regex(description: str, flavor: str = "pcre", validate: bool = True) -> Dict: def convert_english_to_regex(description: str, flavor: str = "pcre", validate: bool = True) -> Dict[str, Any]:
"""Convert English description to regex with full context.
Args:
description: The English description of the pattern.
flavor: The target regex flavor.
validate: Whether to validate the result.
Returns:
A dictionary with conversion results.
"""
pattern, warnings = english_to_regex(description, flavor) pattern, warnings = english_to_regex(description, flavor)
result = { result: Dict[str, Any] = {
"input": description, "input": description,
"output": pattern, "output": pattern,
"flavor": flavor, "flavor": flavor,