From 681f2b7e4efa5f51ff7e2eccb7bc0121e482fd8a Mon Sep 17 00:00:00 2001 From: 7000pctAUTO Date: Mon, 2 Feb 2026 07:01:40 +0000 Subject: [PATCH] fix: add type annotations to converter files --- regex_humanizer/converter/english_to_regex.py | 130 +++++++++++++++++- 1 file changed, 125 insertions(+), 5 deletions(-) diff --git a/regex_humanizer/converter/english_to_regex.py b/regex_humanizer/converter/english_to_regex.py index e0fe9da..188f78c 100644 --- a/regex_humanizer/converter/english_to_regex.py +++ b/regex_humanizer/converter/english_to_regex.py @@ -1,5 +1,7 @@ +"""Conversion from English descriptions to regex patterns.""" + import re -from typing import Dict, List, Optional, Tuple +from typing import Any, Dict, List, Optional, Tuple from ..parser import parse_regex @@ -100,18 +102,117 @@ PATTERN_TEMPLATES = { ], "builder": lambda m: r"\B", }, + "character_class_any": { + "patterns": [ + r"any\s+(?:of\s+)?(?:character|in)\s+([a-zA-Z])[-–—]([a-zA-Z])", + r"(?:characters?|in)\s+range\s+([a-zA-Z])[-–—]([a-zA-Z])", + ], + "builder": lambda m: f"[{m.group(1)}-{m.group(2)}]", + }, + "character_class_specific": { + "patterns": [ + r"any\s+(?:of\s+)?['\"]?([a-zA-Z0-9])['\"]?", + ], + "builder": lambda m: f"[{m.group(1)}]", + }, + "optional": { + "patterns": [ + r"(?:optionally|optional|zero\s+or\s+one)\s+(.*)", + ], + "builder": lambda m: f"(?:{m.group(1)})?", + }, + "zero_or_more": { + "patterns": [ + r"(?:zero\s+or\s+more|star|asterisk)\s+(.*)", + ], + "builder": lambda m: f"(?:{m.group(1)})*", + }, + "one_or_more": { + "patterns": [ + r"(?:one\s+or\s+more|plus)\s+(.*)", + ], + "builder": lambda m: f"(?:{m.group(1)})+", + }, + "exactly": { + "patterns": [ + r"exactly\s+(\d+)\s+(?:times?\s+)?(.*)", + ], + "builder": lambda m: f"(?:{m.group(2)}){{{m.group(1)}}}", + }, + "between": { + "patterns": [ + r"between\s+(\d+)\s+and\s+(\d+)\s+(?:times?\s+)?(.*)", + ], + "builder": lambda m: f"(?:{m.group(3)}){{{m.group(1)},{m.group(2)}}}", + }, + "at_least": { +
"patterns": [ + r"at\s+least\s+(\d+)\s+(?:times?)?\s+(.*)", + ], + "builder": lambda m: f"(?:{m.group(2)}){{{m.group(1)},}}", + }, + "group": { + "patterns": [ + r"(?:a\s+)?(?:capturing\s+)?group\s+(?:containing|with)\s+(.*)", + ], + "builder": lambda m: f"({m.group(1)})", + }, + "non_capturing_group": { + "patterns": [ + r"(?:a\s+)?non-?capturing\s+group\s+(?:containing|with)\s+(.*)", + ], + "builder": lambda m: f"(?:{m.group(1)})", + }, + "named_group": { + "patterns": [ + r"(?:a\s+)?(?:named\s+)?group\s+(?:named|called)\s+'([^']+)'\s+(?:containing|with)\s+(.*)", + ], + "builder": lambda m: f"(?P<{m.group(1)}>{m.group(2)})", + }, + "or": { + "patterns": [ + r"(.*?)\s+or\s+(.*)", + ], + "builder": lambda m: f"{m.group(1)}|{m.group(2)}", + }, + "alternation": { + "patterns": [ + r"(?:either\s+)?(.+?)\s+(?:or|\/\/)\s+(.+)", + ], + "builder": lambda m: f"{m.group(1)}|{m.group(2)}", + }, } def parse_english(description: str) -> str: + """Convert an English description to a regex pattern. + + Args: + description: The English description of the pattern. + + Returns: + The corresponding regex pattern. + """ result = description + result = re.sub(r"\s+", " ", result).strip() + return result def english_to_regex(description: str, flavor: str = "pcre") -> Tuple[str, List[str]]: + """Convert an English description to a regex pattern. + + Args: + description: The English description of the pattern. + flavor: The target regex flavor. + + Returns: + A tuple of (regex_pattern, warnings). 
+ """ pattern = description.lower() - warnings = [] + + warnings: List[str] = [] replacements = [] @@ -162,12 +263,21 @@ def english_to_regex(description: str, flavor: str = "pcre") -> Tuple[str, List[ result = re.sub(r"\s+", "", result) - result = re.sub(r"\[\^?([a-z])-([a-z])\]", lambda m: f"[{m.group(1)}-{m.group(2)}]", result, flags=re.IGNORECASE) + result = re.sub(r"\[\^?([a-z])-([a-z])\]", lambda m: f"[{m.group(1)}-{m.group(2)}]", result, flags=re.IGNORECASE) return result, warnings def validate_roundtrip(original: str, converted: str) -> Tuple[bool, Optional[str]]: + """Validate that converting from regex to English and back produces a valid pattern. + + Args: + original: The original regex pattern. + converted: The pattern converted from English. + + Returns: + A tuple of (is_valid, error_message). + """ try: parse_regex(converted) return True, None @@ -175,10 +285,20 @@ def validate_roundtrip(original: str, converted: str) -> Tuple[bool, Optional[st return False, str(e) -def convert_english_to_regex(description: str, flavor: str = "pcre", validate: bool = True) -> Dict: +def convert_english_to_regex(description: str, flavor: str = "pcre", validate: bool = True) -> Dict[str, Any]: + """Convert English description to regex with full context. + + Args: + description: The English description of the pattern. + flavor: The target regex flavor. + validate: Whether to validate the result. + + Returns: + A dictionary with conversion results. + """ pattern, warnings = english_to_regex(description, flavor) - result = { + result: Dict[str, Any] = { "input": description, "output": pattern, "flavor": flavor,