"""Implementation of regex to English conversion.""" from typing import Any, List from ..parser import ( Alternation, Anchor, ASTNode, Backreference, CharacterClass, Group, Literal, Quantifier, SpecialSequence, parse_regex, ) def quantifier_description(quantifier: Quantifier, child_desc: str) -> str: """Generate description for a quantifier.""" if quantifier.min == 0 and quantifier.max == 1: base = "optionally" elif quantifier.min == 0 and quantifier.max == Quantifier.MAX_UNBOUNDED: base = "zero or more times" elif quantifier.min == 1 and quantifier.max == Quantifier.MAX_UNBOUNDED: base = "one or more times" elif quantifier.min == quantifier.max: base = f"exactly {quantifier.min} time{'s' if quantifier.min != 1 else ''}" elif quantifier.max == Quantifier.MAX_UNBOUNDED: base = f"at least {quantifier.min} time{'s' if quantifier.min != 1 else ''}" else: base = f"between {quantifier.min} and {quantifier.max} times" if quantifier.lazy: base += " (lazy)" elif quantifier.possessive: base += " (possessive)" return f"{child_desc}, {base}" def literal_description(node: Literal) -> str: """Generate description for a literal character.""" if node.value == " ": return "a space" elif node.value == "\t": return "a tab character" elif node.value == "\n": return "a newline character" elif node.value == "\r": return "a carriage return" elif node.value in r".^$*+?{}[]\|()": return f"the literal character '{node.value}'" else: return f"the letter '{node.value}'" def character_class_description(node: CharacterClass) -> str: """Generate description for a character class.""" parts = [] if node.inverted: parts.append("any character except") else: parts.append("any of") content_parts = [] for char in node.characters: if char == " ": content_parts.append("a space") elif char == "\t": content_parts.append("a tab") elif char == "\n": content_parts.append("a newline") elif char in "-]": content_parts.append(f"'{char}'") else: content_parts.append(f"'{char}'") for start, end in node.ranges: content_parts.append(f"characters from '{start}' to '{end}'") if not content_parts: return "an empty character class" if len(content_parts) == 1: content_str = content_parts[0] elif len(content_parts) == 2: content_str = f"{content_parts[0]} or {content_parts[1]}" else: content_str = ", ".join(content_parts[:-1]) + f", or {content_parts[-1]}" return " ".join(parts) + " " + content_str def special_sequence_description(node: SpecialSequence) -> str: """Generate description for a special sequence.""" sequences = { ".": "any single character", r"\d": "a digit (0-9)", r"\D": "any non-digit character", r"\w": "a word character (letter, digit, or underscore)", r"\W": "any non-word character", r"\s": "any whitespace character", r"\S": "any non-whitespace character", r"\b": "a word boundary", r"\B": "a non-word boundary", r"\A": "the start of the string", r"\Z": "the end of the string", r"\z": "the absolute end of the string", r"^": "the start of the string", r"$": "the end of the string", } return sequences.get(node.sequence, node.sequence) def anchor_description(node: Anchor) -> str: """Generate description for an anchor.""" anchors = { "^": "the start of the string", "$": "the end of the string", r"\b": "a word boundary", r"\B": "a position that is not a word boundary", } return anchors.get(node.kind, node.kind) def group_description(node: Group) -> str: """Generate description for a group.""" if node.name: name_desc = f"named '{node.name}'" elif not node.capturing: name_desc = "non-capturing" else: name_desc = "capturing" if node.content: inner_desc = generate_description(node.content) return f"a {name_desc} group containing: {inner_desc}" else: return f"an empty {name_desc} group" def alternation_description(node: Alternation) -> str: """Generate description for an alternation.""" option_descs = [] for option in node.options: if option: option_descs.append(generate_description(option)) else: option_descs.append("empty string") if len(option_descs) == 1: return option_descs[0] return " or ".join(option_descs) def backreference_description(node: Backreference) -> str: """Generate description for a backreference.""" if isinstance(node.reference, int): return f"whatever was matched by capture group {node.reference}" else: return f"whatever was matched by the group named '{node.reference}'" def generate_description(nodes: List[ASTNode]) -> str: """Generate a human-readable description for a list of AST nodes.""" if not nodes: return "an empty pattern" parts = [] for node in nodes: if isinstance(node, Literal): parts.append(literal_description(node)) elif isinstance(node, CharacterClass): parts.append(character_class_description(node)) elif isinstance(node, Quantifier): if hasattr(node, 'child') and node.child: child_desc = generate_description([node.child]) parts.append(quantifier_description(node, child_desc)) else: parts.append("an element with a quantifier") elif isinstance(node, SpecialSequence): parts.append(special_sequence_description(node)) elif isinstance(node, Anchor): parts.append(anchor_description(node)) elif isinstance(node, Group): parts.append(group_description(node)) elif isinstance(node, Alternation): parts.append(alternation_description(node)) elif isinstance(node, Backreference): parts.append(backreference_description(node)) else: parts.append(str(node)) if len(parts) == 1: return parts[0] result = "" for i, part in enumerate(parts): if i == 0: result = part elif i == len(parts) - 1: result += f" and {part}" else: result += f", {part}" return result def convert_to_english(pattern: str, flavor: str = "pcre") -> str: """Convert a regex pattern to human-readable English. Args: pattern: The regex pattern to convert. flavor: The regex flavor (pcre, javascript, python, go). Returns: A human-readable English description of the pattern. """ try: ast = parse_regex(pattern) return generate_description(ast) except Exception as e: return f"Error parsing pattern: {str(e)}" def convert_to_english_verbose(pattern: str, flavor: str = "pcre") -> dict: """Convert a regex pattern to detailed structure. Args: pattern: The regex pattern to convert. flavor: The regex flavor. Returns: A dictionary with pattern analysis. """ try: ast = parse_regex(pattern) result: dict[str, Any] = { "pattern": pattern, "flavor": flavor, "description": generate_description(ast), "structure": [], } for node in ast: node_info = node_to_dict(node) result["structure"].append(node_info) return result except Exception as e: return { "pattern": pattern, "flavor": flavor, "description": f"Error parsing pattern: {str(e)}", "structure": [], "error": str(e), } def node_to_dict(node: ASTNode) -> dict[str, Any]: """Convert an AST node to a dictionary.""" result: dict[str, Any] = {"type": type(node).__name__} if hasattr(node, 'position'): result["position"] = node.position if isinstance(node, Literal): result["value"] = node.value result["escaped"] = node.escaped elif isinstance(node, CharacterClass): result["inverted"] = node.inverted result["characters"] = node.characters result["ranges"] = node.ranges elif isinstance(node, Quantifier): result["min"] = node.min result["max"] = node.max result["lazy"] = node.lazy result["possessive"] = node.possessive if hasattr(node, 'child') and node.child: result["child"] = node_to_dict(node.child) elif isinstance(node, Group): result["capturing"] = node.capturing result["name"] = node.name result["content"] = [node_to_dict(child) for child in node.content] elif isinstance(node, Alternation): result["options"] = [[node_to_dict(child) for child in option] for option in node.options] elif isinstance(node, Anchor): result["kind"] = node.kind elif isinstance(node, SpecialSequence): result["sequence"] = node.sequence elif isinstance(node, Backreference): result["reference"] = node.reference return result