diff --git a/regex_humanizer/examples/generator.py b/regex_humanizer/examples/generator.py index 3ebbad0..db61fe4 100644 --- a/regex_humanizer/examples/generator.py +++ b/regex_humanizer/examples/generator.py @@ -1,7 +1,9 @@ +"""Generate concrete match examples for regex patterns.""" + import random import re import string -from typing import List +from typing import List, Set from ..parser import ( Alternation, @@ -25,14 +27,16 @@ PUNCTUATION = "!@#$%^&*()_+-=[]{}|;:,.<>?" def generate_literal_example(node: Literal) -> str: + """Generate an example for a literal.""" return node.value def generate_character_class_example(node: CharacterClass) -> str: + """Generate an example for a character class.""" options = [] for char in node.characters: - if char in r"-\] ": + if char in r"\-\]": options.append("\\" + char) elif char == "\t": options.append("\\t") @@ -56,6 +60,7 @@ def generate_character_class_example(node: CharacterClass) -> str: def generate_special_sequence_example(node: SpecialSequence) -> str: + """Generate an example for a special sequence.""" sequences = { ".": random.choice(string.ascii_letters + string.digits + "!@#$"), r"\d": random.choice(DIGITS), @@ -73,10 +78,12 @@ def generate_special_sequence_example(node: SpecialSequence) -> str: def generate_anchor_example(node: Anchor) -> str: + """Generate an example for an anchor.""" return "" def generate_quantifier_example(node: Quantifier) -> str: + """Generate an example for a quantifier.""" if not hasattr(node, 'child') or not node.child: return "*" @@ -131,10 +138,12 @@ def generate_quantifier_example(node: Quantifier) -> str: def generate_group_example(node: Group) -> str: + """Generate an example for a group.""" return "".join(generate_node_example(child) for child in node.content) def generate_alternation_example(node: Alternation) -> str: + """Generate an example for an alternation.""" if not node.options: return "" @@ -147,10 +156,12 @@ def generate_alternation_example(node: Alternation) -> str: def generate_backreference_example(node: Backreference) -> str: + """Generate an example for a backreference.""" return "[reference]" def generate_node_example(node: ASTNode) -> str: + """Generate an example for any AST node.""" if isinstance(node, Literal): return generate_literal_example(node) elif isinstance(node, CharacterClass): @@ -172,9 +183,19 @@ def generate_node_example(node: ASTNode) -> str: def generate_examples(pattern: str, count: int = 5, flavor: str = "pcre") -> List[str]: + """Generate example strings that match the given pattern. + + Args: + pattern: The regex pattern. + count: Number of examples to generate. + flavor: The regex flavor. + + Returns: + A list of example strings that match the pattern. + """ try: ast = parse_regex(pattern) - examples = set() + examples: Set[str] = set() for _ in range(count * 3): if len(examples) >= count: @@ -217,6 +238,17 @@ def generate_examples(pattern: str, count: int = 5, flavor: str = "pcre") -> Lis def generate_match_examples(pattern: str, test_string: str, count: int = 5, flavor: str = "pcre") -> List[str]: + """Generate examples from a test string that match the pattern. + + Args: + pattern: The regex pattern. + test_string: The string to search for matches. + count: Maximum number of examples to return. + flavor: The regex flavor. + + Returns: + A list of matching substrings from the test string. + """ try: compiled = re.compile(pattern) matches = compiled.findall(test_string)