"""Tests for the parser module.""" import pytest from regex_humanizer.parser import ( tokenize, parse_regex, ParseError, Literal, CharacterClass, Quantifier, Group, Alternation, Anchor, SpecialSequence, ) class TestTokenizer: def test_tokenize_literal(self): tokens = tokenize("abc") assert len(tokens) == 1 assert tokens[0].type == "LITERAL" assert tokens[0].value == "abc" def test_tokenize_anchors(self): tokens = tokenize("^test$") assert len(tokens) == 3 assert tokens[0].type == "ANCHOR_START" assert tokens[1].type == "LITERAL" assert tokens[2].type == "ANCHOR_END" def test_tokenize_quantifiers(self): tokens = tokenize("a*") assert len(tokens) == 2 assert tokens[0].type == "LITERAL" assert tokens[1].type == "STAR" def test_tokenize_character_class(self): tokens = tokenize("[abc]") assert len(tokens) >= 2 assert tokens[0].type == "OPEN_BRACKET" assert tokens[-1].type == "CLOSE_BRACKET" def test_tokenize_groups(self): tokens = tokenize("(abc)") assert len(tokens) == 3 assert tokens[0].type == "OPEN_GROUP" assert tokens[1].type == "LITERAL" assert tokens[2].type == "CLOSE_GROUP" def test_tokenize_alternation(self): tokens = tokenize("a|b") assert len(tokens) == 3 assert tokens[0].type == "LITERAL" assert tokens[1].type == "ALTERNATION" assert tokens[2].type == "LITERAL" class TestParser: def test_parse_literal(self): ast = parse_regex("hello") assert len(ast) == 1 assert isinstance(ast[0], Literal) assert ast[0].value == "hello" def test_parse_character_class(self): ast = parse_regex("[abc]") assert len(ast) == 1 assert isinstance(ast[0], CharacterClass) def test_parse_inverted_class(self): ast = parse_regex("[^abc]") assert len(ast) == 1 assert isinstance(ast[0], CharacterClass) assert ast[0].inverted is True def test_parse_quantifier_star(self): ast = parse_regex("a*") assert len(ast) == 1 assert isinstance(ast[0], Quantifier) def test_parse_quantifier_plus(self): ast = parse_regex("a+") assert len(ast) == 1 assert isinstance(ast[0], Quantifier) def test_parse_quantifier_question(self): ast = parse_regex("a?") assert len(ast) == 1 assert isinstance(ast[0], Quantifier) def test_parse_group(self): ast = parse_regex("(abc)") assert len(ast) == 1 assert isinstance(ast[0], Group) assert len(ast[0].content) == 1 def test_parse_non_capturing_group(self): ast = parse_regex("(?:abc)") assert len(ast) == 1 assert isinstance(ast[0], Group) assert ast[0].capturing is False def test_parse_alternation(self): ast = parse_regex("a|b") assert len(ast) == 1 assert isinstance(ast[0], Alternation) assert len(ast[0].options) == 2 def test_parse_anchors(self): ast = parse_regex("^start$") assert len(ast) == 3 assert isinstance(ast[0], Anchor) assert isinstance(ast[2], Anchor) def test_parse_special_sequences(self): ast = parse_regex(r"\d+\w+") assert len(ast) == 2 assert isinstance(ast[0], Quantifier) def test_parse_complex_pattern(self): pattern = r"^\w+@[a-z]+\.[a-z]+$" ast = parse_regex(pattern) assert len(ast) > 0 def test_parse_error_unclosed_bracket(self): with pytest.raises(ParseError): parse_regex("[abc") def test_parse_error_unclosed_group(self): with pytest.raises(ParseError): parse_regex("(abc")