"""Tests for the parser module.""" import pytest from regex_humanizer.parser import ( tokenize, parse_regex, ParseError, Literal, CharacterClass, Quantifier, Group, Alternation, Anchor, ) class TestTokenizer: """Tests for the tokenize function.""" def test_tokenize_literal(self): """Test tokenizing a literal string.""" tokens = tokenize("abc") assert len(tokens) == 1 assert tokens[0].type == "LITERAL" assert tokens[0].value == "abc" def test_tokenize_anchors(self): """Test tokenizing anchor characters.""" tokens = tokenize("^test$") assert len(tokens) == 3 assert tokens[0].type == "ANCHOR_START" assert tokens[1].type == "LITERAL" assert tokens[2].type == "ANCHOR_END" def test_tokenize_quantifiers(self): """Test tokenizing quantifiers.""" tokens = tokenize("a*") assert len(tokens) == 2 assert tokens[0].type == "LITERAL" assert tokens[1].type == "STAR" def test_tokenize_character_class(self): """Test tokenizing character classes.""" tokens = tokenize("[abc]") assert len(tokens) >= 2 assert tokens[0].type == "OPEN_BRACKET" assert tokens[-1].type == "CLOSE_BRACKET" def test_tokenize_groups(self): """Test tokenizing groups.""" tokens = tokenize("(abc)") assert len(tokens) == 3 assert tokens[0].type == "OPEN_GROUP" assert tokens[1].type == "LITERAL" assert tokens[2].type == "CLOSE_GROUP" def test_tokenize_alternation(self): """Test tokenizing alternation.""" tokens = tokenize("a|b") assert len(tokens) == 3 assert tokens[0].type == "LITERAL" assert tokens[1].type == "ALTERNATION" assert tokens[2].type == "LITERAL" def test_tokenize_escape(self): """Test tokenizing escaped characters.""" tokens = tokenize(r"\.") assert len(tokens) == 1 assert tokens[0].type == "ESCAPED" def test_tokenize_special_sequences(self): """Test tokenizing special sequences.""" tokens = tokenize(r"\d+\w*\s?") assert len(tokens) >= 4 class TestParser: """Tests for the parse_regex function.""" def test_parse_literal(self): """Test parsing a literal pattern.""" ast = parse_regex("hello") assert len(ast) == 1 assert isinstance(ast[0], Literal) assert ast[0].value == "hello" def test_parse_character_class(self): """Test parsing a character class.""" ast = parse_regex("[abc]") assert len(ast) == 1 assert isinstance(ast[0], CharacterClass) def test_parse_inverted_class(self): """Test parsing an inverted class.""" ast = parse_regex("[^abc]") assert len(ast) == 1 assert isinstance(ast[0], CharacterClass) assert ast[0].inverted is True def test_parse_quantifier_star(self): """Test parsing the * quantifier.""" ast = parse_regex("a*") assert len(ast) == 1 assert isinstance(ast[0], Quantifier) def test_parse_quantifier_plus(self): """Test parsing the + quantifier.""" ast = parse_regex("a+") assert len(ast) == 1 assert isinstance(ast[0], Quantifier) def test_parse_quantifier_question(self): """Test parsing the ? quantifier.""" ast = parse_regex("a?") assert len(ast) == 1 assert isinstance(ast[0], Quantifier) def test_parse_group(self): """Test parsing a group.""" ast = parse_regex("(abc)") assert len(ast) == 1 assert isinstance(ast[0], Group) assert len(ast[0].content) == 1 assert isinstance(ast[0].content[0], Literal) assert ast[0].content[0].value == "abc" def test_parse_non_capturing_group(self): """Test parsing a non-capturing group.""" ast = parse_regex("(?:abc)") assert len(ast) == 1 assert isinstance(ast[0], Group) assert ast[0].capturing is False def test_parse_alternation(self): """Test parsing alternation.""" ast = parse_regex("a|b") assert len(ast) == 1 assert isinstance(ast[0], Alternation) assert len(ast[0].options) == 2 def test_parse_anchors(self): """Test parsing anchors.""" ast = parse_regex("^start$") assert len(ast) == 3 assert isinstance(ast[0], Anchor) assert isinstance(ast[1], Literal) assert isinstance(ast[2], Anchor) assert ast[1].value == "start" def test_parse_special_sequences(self): """Test parsing special sequences.""" ast = parse_regex(r"\d+\w+") assert len(ast) == 2 assert isinstance(ast[0], Quantifier) assert isinstance(ast[1], Quantifier) def test_parse_complex_pattern(self): """Test parsing a complex pattern.""" pattern = r"^\w+@[a-z]+\.[a-z]+$" ast = parse_regex(pattern) assert len(ast) > 0 def test_parse_error_unclosed_bracket(self): """Test parsing error for unclosed bracket.""" with pytest.raises(ParseError): parse_regex("[abc") def test_parse_error_unclosed_group(self): """Test parsing error for unclosed group.""" with pytest.raises(ParseError): parse_regex("(abc")