diff --git a/tests/test_parser.py b/tests/test_parser.py
new file mode 100644
index 0000000..a88b31d
--- /dev/null
+++ b/tests/test_parser.py
@@ -0,0 +1,160 @@
+"""Unit tests for the tokenizer and parser in regex_humanizer.parser."""
+
+import pytest
+
+from regex_humanizer.parser import (
+    tokenize,
+    parse_regex,
+    ParseError,
+    Literal,
+    CharacterClass,
+    Quantifier,
+    Group,
+    Alternation,
+    Anchor,
+    SpecialSequence,
+)
+
+
+class TestTokenizer:
+    """Expectations on the token list returned by ``tokenize``."""
+
+    def test_tokenize_literal(self):
+        """``abc`` is expected to yield one LITERAL token valued ``abc``."""
+        toks = tokenize("abc")
+        assert len(toks) == 1
+        only = toks[0]
+        assert only.type == "LITERAL"
+        assert only.value == "abc"
+
+    def test_tokenize_anchors(self):
+        """``^test$`` is expected to yield anchor, literal, anchor tokens."""
+        toks = tokenize("^test$")
+        assert len(toks) == 3
+        kinds = [tok.type for tok in toks]
+        assert kinds == ["ANCHOR_START", "LITERAL", "ANCHOR_END"]
+
+    def test_tokenize_quantifiers(self):
+        """``a*`` is expected to yield a LITERAL token followed by STAR."""
+        toks = tokenize("a*")
+        assert len(toks) == 2
+        kinds = [tok.type for tok in toks]
+        assert kinds == ["LITERAL", "STAR"]
+
+    def test_tokenize_character_class(self):
+        """``[abc]`` is expected to start and end with bracket tokens."""
+        toks = tokenize("[abc]")
+        # Only the first and last tokens are pinned; the count is a lower bound.
+        assert len(toks) >= 2
+        first, last = toks[0], toks[-1]
+        assert first.type == "OPEN_BRACKET"
+        assert last.type == "CLOSE_BRACKET"
+
+    def test_tokenize_groups(self):
+        """``(abc)`` is expected to yield group-open, literal, group-close."""
+        toks = tokenize("(abc)")
+        assert len(toks) == 3
+        kinds = [tok.type for tok in toks]
+        assert kinds == ["OPEN_GROUP", "LITERAL", "CLOSE_GROUP"]
+
+    def test_tokenize_alternation(self):
+        """``a|b`` is expected to yield two literals split by ALTERNATION."""
+        toks = tokenize("a|b")
+        assert len(toks) == 3
+        kinds = [tok.type for tok in toks]
+        assert kinds == ["LITERAL", "ALTERNATION", "LITERAL"]
+
+
+class TestParser:
+    """Expectations on the node sequence returned by ``parse_regex``."""
+
+    def test_parse_literal(self):
+        """``hello`` is expected to parse to one Literal with that value."""
+        nodes = parse_regex("hello")
+        assert len(nodes) == 1
+        node = nodes[0]
+        assert isinstance(node, Literal)
+        assert node.value == "hello"
+
+    def test_parse_character_class(self):
+        """``[abc]`` is expected to parse to one CharacterClass node."""
+        nodes = parse_regex("[abc]")
+        assert len(nodes) == 1
+        assert isinstance(nodes[0], CharacterClass)
+
+    def test_parse_inverted_class(self):
+        """``[^abc]`` is expected to parse to an inverted CharacterClass."""
+        nodes = parse_regex("[^abc]")
+        assert len(nodes) == 1
+        node = nodes[0]
+        assert isinstance(node, CharacterClass)
+        assert node.inverted is True
+
+    def test_parse_quantifier_star(self):
+        """``a*`` is expected to parse to one Quantifier node."""
+        nodes = parse_regex("a*")
+        assert len(nodes) == 1
+        assert isinstance(nodes[0], Quantifier)
+
+    def test_parse_quantifier_plus(self):
+        """``a+`` is expected to parse to one Quantifier node."""
+        nodes = parse_regex("a+")
+        assert len(nodes) == 1
+        assert isinstance(nodes[0], Quantifier)
+
+    def test_parse_quantifier_question(self):
+        """``a?`` is expected to parse to one Quantifier node."""
+        nodes = parse_regex("a?")
+        assert len(nodes) == 1
+        assert isinstance(nodes[0], Quantifier)
+
+    def test_parse_group(self):
+        """``(abc)`` is expected to parse to one Group with one content item."""
+        nodes = parse_regex("(abc)")
+        assert len(nodes) == 1
+        group = nodes[0]
+        assert isinstance(group, Group)
+        assert len(group.content) == 1
+
+    def test_parse_non_capturing_group(self):
+        """``(?:abc)`` is expected to parse to a Group with capturing False."""
+        nodes = parse_regex("(?:abc)")
+        assert len(nodes) == 1
+        group = nodes[0]
+        assert isinstance(group, Group)
+        assert group.capturing is False
+
+    def test_parse_alternation(self):
+        """``a|b`` is expected to parse to one Alternation with two options."""
+        nodes = parse_regex("a|b")
+        assert len(nodes) == 1
+        alt = nodes[0]
+        assert isinstance(alt, Alternation)
+        assert len(alt.options) == 2
+
+    def test_parse_anchors(self):
+        """``^start$`` is expected to give three nodes, Anchors at both ends."""
+        nodes = parse_regex("^start$")
+        assert len(nodes) == 3
+        assert all(isinstance(nodes[i], Anchor) for i in (0, 2))
+
+    def test_parse_special_sequences(self):
+        """Two quantified escapes are expected to give two nodes, Quantifier first."""
+        nodes = parse_regex(r"\d+\w+")
+        assert len(nodes) == 2
+        assert isinstance(nodes[0], Quantifier)
+
+    def test_parse_complex_pattern(self):
+        """A longer mixed pattern is expected to parse to a non-empty result."""
+        nodes = parse_regex(r"^\w+@[a-z]+\.[a-z]+$")
+        assert len(nodes) > 0
+
+    def test_parse_error_unclosed_bracket(self):
+        """An unterminated ``[`` is expected to raise ParseError."""
+        with pytest.raises(ParseError):
+            parse_regex("[abc")
+
+    def test_parse_error_unclosed_group(self):
+        """An unterminated ``(`` is expected to raise ParseError."""
+        with pytest.raises(ParseError):
+            parse_regex("(abc")