diff --git a/tests/test_parser.py b/tests/test_parser.py index a88b31d..0bdf8d0 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -12,18 +12,21 @@ from regex_humanizer.parser import ( Group, Alternation, Anchor, - SpecialSequence, ) class TestTokenizer: + """Tests for the tokenize function.""" + def test_tokenize_literal(self): + """Test tokenizing a literal string.""" tokens = tokenize("abc") assert len(tokens) == 1 assert tokens[0].type == "LITERAL" assert tokens[0].value == "abc" def test_tokenize_anchors(self): + """Test tokenizing anchor characters.""" tokens = tokenize("^test$") assert len(tokens) == 3 assert tokens[0].type == "ANCHOR_START" @@ -31,18 +34,21 @@ class TestTokenizer: assert tokens[2].type == "ANCHOR_END" def test_tokenize_quantifiers(self): + """Test tokenizing quantifiers.""" tokens = tokenize("a*") assert len(tokens) == 2 assert tokens[0].type == "LITERAL" assert tokens[1].type == "STAR" def test_tokenize_character_class(self): + """Test tokenizing character classes.""" tokens = tokenize("[abc]") assert len(tokens) >= 2 assert tokens[0].type == "OPEN_BRACKET" assert tokens[-1].type == "CLOSE_BRACKET" def test_tokenize_groups(self): + """Test tokenizing groups.""" tokens = tokenize("(abc)") assert len(tokens) == 3 assert tokens[0].type == "OPEN_GROUP" @@ -50,84 +56,117 @@ class TestTokenizer: assert tokens[2].type == "CLOSE_GROUP" def test_tokenize_alternation(self): + """Test tokenizing alternation.""" tokens = tokenize("a|b") assert len(tokens) == 3 assert tokens[0].type == "LITERAL" assert tokens[1].type == "ALTERNATION" assert tokens[2].type == "LITERAL" + def test_tokenize_escape(self): + """Test tokenizing escaped characters.""" + tokens = tokenize(r"\.") + assert len(tokens) == 1 + assert tokens[0].type == "ESCAPED" + + def test_tokenize_special_sequences(self): + """Test tokenizing special sequences.""" + tokens = tokenize(r"\d+\w*\s?") + assert len(tokens) >= 4 + class TestParser: + """Tests for the parse_regex function.""" + def test_parse_literal(self): + """Test parsing a literal pattern.""" ast = parse_regex("hello") assert len(ast) == 1 assert isinstance(ast[0], Literal) assert ast[0].value == "hello" def test_parse_character_class(self): + """Test parsing a character class.""" ast = parse_regex("[abc]") assert len(ast) == 1 assert isinstance(ast[0], CharacterClass) def test_parse_inverted_class(self): + """Test parsing an inverted character class.""" ast = parse_regex("[^abc]") assert len(ast) == 1 assert isinstance(ast[0], CharacterClass) assert ast[0].inverted is True def test_parse_quantifier_star(self): + """Test parsing the * quantifier.""" ast = parse_regex("a*") assert len(ast) == 1 assert isinstance(ast[0], Quantifier) def test_parse_quantifier_plus(self): + """Test parsing the + quantifier.""" ast = parse_regex("a+") assert len(ast) == 1 assert isinstance(ast[0], Quantifier) def test_parse_quantifier_question(self): + """Test parsing the ? quantifier.""" ast = parse_regex("a?") assert len(ast) == 1 assert isinstance(ast[0], Quantifier) def test_parse_group(self): + """Test parsing a group.""" ast = parse_regex("(abc)") assert len(ast) == 1 assert isinstance(ast[0], Group) assert len(ast[0].content) == 1 + assert isinstance(ast[0].content[0], Literal) + assert ast[0].content[0].value == "abc" def test_parse_non_capturing_group(self): + """Test parsing a non-capturing group.""" ast = parse_regex("(?:abc)") assert len(ast) == 1 assert isinstance(ast[0], Group) assert ast[0].capturing is False def test_parse_alternation(self): + """Test parsing alternation.""" ast = parse_regex("a|b") assert len(ast) == 1 assert isinstance(ast[0], Alternation) assert len(ast[0].options) == 2 def test_parse_anchors(self): + """Test parsing anchors.""" ast = parse_regex("^start$") assert len(ast) == 3 assert isinstance(ast[0], Anchor) + assert isinstance(ast[1], Literal) assert isinstance(ast[2], Anchor) + assert ast[1].value == "start" def test_parse_special_sequences(self): + """Test parsing special sequences.""" ast = parse_regex(r"\d+\w+") assert len(ast) == 2 assert isinstance(ast[0], Quantifier) + assert isinstance(ast[1], Quantifier) def test_parse_complex_pattern(self): + """Test parsing a complex pattern.""" pattern = r"^\w+@[a-z]+\.[a-z]+$" ast = parse_regex(pattern) assert len(ast) > 0 def test_parse_error_unclosed_bracket(self): + """Test parsing error for unclosed bracket.""" with pytest.raises(ParseError): parse_regex("[abc") def test_parse_error_unclosed_group(self): + """Test parsing error for unclosed group.""" with pytest.raises(ParseError): parse_regex("(abc")