# Listing metadata: 173 lines, 5.3 KiB, Python
"""Tests for the parser module."""
|
|
|
|
import pytest
|
|
|
|
from regex_humanizer.parser import (
|
|
tokenize,
|
|
parse_regex,
|
|
ParseError,
|
|
Literal,
|
|
CharacterClass,
|
|
Quantifier,
|
|
Group,
|
|
Alternation,
|
|
Anchor,
|
|
)
|
|
|
|
|
|
class TestTokenizer:
    """Tests for the tokenize function.

    Each test feeds a small pattern to ``tokenize`` and checks the ``type``
    (and, where relevant, the ``value``) of the tokens that come back.
    """

    def test_tokenize_literal(self):
        """A run of plain characters is expected as one LITERAL token."""
        tokens = tokenize("abc")
        assert len(tokens) == 1
        assert tokens[0].type == "LITERAL"
        assert tokens[0].value == "abc"

    def test_tokenize_anchors(self):
        """``^`` and ``$`` are expected as anchor tokens around the literal."""
        tokens = tokenize("^test$")
        assert len(tokens) == 3
        assert tokens[0].type == "ANCHOR_START"
        assert tokens[1].type == "LITERAL"
        assert tokens[2].type == "ANCHOR_END"

    def test_tokenize_quantifiers(self):
        """``*`` is expected as a STAR token following the literal."""
        tokens = tokenize("a*")
        assert len(tokens) == 2
        assert tokens[0].type == "LITERAL"
        assert tokens[1].type == "STAR"

    def test_tokenize_character_class(self):
        """A character class is expected to be delimited by bracket tokens."""
        tokens = tokenize("[abc]")
        # Only the delimiters are pinned; how the class body is tokenized
        # between them is deliberately left open (hence ">=" and "[-1]").
        assert len(tokens) >= 2
        assert tokens[0].type == "OPEN_BRACKET"
        assert tokens[-1].type == "CLOSE_BRACKET"

    def test_tokenize_groups(self):
        """Parentheses are expected as group tokens around the literal."""
        tokens = tokenize("(abc)")
        assert len(tokens) == 3
        assert tokens[0].type == "OPEN_GROUP"
        assert tokens[1].type == "LITERAL"
        assert tokens[2].type == "CLOSE_GROUP"

    def test_tokenize_alternation(self):
        """``|`` is expected as an ALTERNATION token between two literals."""
        tokens = tokenize("a|b")
        assert len(tokens) == 3
        assert tokens[0].type == "LITERAL"
        assert tokens[1].type == "ALTERNATION"
        assert tokens[2].type == "LITERAL"

    def test_tokenize_escape(self):
        """An escaped character is expected as a single ESCAPED token."""
        tokens = tokenize(r"\.")
        assert len(tokens) == 1
        assert tokens[0].type == "ESCAPED"

    def test_tokenize_special_sequences(self):
        """Special sequences with quantifiers yield at least four tokens."""
        tokens = tokenize(r"\d+\w*\s?")
        # Only a lower bound on the token count is asserted here; the
        # individual token types are not checked by this test.
        assert len(tokens) >= 4
|
|
|
|
|
|
class TestParser:
    """Tests for the parse_regex function.

    Each test parses a small pattern and checks the node types (and selected
    attributes) of the returned AST, which the tests treat as an indexable
    sequence of nodes.
    """

    def test_parse_literal(self):
        """A plain string is expected to parse to one Literal node."""
        ast = parse_regex("hello")
        assert len(ast) == 1
        assert isinstance(ast[0], Literal)
        assert ast[0].value == "hello"

    def test_parse_character_class(self):
        """``[abc]`` is expected to parse to one CharacterClass node."""
        ast = parse_regex("[abc]")
        assert len(ast) == 1
        assert isinstance(ast[0], CharacterClass)

    def test_parse_inverted_class(self):
        """``[^abc]`` is expected to parse to an inverted CharacterClass."""
        ast = parse_regex("[^abc]")
        assert len(ast) == 1
        assert isinstance(ast[0], CharacterClass)
        assert ast[0].inverted is True

    def test_parse_quantifier_star(self):
        """``a*`` is expected to parse to one Quantifier node."""
        ast = parse_regex("a*")
        assert len(ast) == 1
        assert isinstance(ast[0], Quantifier)

    def test_parse_quantifier_plus(self):
        """``a+`` is expected to parse to one Quantifier node."""
        ast = parse_regex("a+")
        assert len(ast) == 1
        assert isinstance(ast[0], Quantifier)

    def test_parse_quantifier_question(self):
        """``a?`` is expected to parse to one Quantifier node."""
        ast = parse_regex("a?")
        assert len(ast) == 1
        assert isinstance(ast[0], Quantifier)

    def test_parse_group(self):
        """``(abc)`` is expected to parse to a Group wrapping one Literal."""
        ast = parse_regex("(abc)")
        assert len(ast) == 1
        assert isinstance(ast[0], Group)
        assert len(ast[0].content) == 1
        assert isinstance(ast[0].content[0], Literal)
        assert ast[0].content[0].value == "abc"

    def test_parse_non_capturing_group(self):
        """``(?:abc)`` is expected to parse to a Group with capturing False."""
        ast = parse_regex("(?:abc)")
        assert len(ast) == 1
        assert isinstance(ast[0], Group)
        assert ast[0].capturing is False

    def test_parse_alternation(self):
        """``a|b`` is expected to parse to one Alternation with two options."""
        ast = parse_regex("a|b")
        assert len(ast) == 1
        assert isinstance(ast[0], Alternation)
        assert len(ast[0].options) == 2

    def test_parse_anchors(self):
        """``^start$`` is expected to parse to Anchor, Literal, Anchor."""
        ast = parse_regex("^start$")
        assert len(ast) == 3
        assert isinstance(ast[0], Anchor)
        assert isinstance(ast[1], Literal)
        assert isinstance(ast[2], Anchor)
        assert ast[1].value == "start"

    def test_parse_special_sequences(self):
        """Two quantified special sequences parse to two Quantifier nodes."""
        ast = parse_regex(r"\d+\w+")
        assert len(ast) == 2
        assert isinstance(ast[0], Quantifier)
        assert isinstance(ast[1], Quantifier)

    def test_parse_complex_pattern(self):
        """An email-like pattern is expected to parse to a non-empty AST."""
        pattern = r"^\w+@[a-z]+\.[a-z]+$"
        ast = parse_regex(pattern)
        # Smoke test only: the node structure is not inspected.
        assert len(ast) > 0

    def test_parse_error_unclosed_bracket(self):
        """An unclosed ``[`` is expected to raise ParseError."""
        with pytest.raises(ParseError):
            parse_regex("[abc")

    def test_parse_error_unclosed_group(self):
        """An unclosed ``(`` is expected to raise ParseError."""
        with pytest.raises(ParseError):
            parse_regex("(abc")
|