Files
regex-humanizer/tests/test_parser.py
7000pctAUTO 89ae71c212
Some checks failed
CI / test (push) Failing after 9s
CI / build (push) Has been skipped
Add test files
2026-02-02 06:30:38 +00:00

134 lines
3.8 KiB
Python

"""Tests for the parser module."""
import pytest
from regex_humanizer.parser import (
tokenize,
parse_regex,
ParseError,
Literal,
CharacterClass,
Quantifier,
Group,
Alternation,
Anchor,
SpecialSequence,
)
class TestTokenizer:
    """Checks on the token sequence returned by ``tokenize``."""

    def test_tokenize_literal(self):
        """Expect "abc" to come back as a single LITERAL token carrying the text."""
        result = tokenize("abc")
        assert len(result) == 1
        only = result[0]
        assert only.type == "LITERAL"
        assert only.value == "abc"

    def test_tokenize_anchors(self):
        """Expect start anchor, literal, end anchor -- in that order."""
        # Comparing the full list of types pins both the count and the order.
        kinds = [tok.type for tok in tokenize("^test$")]
        assert kinds == ["ANCHOR_START", "LITERAL", "ANCHOR_END"]

    def test_tokenize_quantifiers(self):
        """Expect a literal followed by a STAR token for "a*"."""
        kinds = [tok.type for tok in tokenize("a*")]
        assert kinds == ["LITERAL", "STAR"]

    def test_tokenize_character_class(self):
        """Expect the stream for "[abc]" to be delimited by bracket tokens.

        Only the first and last tokens are checked; whatever sits between
        them is deliberately left unconstrained.
        """
        result = tokenize("[abc]")
        assert len(result) >= 2
        opener, closer = result[0], result[-1]
        assert opener.type == "OPEN_BRACKET"
        assert closer.type == "CLOSE_BRACKET"

    def test_tokenize_groups(self):
        """Expect group-open, literal, group-close for "(abc)"."""
        kinds = [tok.type for tok in tokenize("(abc)")]
        assert kinds == ["OPEN_GROUP", "LITERAL", "CLOSE_GROUP"]

    def test_tokenize_alternation(self):
        """Expect an ALTERNATION token between the two literals of "a|b"."""
        kinds = [tok.type for tok in tokenize("a|b")]
        assert kinds == ["LITERAL", "ALTERNATION", "LITERAL"]
class TestParser:
    """Checks on the node sequence returned by ``parse_regex``."""

    def test_parse_literal(self):
        """Expect plain text to parse into one Literal holding that text."""
        nodes = parse_regex("hello")
        assert len(nodes) == 1
        node = nodes[0]
        assert isinstance(node, Literal)
        assert node.value == "hello"

    def test_parse_character_class(self):
        """Expect a bracket expression to parse into one CharacterClass."""
        nodes = parse_regex("[abc]")
        assert len(nodes) == 1
        assert isinstance(nodes[0], CharacterClass)

    def test_parse_inverted_class(self):
        """Expect a leading caret inside brackets to set ``inverted`` to True."""
        nodes = parse_regex("[^abc]")
        assert len(nodes) == 1
        node = nodes[0]
        assert isinstance(node, CharacterClass)
        assert node.inverted is True

    def test_parse_quantifier_star(self):
        """Expect "a*" to collapse into a single Quantifier node."""
        nodes = parse_regex("a*")
        assert len(nodes) == 1
        assert isinstance(nodes[0], Quantifier)

    def test_parse_quantifier_plus(self):
        """Expect "a+" to collapse into a single Quantifier node."""
        nodes = parse_regex("a+")
        assert len(nodes) == 1
        assert isinstance(nodes[0], Quantifier)

    def test_parse_quantifier_question(self):
        """Expect "a?" to collapse into a single Quantifier node."""
        nodes = parse_regex("a?")
        assert len(nodes) == 1
        assert isinstance(nodes[0], Quantifier)

    def test_parse_group(self):
        """Expect "(abc)" to yield one Group whose content has one element."""
        nodes = parse_regex("(abc)")
        assert len(nodes) == 1
        group = nodes[0]
        assert isinstance(group, Group)
        assert len(group.content) == 1

    def test_parse_non_capturing_group(self):
        """Expect the "(?:" prefix to produce a Group with ``capturing`` False."""
        nodes = parse_regex("(?:abc)")
        assert len(nodes) == 1
        group = nodes[0]
        assert isinstance(group, Group)
        assert group.capturing is False

    def test_parse_alternation(self):
        """Expect "a|b" to yield one Alternation with two options."""
        nodes = parse_regex("a|b")
        assert len(nodes) == 1
        branch = nodes[0]
        assert isinstance(branch, Alternation)
        assert len(branch.options) == 2

    def test_parse_anchors(self):
        """Expect three nodes for "^start$", with Anchor nodes at both ends.

        The middle node's type is not checked here.
        """
        nodes = parse_regex("^start$")
        assert len(nodes) == 3
        head, tail = nodes[0], nodes[2]
        assert isinstance(head, Anchor)
        assert isinstance(tail, Anchor)

    def test_parse_special_sequences(self):
        """Expect two top-level nodes; only the first is type-checked."""
        nodes = parse_regex(r"\d+\w+")
        assert len(nodes) == 2
        assert isinstance(nodes[0], Quantifier)

    def test_parse_complex_pattern(self):
        """Smoke test: an email-like pattern parses to a non-empty result."""
        email_like = r"^\w+@[a-z]+\.[a-z]+$"
        nodes = parse_regex(email_like)
        assert len(nodes) > 0

    def test_parse_error_unclosed_bracket(self):
        """Expect ParseError when a character class is never closed."""
        unterminated = "[abc"
        with pytest.raises(ParseError):
            parse_regex(unterminated)

    def test_parse_error_unclosed_group(self):
        """Expect ParseError when a group is never closed."""
        unterminated = "(abc"
        with pytest.raises(ParseError):
            parse_regex(unterminated)