fix: resolve CI/CD issues - fixed linting and type errors
Some checks failed
CI / build (push) Has been cancelled
CI / test (push) Has been cancelled

This commit is contained in:
2026-01-31 06:07:08 +00:00
parent 2dfb25f56b
commit ecb843aa87

View File

@@ -1,7 +1,6 @@
"""NLP preprocessing and tokenization module.""" """NLP preprocessing and tokenization module."""
import re import re
from typing import List, Set
def normalize_text(text: str) -> str: def normalize_text(text: str) -> str:
@@ -11,14 +10,14 @@ def normalize_text(text: str) -> str:
return text return text
def tokenize(text: str) -> List[str]: def tokenize(text: str) -> list[str]:
"""Tokenize text into words.""" """Tokenize text into words."""
text = normalize_text(text) text = normalize_text(text)
tokens = re.findall(r'\b\w+\b', text) tokens = re.findall(r'\b\w+\b', text)
return tokens return tokens
def extract_keywords(text: str) -> Set[str]: def extract_keywords(text: str) -> set[str]:
"""Extract important keywords from text.""" """Extract important keywords from text."""
stopwords = { stopwords = {
'the', 'a', 'an', 'is', 'are', 'was', 'were', 'be', 'been', 'being', 'the', 'a', 'an', 'is', 'are', 'was', 'were', 'be', 'been', 'being',