fix: resolve CI/CD failures caused by linting and type errors
This commit is contained in:
@@ -1,7 +1,6 @@
|
||||
"""NLP preprocessing and tokenization module."""
|
||||
|
||||
import re
|
||||
from typing import List, Set
|
||||
|
||||
|
||||
def normalize_text(text: str) -> str:
|
||||
@@ -11,14 +10,14 @@ def normalize_text(text: str) -> str:
|
||||
return text
|
||||
|
||||
|
||||
def tokenize(text: str) -> List[str]:
|
||||
def tokenize(text: str) -> list[str]:
|
||||
"""Tokenize text into words."""
|
||||
text = normalize_text(text)
|
||||
tokens = re.findall(r'\b\w+\b', text)
|
||||
return tokens
|
||||
|
||||
|
||||
def extract_keywords(text: str) -> Set[str]:
|
||||
def extract_keywords(text: str) -> set[str]:
|
||||
"""Extract important keywords from text."""
|
||||
stopwords = {
|
||||
'the', 'a', 'an', 'is', 'are', 'was', 'were', 'be', 'been', 'being',
|
||||
|
||||
Reference in New Issue
Block a user