fix: resolve CI/CD issues (linting and type errors)
This commit is contained in:
@@ -1,7 +1,6 @@
|
|||||||
"""NLP preprocessing and tokenization module."""
|
"""NLP preprocessing and tokenization module."""
|
||||||
|
|
||||||
import re
|
import re
|
||||||
from typing import List, Set
|
|
||||||
|
|
||||||
|
|
||||||
def normalize_text(text: str) -> str:
|
def normalize_text(text: str) -> str:
|
||||||
@@ -11,14 +10,14 @@ def normalize_text(text: str) -> str:
|
|||||||
return text
|
return text
|
||||||
|
|
||||||
|
|
||||||
def tokenize(text: str) -> List[str]:
|
def tokenize(text: str) -> list[str]:
|
||||||
"""Tokenize text into words."""
|
"""Tokenize text into words."""
|
||||||
text = normalize_text(text)
|
text = normalize_text(text)
|
||||||
tokens = re.findall(r'\b\w+\b', text)
|
tokens = re.findall(r'\b\w+\b', text)
|
||||||
return tokens
|
return tokens
|
||||||
|
|
||||||
|
|
||||||
def extract_keywords(text: str) -> Set[str]:
|
def extract_keywords(text: str) -> set[str]:
|
||||||
"""Extract important keywords from text."""
|
"""Extract important keywords from text."""
|
||||||
stopwords = {
|
stopwords = {
|
||||||
'the', 'a', 'an', 'is', 'are', 'was', 'were', 'be', 'been', 'being',
|
'the', 'a', 'an', 'is', 'are', 'was', 'were', 'be', 'been', 'being',
|
||||||
|
|||||||
Reference in New Issue
Block a user