diff --git a/shell_speak/nlp.py b/shell_speak/nlp.py
index 6b6dee2..2a4e6e7 100644
--- a/shell_speak/nlp.py
+++ b/shell_speak/nlp.py
@@ -1,7 +1,6 @@
 """NLP preprocessing and tokenization module."""
 
 import re
-from typing import List, Set
 
 
 def normalize_text(text: str) -> str:
@@ -11,14 +10,14 @@ def normalize_text(text: str) -> str:
     return text
 
 
-def tokenize(text: str) -> List[str]:
+def tokenize(text: str) -> list[str]:
     """Tokenize text into words."""
     text = normalize_text(text)
     tokens = re.findall(r'\b\w+\b', text)
     return tokens
 
 
-def extract_keywords(text: str) -> Set[str]:
+def extract_keywords(text: str) -> set[str]:
     """Extract important keywords from text."""
     stopwords = {
         'the', 'a', 'an', 'is', 'are', 'was', 'were', 'be', 'been', 'being',