Initial upload: shell-speak CLI tool with natural language to shell command conversion

2026-01-31 05:31:15 +00:00
parent 3167439725
commit 1071335a7d
1 changed files with 123 additions and 0 deletions
--- a/shell_speak/matcher.py
+++ b/shell_speak/matcher.py
@@ -0,0 +1,123 @@
 """Pattern matching engine for shell commands."""
 import re
 from typing import List, Optional, Tuple
 from shell_speak.library import get_loader
 from shell_speak.models import CommandPattern, CommandMatch
 from shell_speak.nlp import normalize_text, extract_keywords, calculate_similarity, tokenize
 class PatternMatcher:
    """Matches natural language queries to command patterns."""
    def __init__(self):
        self._loader = get_loader()
    def match(self, query: str, tool: Optional[str] = None) -> Optional[CommandMatch]:
        """Match a query to the best command pattern."""
        normalized_query = normalize_text(query)
        self._loader.load_library(tool)
        corrections = self._loader.get_corrections()
        correction_key = f"{tool}:{normalized_query}" if tool else normalized_query
        if correction_key in corrections:
            return CommandMatch(
                pattern=CommandPattern(
                    name="user_correction",
                    tool=tool or "custom",
                    description="User-provided correction",
                    patterns=[],
                    template=corrections[correction_key],
                    explanation="Custom command from user correction",
                ),
                confidence=1.0,
                matched_query=query,
                command=corrections[correction_key],
                explanation="This command was learned from your previous correction.",
            )
        patterns = self._loader.get_patterns()
        if tool:
            patterns = [p for p in patterns if p.tool == tool]
        best_match = None
        best_score = 0.0
        for pattern in patterns:
            score = self._calculate_match_score(normalized_query, pattern)
            if score > best_score:
                best_score = score
                command = self._substitute_template(normalized_query, pattern)
                if command:
                    best_match = CommandMatch(
                        pattern=pattern,
                        confidence=score,
                        matched_query=query,
                        command=command,
                        explanation=pattern.explanation or self._generate_explanation(pattern, command),
                    )
        return best_match
    def _calculate_match_score(self, query: str, pattern: CommandPattern) -> float:
        """Calculate how well a query matches a pattern."""
        query_keywords = extract_keywords(query)
        pattern_keywords = set()
        for ptn in pattern.patterns:
            pattern_keywords.update(extract_keywords(ptn))
        if not pattern_keywords:
            return 0.0
        keyword_overlap = len(query_keywords & pattern_keywords)
        keyword_score = keyword_overlap / len(pattern_keywords) if pattern_keywords else 0.0
        best_similarity = 0.0
        for ptn in pattern.patterns:
            sim = calculate_similarity(query, ptn)
            if sim > best_similarity:
                best_similarity = sim
        combined_score = (keyword_score * 0.6) + (best_similarity * 0.4)
        return min(combined_score, 1.0)
    def _substitute_template(self, query: str, pattern: CommandPattern) -> Optional[str]:
        """Substitute variables in the template based on query."""
        template = pattern.template
        query_tokens = set(tokenize(query))
        pattern_tokens = set()
        for ptn in pattern.patterns:
            pattern_tokens.update(tokenize(ptn))
        diff_tokens = query_tokens - pattern_tokens
        variables = re.findall(r'\{(\w+)\}', template)
        var_values = {}
        for var in variables:
            lower_var = var.lower()
            matching_tokens = [t for t in diff_tokens if lower_var in t.lower() or t.lower() in lower_var]
            if matching_tokens:
                var_values[var] = matching_tokens[0]
        result = template
        for var, value in var_values.items():
            result = result.replace(f'{{{var}}}', value)
        if re.search(r'\{\w+\}', result):
            return None
        return result
    def _generate_explanation(self, pattern: CommandPattern, command: str) -> str:
        """Generate an explanation for the command."""
        return f"{pattern.description}"
 def get_matcher() -> PatternMatcher:
    """Get the global pattern matcher."""
    return PatternMatcher()