feat: add recorder, project detector, and pattern detection

2026-01-31 10:24:45 +00:00
parent 535a00e5b6
commit e89bf6f8c8
1 changed files with 107 additions and 0 deletions
--- a/cli_memory/patterns.py
+++ b/cli_memory/patterns.py
@@ -0,0 +1,107 @@
 import logging
 from typing import Optional, List, Dict, Any
 from collections import Counter
 from datetime import datetime
 from .config import Config
 from .models import Command, Pattern, Project
 from .database import Database
 logger = logging.getLogger(__name__)
 class PatternDetector:
    """Find recurring command sequences in a list of recorded commands.

    Settings are read from ``config`` under the ``patterns.*`` keys; ``db`` is
    only used by :meth:`analyze_workflow_patterns`.
    """

    def __init__(self, config: Optional[Config] = None, db: Optional[Database] = None):
        """Store the collaborators, creating defaults for any that are omitted.

        Args:
            config: Settings provider; a new ``Config()`` is built when None.
            db: Storage backend; a new ``Database()`` is built when None.
        """
        # Test against None explicitly: ``config or Config()`` would silently
        # discard a caller-supplied object that merely evaluates as falsy.
        self.config = config if config is not None else Config()
        self.db = db if db is not None else Database()

    def detect_patterns(
        self, commands: List[Command], project_id: Optional[int] = None
    ) -> List[Pattern]:
        """Build a ``Pattern`` for every command sequence that repeats often enough.

        The commands are ordered by ``timestamp``, every contiguous window
        whose length lies between ``patterns.min_sequence_length`` and
        ``patterns.max_pattern_length`` is counted, and windows seen at least
        ``patterns.min_occurrences`` times are reported.

        Args:
            commands: Recorded commands; each must expose ``timestamp`` and
                ``command``.
            project_id: Value copied into each resulting pattern.

        Returns:
            One ``Pattern`` per qualifying sequence (empty list if none).
        """
        min_len = self.config.get("patterns.min_sequence_length", 3)
        min_occ = self.config.get("patterns.min_occurrences", 2)
        max_len = self.config.get("patterns.max_pattern_length", 10)
        sorted_commands = sorted(commands, key=lambda c: c.timestamp)
        sequences = self._extract_sequences(sorted_commands, min_len, max_len)
        # Lists are unhashable, so each window is frozen into a tuple for counting.
        pattern_counts = Counter(tuple(seq) for seq in sequences)
        patterns = []
        for seq, count in pattern_counts.items():
            if count < min_occ:
                continue
            patterns.append(
                Pattern(
                    project_id=project_id,
                    name=self._generate_pattern_name(seq),
                    command_sequence=list(seq),
                    occurrences=count,
                    # Confidence grows linearly with the count and saturates
                    # at 1.0 once a sequence has been seen five times.
                    confidence=min(count / 5.0, 1.0),
                    # NOTE: utcnow() yields a naive datetime and is deprecated
                    # since Python 3.12; kept so stored timestamps stay naive.
                    created_at=datetime.utcnow(),
                )
            )
        return patterns

    def _extract_sequences(
        self, commands: List[Command], min_len: int, max_len: int
    ) -> List[List[str]]:
        """Return every contiguous window of command strings in the length range.

        Windows are produced shortest length first and, within one length,
        from left to right.

        Args:
            commands: Commands in the order they should be scanned.
            min_len: Smallest window length; values below 1 are treated as 1
                so that no empty windows are produced.
            max_len: Largest window length (inclusive).

        Returns:
            A flat list in which each item is one window (a list of command
            strings).
        """
        cmd_strings = [c.command for c in commands]
        total = len(cmd_strings)
        shortest = max(min_len, 1)
        # A window can never be longer than the history itself.
        longest = min(max_len, total)
        sequences = []
        for length in range(shortest, longest + 1):
            for start in range(total - length + 1):
                sequences.append(cmd_strings[start : start + length])
        return sequences

    def _generate_pattern_name(self, sequence: tuple) -> str:
        """Derive a name of the form ``<first word>-sequence-<length>``.

        Falls back to ``"pattern"`` as the prefix when the sequence is empty
        or its first command contains no words (empty or whitespace only).
        """
        words = sequence[0].split() if sequence else []
        first_cmd = words[0] if words else "pattern"
        return f"{first_cmd}-sequence-{len(sequence)}"

    def find_similar_patterns(
        self, pattern: Pattern, patterns: List[Pattern]
    ) -> List[Pattern]:
        """Return the members of ``patterns`` that resemble ``pattern``.

        A candidate qualifies when its similarity score reaches
        ``patterns.similarity_threshold`` (default 0.8). The reference pattern
        itself is never returned.

        Args:
            pattern: The reference pattern.
            patterns: Candidates to compare against the reference.

        Returns:
            Matching candidates in their original order.
        """
        threshold = self.config.get("patterns.similarity_threshold", 0.8)
        similar = []
        for other in patterns:
            if other is pattern:
                continue
            # Only trust id equality when the reference actually has an id;
            # otherwise patterns sharing a None id would all exclude each other.
            if pattern.id is not None and other.id == pattern.id:
                continue
            if self._calculate_similarity(pattern, other) >= threshold:
                similar.append(other)
        return similar

    def _calculate_similarity(self, p1: Pattern, p2: Pattern) -> float:
        """Score two patterns by the overlap of their distinct commands.

        The score is ``|A & B| / |A | B|`` over the sets of commands, so order
        and repetition inside a sequence are ignored. Returns 0.0 when either
        pattern has no commands.
        """
        if not p1.command_sequence or not p2.command_sequence:
            return 0.0
        set1 = set(p1.command_sequence)
        set2 = set(p2.command_sequence)
        # Both sets are non-empty here, so the union cannot be zero.
        return len(set1 & set2) / len(set1 | set2)

    def analyze_workflow_patterns(
        self, project_id: Optional[int] = None
    ) -> Dict[str, Any]:
        """Summarise detected patterns and workflow usage from the database.

        Loads commands (requested with ``limit=10000``) and workflows for
        ``project_id`` from ``self.db`` and runs :meth:`detect_patterns` on
        the commands.

        Returns:
            A dict with ``total_patterns``, ``high_confidence_patterns``
            (confidence >= 0.7), ``patterns`` (each pattern's ``to_dict()``)
            and ``workflow_usage`` (sum of the workflows' ``usage_count``).
        """
        commands = self.db.get_commands(project_id=project_id, limit=10000)
        workflows = self.db.get_all_workflows(project_id)
        patterns = self.detect_patterns(commands, project_id)
        return {
            "total_patterns": len(patterns),
            "high_confidence_patterns": sum(
                1 for p in patterns if p.confidence >= 0.7
            ),
            "patterns": [p.to_dict() for p in patterns],
            "workflow_usage": sum(w.usage_count for w in workflows),
        }