"""Pattern detection algorithms for shell history analysis.""" from collections import Counter from dataclasses import dataclass from typing import Optional from shellhist.core import HistoryEntry, HistoryStore @dataclass class CommandPattern: """Represents a detected command pattern.""" commands: tuple[str, ...] frequency: int percentage: float def ngram_analysis( store: HistoryStore, n: int = 2, min_frequency: int = 2, ) -> list[CommandPattern]: """Analyze command sequences using n-grams. Args: store: HistoryStore to analyze. n: Size of n-grams (2 for pairs, 3 for triplets). min_frequency: Minimum occurrences to include in results. Returns: List of CommandPattern objects sorted by frequency. """ commands = [entry.command for entry in store.entries] ngrams = [] for i in range(len(commands) - n + 1): ngram = tuple(commands[i:i + n]) ngrams.append(ngram) if not ngrams: return [] counter = Counter(ngrams) total_sequences = len(ngrams) patterns = [] for ngram, count in counter.most_common(): if count >= min_frequency: percentage = (count / total_sequences) * 100 if total_sequences > 0 else 0 patterns.append(CommandPattern( commands=ngram, frequency=count, percentage=round(percentage, 2) )) return patterns def detect_repetitive_commands( store: HistoryStore, min_frequency: int = 3, ) -> list[CommandPattern]: """Detect commands that are run repeatedly. Args: store: HistoryStore to analyze. min_frequency: Minimum occurrences to consider repetitive. Returns: List of CommandPattern objects sorted by frequency. """ patterns = [] total_commands = len(store.entries) for command, freq in store.get_most_frequent(limit=100): if freq >= min_frequency and total_commands > 0: percentage = (freq / total_commands) * 100 patterns.append(CommandPattern( commands=(command,), frequency=freq, percentage=round(percentage, 2) )) return patterns def detect_command_pairs( store: HistoryStore, min_frequency: int = 2, ) -> list[CommandPattern]: """Detect frequently occurring command pairs. Args: store: HistoryStore to analyze. min_frequency: Minimum occurrences for a pair. Returns: List of CommandPattern objects. """ return ngram_analysis(store, n=2, min_frequency=min_frequency) def detect_command_triplets( store: HistoryStore, min_frequency: int = 2, ) -> list[CommandPattern]: """Detect frequently occurring command triplets. Args: store: HistoryStore to analyze. min_frequency: Minimum occurrences for a triplet. Returns: List of CommandPattern objects. """ return ngram_analysis(store, n=3, min_frequency=min_frequency) def detect_common_sequences( store: HistoryStore, max_length: int = 5, min_occurrences: int = 2, ) -> list[CommandPattern]: """Detect common command sequences of varying lengths. Args: store: HistoryStore to analyze. max_length: Maximum sequence length to check. min_occurrences: Minimum occurrences for a sequence. Returns: List of CommandPattern objects sorted by frequency. """ all_patterns = [] commands = [entry.command for entry in store.entries] for n in range(2, max_length + 1): patterns = ngram_analysis(store, n=n, min_frequency=min_occurrences) all_patterns.extend(patterns) all_patterns.sort(key=lambda x: x.frequency, reverse=True) return all_patterns