"""Pattern detection algorithms for shell history."""

from collections import defaultdict
from dataclasses import dataclass, field
from typing import Optional

from shellhist.core import HistoryEntry, HistoryStore


@dataclass
class CommandPattern:
    """Represents a detected command pattern."""

    # The command strings making up the pattern, in the order they occurred.
    commands: list[str]
    # How many times the pattern was observed.
    frequency: int
    # Share of the pattern among all counted candidates, on a 0-100 scale.
    percentage: float = 0.0


def _count_ngrams(
    entries: list[HistoryEntry],
    n: int,
) -> dict[tuple[str, ...], int]:
    """Count every run of ``n`` consecutive commands sharing one shell type.

    Only ``len()``, integer indexing and the ``command`` / ``shell_type``
    attributes of each entry are used.

    Args:
        entries: History entries in the order they should be scanned.
        n: Length of the command sequences to count.

    Returns:
        Mapping from command tuple to occurrence count, keyed in order of
        first occurrence. Empty when ``n < 1`` or fewer than ``n`` entries
        exist.
    """
    counts: dict[tuple[str, ...], int] = defaultdict(int)
    if n < 1:
        # A sequence of fewer than one command cannot match anything.
        return counts

    for start in range(len(entries) - n + 1):
        window = [entries[index] for index in range(start, start + n)]
        shell = window[0].shell_type
        # Windows that mix shell types are not counted as a sequence.
        if all(entry.shell_type == shell for entry in window[1:]):
            counts[tuple(entry.command for entry in window)] += 1
    return counts


def _build_patterns(
    counts: dict[tuple[str, ...], int],
    total: int,
    min_frequency: int,
) -> list[CommandPattern]:
    """Turn raw counts into patterns sorted by descending frequency.

    Args:
        counts: Mapping from command tuple to occurrence count.
        total: Denominator used for the percentage of each pattern.
        min_frequency: Patterns seen fewer times than this are dropped.

    Returns:
        Patterns ordered by frequency, highest first. The sort is stable, so
        ties keep the iteration order of ``counts``.
    """
    patterns = [
        CommandPattern(
            commands=list(sequence),
            frequency=count,
            # Guard against an empty history to avoid dividing by zero.
            percentage=(count / total * 100) if total > 0 else 0.0,
        )
        for sequence, count in counts.items()
        if count >= min_frequency
    ]
    patterns.sort(key=lambda pattern: pattern.frequency, reverse=True)
    return patterns


def detect_command_pairs(
    store: HistoryStore,
    min_frequency: int = 2,
) -> list[CommandPattern]:
    """Detect pairs of commands that frequently occur together.

    Args:
        store: History store whose ``entries`` are scanned in order.
        min_frequency: Minimum number of occurrences for a pair to be kept.

    Returns:
        Two-command patterns sorted by descending frequency. ``percentage``
        is relative to all counted same-shell pairs, including those below
        ``min_frequency``.
    """
    counts = _count_ngrams(store.entries, 2)
    return _build_patterns(counts, sum(counts.values()), min_frequency)


def detect_command_triplets(
    store: HistoryStore,
    min_frequency: int = 2,
) -> list[CommandPattern]:
    """Detect triplets of commands that frequently occur together.

    Args:
        store: History store whose ``entries`` are scanned in order.
        min_frequency: Minimum number of occurrences for a triplet to be kept.

    Returns:
        Three-command patterns sorted by descending frequency. ``percentage``
        is relative to all counted same-shell triplets, including those below
        ``min_frequency``.
    """
    counts = _count_ngrams(store.entries, 3)
    return _build_patterns(counts, sum(counts.values()), min_frequency)


def detect_repetitive_commands(
    store: HistoryStore,
    min_frequency: int = 2,
) -> list[CommandPattern]:
    """Detect commands that are run repeatedly.

    Args:
        store: History store; its ``command_frequency`` mapping supplies the
            per-command counts and ``entries`` supplies the denominator.
        min_frequency: Minimum count for a command to be reported.

    Returns:
        Single-command patterns sorted by descending frequency, with
        ``percentage`` relative to ``len(store.entries)``.
    """
    # The entry count does not change while iterating, so compute it once.
    total = len(store.entries)
    # Wrap each command in a 1-tuple so it is treated as a sequence of one
    # command rather than being split into characters.
    counts = {
        (command,): freq
        for command, freq in store.command_frequency.items()
    }
    return _build_patterns(counts, total, min_frequency)


def ngram_analysis(
    store: HistoryStore,
    n: int = 2,
    min_frequency: int = 2,
) -> list[CommandPattern]:
    """Analyze n-grams (sequences of n commands) in history.

    Args:
        store: History store whose ``entries`` are scanned in order.
        n: Number of consecutive commands per sequence. Values below 1 yield
            an empty result.
        min_frequency: Minimum number of occurrences for an n-gram to be kept.

    Returns:
        ``n``-command patterns sorted by descending frequency. ``percentage``
        is relative to all counted same-shell n-grams, including those below
        ``min_frequency``.
    """
    counts = _count_ngrams(store.entries, n)
    return _build_patterns(counts, sum(counts.values()), min_frequency)