"""Pattern detection algorithms for shell history."""

from collections import defaultdict
from dataclasses import dataclass, field
from typing import TYPE_CHECKING, Optional

if TYPE_CHECKING:
    # Used in annotations only. This module lives inside ``shellhist.core``,
    # so deferring the import avoids a potential circular import at load time.
    from shellhist.core import HistoryEntry, HistoryStore


@dataclass
class CommandPattern:
    """Represents a detected command pattern.

    Attributes:
        commands: The commands making up the pattern, in execution order.
        frequency: How many times the pattern was observed.
        percentage: ``frequency`` as a percentage of the total the detector
            counted against (0.0 when that total is zero).
    """

    commands: list[str]
    frequency: int
    percentage: float = 0.0


def _patterns_from_counts(counts, total, min_frequency):
    """Build the frequency-sorted pattern list from a ``key -> count`` mapping.

    Args:
        counts: Mapping from a tuple of commands to its occurrence count.
        total: Denominator used for the percentage of every pattern.
        min_frequency: Keys seen fewer times than this are dropped.

    Returns:
        ``CommandPattern`` objects ordered by descending frequency. The sort
        is stable, so ties keep the iteration order of ``counts``.
    """
    patterns = [
        CommandPattern(
            commands=list(key),
            frequency=count,
            percentage=(count / total * 100) if total > 0 else 0.0,
        )
        for key, count in counts.items()
        if count >= min_frequency
    ]
    patterns.sort(key=lambda pattern: pattern.frequency, reverse=True)
    return patterns


def detect_command_pairs(
    store: "HistoryStore",
    min_frequency: int = 2,
) -> list[CommandPattern]:
    """Detect pairs of commands that frequently occur together.

    A pair is two consecutive entries of ``store.entries`` whose
    ``shell_type`` values are equal. Equivalent to ``ngram_analysis`` with
    ``n=2``.

    Args:
        store: History store; its ``entries`` are scanned in order.
        min_frequency: Minimum number of occurrences for a pair to be reported.

    Returns:
        Patterns sorted by descending frequency; ``percentage`` is relative
        to the number of same-shell pairs counted.
    """
    return ngram_analysis(store, n=2, min_frequency=min_frequency)


def detect_command_triplets(
    store: "HistoryStore",
    min_frequency: int = 2,
) -> list[CommandPattern]:
    """Detect triplets of commands that frequently occur together.

    A triplet is three consecutive entries of ``store.entries`` sharing one
    ``shell_type``. Equivalent to ``ngram_analysis`` with ``n=3``.

    Args:
        store: History store; its ``entries`` are scanned in order.
        min_frequency: Minimum number of occurrences for a triplet to be
            reported.

    Returns:
        Patterns sorted by descending frequency; ``percentage`` is relative
        to the number of same-shell triplets counted.
    """
    return ngram_analysis(store, n=3, min_frequency=min_frequency)


def detect_repetitive_commands(
    store: "HistoryStore",
    min_frequency: int = 2,
) -> list[CommandPattern]:
    """Detect commands that are run repeatedly.

    Args:
        store: History store; reads ``command_frequency`` (command -> count)
            and ``entries``.
        min_frequency: Minimum count for a command to be reported.

    Returns:
        Single-command patterns sorted by descending frequency;
        ``percentage`` is relative to ``len(store.entries)``.
    """
    # The denominator is loop-invariant, so compute it once up front.
    total = len(store.entries)
    # Wrap each command in a 1-tuple so the shared builder can treat every
    # key as a sequence of commands.
    counts = {
        (command,): freq for command, freq in store.command_frequency.items()
    }
    return _patterns_from_counts(counts, total, min_frequency)


def ngram_analysis(
    store: "HistoryStore",
    n: int = 2,
    min_frequency: int = 2,
) -> list[CommandPattern]:
    """Analyze n-grams (sequences of n commands) in history.

    Slides a window of ``n`` consecutive entries over ``store.entries`` and
    counts the command tuple of every window whose entries all have the same
    ``shell_type``.

    Args:
        store: History store; its ``entries`` are scanned in order.
        n: Window length. Values below 1 yield an empty result.
        min_frequency: Minimum number of occurrences for an n-gram to be
            reported.

    Returns:
        Patterns sorted by descending frequency; ``percentage`` is relative
        to the number of same-shell windows counted.
    """
    if n < 1:
        # A window needs at least one entry; nothing can match.
        return []

    entries = store.entries
    counts: dict[tuple[str, ...], int] = defaultdict(int)

    for start in range(len(entries) - n + 1):
        window = [entries[pos] for pos in range(start, start + n)]
        shell = window[0].shell_type
        # Skip windows that span more than one shell.
        if all(entry.shell_type == shell for entry in window[1:]):
            counts[tuple(entry.command for entry in window)] += 1

    return _patterns_from_counts(counts, sum(counts.values()), min_frequency)