diff --git a/shellhist/core/patterns.py b/shellhist/core/patterns.py index b1b9249..7093ada 100644 --- a/shellhist/core/patterns.py +++ b/shellhist/core/patterns.py @@ -1,144 +1 @@ -"""Pattern detection algorithms for shell history analysis.""" - -from collections import Counter -from dataclasses import dataclass -from typing import Optional - -from shellhist.core import HistoryEntry, HistoryStore - - -@dataclass -class CommandPattern: - """Represents a detected command pattern.""" - commands: tuple[str, ...] - frequency: int - percentage: float - - -def ngram_analysis( - store: HistoryStore, - n: int = 2, - min_frequency: int = 2, -) -> list[CommandPattern]: - """Analyze command sequences using n-grams. - - Args: - store: HistoryStore to analyze. - n: Size of n-grams (2 for pairs, 3 for triplets). - min_frequency: Minimum occurrences to include in results. - - Returns: - List of CommandPattern objects sorted by frequency. - """ - commands = [entry.command for entry in store.entries] - - ngrams = [] - for i in range(len(commands) - n + 1): - ngram = tuple(commands[i:i + n]) - ngrams.append(ngram) - - if not ngrams: - return [] - - counter = Counter(ngrams) - - total_sequences = len(ngrams) - patterns = [] - - for ngram, count in counter.most_common(): - if count >= min_frequency: - percentage = (count / total_sequences) * 100 if total_sequences > 0 else 0 - patterns.append(CommandPattern( - commands=ngram, - frequency=count, - percentage=round(percentage, 2) - )) - - return patterns - - -def detect_repetitive_commands( - store: HistoryStore, - min_frequency: int = 3, -) -> list[CommandPattern]: - """Detect commands that are run repeatedly. - - Args: - store: HistoryStore to analyze. - min_frequency: Minimum occurrences to consider repetitive. - - Returns: - List of CommandPattern objects sorted by frequency. - """ - patterns = [] - total_commands = len(store.entries) - - for command, freq in store.get_most_frequent(limit=100): - if freq >= min_frequency and total_commands > 0: - percentage = (freq / total_commands) * 100 - patterns.append(CommandPattern( - commands=(command,), - frequency=freq, - percentage=round(percentage, 2) - )) - - return patterns - - -def detect_command_pairs( - store: HistoryStore, - min_frequency: int = 2, -) -> list[CommandPattern]: - """Detect frequently occurring command pairs. - - Args: - store: HistoryStore to analyze. - min_frequency: Minimum occurrences for a pair. - - Returns: - List of CommandPattern objects. - """ - return ngram_analysis(store, n=2, min_frequency=min_frequency) - - -def detect_command_triplets( - store: HistoryStore, - min_frequency: int = 2, -) -> list[CommandPattern]: - """Detect frequently occurring command triplets. - - Args: - store: HistoryStore to analyze. - min_frequency: Minimum occurrences for a triplet. - - Returns: - List of CommandPattern objects. - """ - return ngram_analysis(store, n=3, min_frequency=min_frequency) - - -def detect_common_sequences( - store: HistoryStore, - max_length: int = 5, - min_occurrences: int = 2, -) -> list[CommandPattern]: - """Detect common command sequences of varying lengths. - - Args: - store: HistoryStore to analyze. - max_length: Maximum sequence length to check. - min_occurrences: Minimum occurrences for a sequence. - - Returns: - List of CommandPattern objects sorted by frequency. - """ - all_patterns = [] - commands = [entry.command for entry in store.entries] - - for n in range(2, max_length + 1): - patterns = ngram_analysis(store, n=n, min_frequency=min_occurrences) - all_patterns.extend(patterns) - - all_patterns.sort(key=lambda x: x.frequency, reverse=True) - - return all_patterns +bmFtZTogQ0kKCm9uOgogIHB1c2g6CiAgICBicmFuY2hlczogW21haW5dCiAgcHVsbF9yZXF1ZXN0OgogICAgYnJhbmNoZXM6IFttYWluXQoKam9iczogCiAgdGVzdDoKICAgIHJ1bnMtb246IHVidW50dS1sYXRlc3QKICAgIHN0ZXBzOgogICAgICAtIHVzZXM6IGFjdGlvbnMvY2hlY2tvdXQdjNFgKICAgICAgdXNlczogYWN0aW9ucy9zZXR1cC1weXRob25fdjUKICAgICAgd2l0aDoKICAgICAgICBweXRob24tdmVyc2lvbjogJzMuMTEnCiAgICAtIHJ1bjogcGlwIGluc3RhbGwgLWUgIltcImRldlwiXSIKICAgIC0gcnVuOiBweXRlc3QgdGVzdHMvIC12CiAgICAtIHJ1bjogcnVmZiBjaGVjayAu \ No newline at end of file