# Source: shellhist/core/patterns.py
# Reconstructed from patch 4439f8030d4ecbec4f19f110b17b095d659cafeb
# ("Initial upload with CI/CD workflow", Sat, 31 Jan 2026 13:13:04 +0000).
"""Pattern detection algorithms for shell history analysis."""

from collections import Counter
from dataclasses import dataclass
from typing import TYPE_CHECKING, Optional

if TYPE_CHECKING:
    # These names are referenced only in annotations below, so they are
    # imported for type checkers only and quoted where they are used.
    from shellhist.core import HistoryEntry, HistoryStore


@dataclass
class CommandPattern:
    """Represents a detected command pattern."""

    # The commands forming the pattern, in order; a 1-tuple for a single
    # repeated command, an n-tuple for an n-gram.
    commands: tuple[str, ...]
    # Number of times the pattern was observed.
    frequency: int
    # ``frequency`` as a share of all candidates examined, scaled to
    # 0-100 and rounded to two decimals.
    percentage: float


def _ngram_patterns(
    commands: list[str],
    n: int,
    min_frequency: int,
) -> list[CommandPattern]:
    """Count the n-grams of ``commands`` and keep the frequent ones.

    Args:
        commands: Command strings in history order.
        n: Window size.
        min_frequency: Minimum occurrences to include in results.

    Returns:
        List of CommandPattern objects in descending frequency order;
        n-grams with equal counts keep first-seen order. Empty when
        ``n`` is smaller than 1 or larger than ``len(commands)``.
    """
    # A window of zero or negative width is not an n-gram; slicing with it
    # would produce empty or ragged tuples and a meaningless result.
    if n < 1:
        return []

    window_count = len(commands) - n + 1
    if window_count < 1:
        return []

    counts = Counter(tuple(commands[i:i + n]) for i in range(window_count))

    return [
        CommandPattern(
            commands=ngram,
            frequency=count,
            percentage=round((count / window_count) * 100, 2),
        )
        for ngram, count in counts.most_common()
        if count >= min_frequency
    ]


def ngram_analysis(
    store: "HistoryStore",
    n: int = 2,
    min_frequency: int = 2,
) -> list[CommandPattern]:
    """Analyze command sequences using n-grams.

    Args:
        store: HistoryStore to analyze; each item of ``store.entries``
            must expose a ``command`` attribute.
        n: Size of n-grams (2 for pairs, 3 for triplets). Values below 1
            yield an empty result.
        min_frequency: Minimum occurrences to include in results.

    Returns:
        List of CommandPattern objects sorted by frequency, highest first.
        ``percentage`` is relative to the number of n-gram windows in the
        history.
    """
    commands = [entry.command for entry in store.entries]
    return _ngram_patterns(commands, n, min_frequency)


def detect_repetitive_commands(
    store: "HistoryStore",
    min_frequency: int = 3,
    limit: int = 100,
) -> list[CommandPattern]:
    """Detect commands that are run repeatedly.

    Args:
        store: HistoryStore to analyze.
        min_frequency: Minimum occurrences to consider repetitive.
        limit: Passed to ``store.get_most_frequent`` as ``limit``.

    Returns:
        List of single-command CommandPattern objects, in the order
        ``store.get_most_frequent`` returns them (presumably most frequent
        first; confirm against HistoryStore). ``percentage`` is relative
        to the total number of entries.
    """
    total_commands = len(store.entries)
    # With no entries there is nothing to report, and no valid denominator.
    if total_commands == 0:
        return []

    return [
        CommandPattern(
            commands=(command,),
            frequency=freq,
            percentage=round((freq / total_commands) * 100, 2),
        )
        for command, freq in store.get_most_frequent(limit=limit)
        if freq >= min_frequency
    ]


def detect_command_pairs(
    store: "HistoryStore",
    min_frequency: int = 2,
) -> list[CommandPattern]:
    """Detect frequently occurring command pairs.

    Args:
        store: HistoryStore to analyze.
        min_frequency: Minimum occurrences for a pair.

    Returns:
        List of CommandPattern objects, as produced by ``ngram_analysis``
        with ``n=2``.
    """
    return ngram_analysis(store, n=2, min_frequency=min_frequency)


def detect_command_triplets(
    store: "HistoryStore",
    min_frequency: int = 2,
) -> list[CommandPattern]:
    """Detect frequently occurring command triplets.

    Args:
        store: HistoryStore to analyze.
        min_frequency: Minimum occurrences for a triplet.

    Returns:
        List of CommandPattern objects, as produced by ``ngram_analysis``
        with ``n=3``.
    """
    return ngram_analysis(store, n=3, min_frequency=min_frequency)


def detect_common_sequences(
    store: "HistoryStore",
    max_length: int = 5,
    min_occurrences: int = 2,
) -> list[CommandPattern]:
    """Detect common command sequences of varying lengths.

    Args:
        store: HistoryStore to analyze.
        max_length: Maximum sequence length to check; lengths from 2 up to
            and including this value are examined.
        min_occurrences: Minimum occurrences for a sequence.

    Returns:
        List of CommandPattern objects sorted by frequency, highest first.
        Among equal frequencies, shorter sequences come first.
    """
    # Extract the command list once and reuse it for every window size.
    commands = [entry.command for entry in store.entries]

    all_patterns: list[CommandPattern] = []
    for n in range(2, max_length + 1):
        all_patterns.extend(_ngram_patterns(commands, n, min_occurrences))

    # list.sort is stable, so ties keep the shorter-first order built above.
    all_patterns.sort(key=lambda pattern: pattern.frequency, reverse=True)

    return all_patterns