This commit is contained in:
144
shellhist/core/patterns.py
Normal file
144
shellhist/core/patterns.py
Normal file
@@ -0,0 +1,144 @@
|
|||||||
|
"""Pattern detection algorithms for shell history analysis."""
|
||||||
|
|
||||||
|
from collections import Counter
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
from shellhist.core import HistoryEntry, HistoryStore
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class CommandPattern:
    """A command sequence together with how often it was observed.

    Attributes:
        commands: The commands that make up the pattern, in order. A
            single repeated command is stored as a one-element tuple.
        frequency: Number of times the sequence was observed.
        percentage: Share of the analysed total that ``frequency``
            represents, expressed on a 0-100 scale.
    """

    commands: tuple[str, ...]
    frequency: int
    percentage: float
|
||||||
|
|
||||||
|
|
||||||
|
def ngram_analysis(
    store: HistoryStore,
    n: int = 2,
    min_frequency: int = 2,
) -> list[CommandPattern]:
    """Count every run of ``n`` consecutive commands in the history.

    Args:
        store: HistoryStore to analyze; ``store.entries`` is read in order
            and each entry's ``command`` attribute is used.
        n: Size of n-grams (2 for pairs, 3 for triplets). Values below 1
            describe no sequence at all and yield an empty result.
        min_frequency: Minimum occurrences to include in results.

    Returns:
        List of CommandPattern objects sorted by descending frequency.
        ``percentage`` is relative to the total number of n-grams in the
        history and is rounded to two decimals. The list is empty when
        the history holds fewer than ``n`` commands.
    """
    if n < 1:
        # A zero-length window would otherwise be counted as one empty
        # "pattern" per slice position; negative sizes are meaningless.
        return []

    commands = [entry.command for entry in store.entries]

    # Number of sliding windows of width n that fit in the history.
    total_sequences = len(commands) - n + 1
    if total_sequences <= 0:
        return []

    counter = Counter(
        tuple(commands[i:i + n]) for i in range(total_sequences)
    )

    patterns = []
    for ngram, count in counter.most_common():
        if count < min_frequency:
            # most_common() is ordered by descending count, so nothing
            # further down the list can qualify either.
            break
        patterns.append(CommandPattern(
            commands=ngram,
            frequency=count,
            percentage=round(count / total_sequences * 100, 2),
        ))

    return patterns
|
||||||
|
|
||||||
|
|
||||||
|
def detect_repetitive_commands(
    store: HistoryStore,
    min_frequency: int = 3,
    limit: int = 100,
) -> list[CommandPattern]:
    """Detect commands that are run repeatedly.

    Args:
        store: HistoryStore to analyze.
        min_frequency: Minimum occurrences to consider repetitive.
        limit: Maximum number of candidates requested from
            ``store.get_most_frequent``. Defaults to 100.

    Returns:
        One single-command CommandPattern per qualifying command, in the
        order produced by ``store.get_most_frequent`` (presumably most
        frequent first -- confirm against HistoryStore). ``percentage`` is
        relative to the total number of history entries and is rounded to
        two decimals. The list is empty when the store has no entries.
    """
    total_commands = len(store.entries)
    if total_commands == 0:
        # Nothing to report, and no meaningful denominator for percentages.
        return []

    return [
        CommandPattern(
            commands=(command,),
            frequency=freq,
            percentage=round(freq / total_commands * 100, 2),
        )
        for command, freq in store.get_most_frequent(limit=limit)
        if freq >= min_frequency
    ]
|
||||||
|
|
||||||
|
|
||||||
|
def detect_command_pairs(
    store: HistoryStore,
    min_frequency: int = 2,
) -> list[CommandPattern]:
    """Find pairs of consecutive commands that recur in the history.

    Convenience wrapper around ``ngram_analysis`` with the window size
    fixed at two.

    Args:
        store: HistoryStore to analyze.
        min_frequency: Minimum occurrences for a pair.

    Returns:
        The CommandPattern list produced by ``ngram_analysis`` for ``n=2``.
    """
    pair_size = 2
    return ngram_analysis(store, pair_size, min_frequency)
|
||||||
|
|
||||||
|
|
||||||
|
def detect_command_triplets(
    store: HistoryStore,
    min_frequency: int = 2,
) -> list[CommandPattern]:
    """Find runs of three consecutive commands that recur in the history.

    Convenience wrapper around ``ngram_analysis`` with the window size
    fixed at three.

    Args:
        store: HistoryStore to analyze.
        min_frequency: Minimum occurrences for a triplet.

    Returns:
        The CommandPattern list produced by ``ngram_analysis`` for ``n=3``.
    """
    triplet_size = 3
    return ngram_analysis(store, triplet_size, min_frequency)
|
||||||
|
|
||||||
|
|
||||||
|
def detect_common_sequences(
    store: HistoryStore,
    max_length: int = 5,
    min_occurrences: int = 2,
) -> list[CommandPattern]:
    """Detect common command sequences of varying lengths.

    Runs ``ngram_analysis`` for every window size from 2 up to and
    including ``max_length`` and merges the results.

    Args:
        store: HistoryStore to analyze.
        max_length: Maximum sequence length to check. Values below 2
            leave no window size to try and give an empty result.
        min_occurrences: Minimum occurrences for a sequence.

    Returns:
        List of CommandPattern objects sorted by descending frequency.
        Patterns with equal frequency keep the order in which they were
        collected, i.e. shorter sequences come first.
    """
    all_patterns = []
    for n in range(2, max_length + 1):
        all_patterns.extend(
            ngram_analysis(store, n=n, min_frequency=min_occurrences)
        )

    # list.sort() is stable even with reverse=True, so ties retain their
    # collection order (shorter window sizes first).
    all_patterns.sort(key=lambda pattern: pattern.frequency, reverse=True)

    return all_patterns
|
||||||
Reference in New Issue
Block a user