Initial upload with CI/CD workflow
Some checks failed
CI / test (push) Has been cancelled

This commit is contained in:
2026-01-31 13:13:04 +00:00
parent e97df9d2b2
commit 4439f8030d

144
shellhist/core/patterns.py Normal file
View File

@@ -0,0 +1,144 @@
"""Pattern detection algorithms for shell history analysis."""
from collections import Counter
from dataclasses import dataclass
from typing import Optional
from shellhist.core import HistoryEntry, HistoryStore
@dataclass
class CommandPattern:
    """A command sequence together with how often it was observed.

    Attributes:
        commands: The commands forming the pattern, in order. A single
            repeated command is stored as a one-element tuple.
        frequency: Number of times the pattern was counted.
        percentage: Share of the analyzed population that the pattern
            accounts for, expressed on a 0-100 scale.
    """

    commands: tuple[str, ...]
    frequency: int
    percentage: float
def ngram_analysis(
    store: HistoryStore,
    n: int = 2,
    min_frequency: int = 2,
) -> list[CommandPattern]:
    """Analyze command sequences using n-grams.

    Slides a window of ``n`` consecutive commands over ``store.entries``
    and counts how often each distinct window occurs.

    Args:
        store: HistoryStore to analyze.
        n: Size of n-grams (2 for pairs, 3 for triplets). Must be >= 1.
        min_frequency: Minimum occurrences to include in results.

    Returns:
        List of CommandPattern objects sorted by descending frequency.
        ``percentage`` is the n-gram's share of all n-gram windows in the
        history, rounded to two decimals. The list is empty when the
        history holds fewer than ``n`` commands.

    Raises:
        ValueError: If ``n`` is smaller than 1. Such a window size would
            otherwise produce empty or mis-sliced "n-grams".
    """
    if n < 1:
        raise ValueError(f"n must be a positive integer, got {n!r}")

    commands = [entry.command for entry in store.entries]
    # The range is empty when there are fewer than n commands, so short
    # histories naturally yield no windows.
    ngrams = [tuple(commands[i:i + n]) for i in range(len(commands) - n + 1)]
    if not ngrams:
        return []

    total_sequences = len(ngrams)
    # most_common() already orders by descending count, so the filtered
    # result stays sorted by frequency.
    return [
        CommandPattern(
            commands=ngram,
            frequency=count,
            percentage=round(count / total_sequences * 100, 2),
        )
        for ngram, count in Counter(ngrams).most_common()
        if count >= min_frequency
    ]
def detect_repetitive_commands(
    store: HistoryStore,
    min_frequency: int = 3,
    limit: int = 100,
) -> list[CommandPattern]:
    """Detect commands that are run repeatedly.

    Args:
        store: HistoryStore to analyze.
        min_frequency: Minimum occurrences to consider repetitive.
        limit: Maximum number of candidate commands requested from
            ``store.get_most_frequent``. Defaults to 100, the value that
            was previously hard-coded.

    Returns:
        List of single-command CommandPattern objects, in the order
        yielded by ``store.get_most_frequent`` (presumably most frequent
        first). ``percentage`` is the command's share of all history
        entries, rounded to two decimals. Empty when the history is empty.
    """
    total_commands = len(store.entries)
    if total_commands == 0:
        # Nothing to compute a share of; also avoids dividing by zero.
        return []

    return [
        CommandPattern(
            commands=(command,),
            frequency=freq,
            percentage=round(freq / total_commands * 100, 2),
        )
        for command, freq in store.get_most_frequent(limit=limit)
        if freq >= min_frequency
    ]
def detect_command_pairs(
    store: HistoryStore,
    min_frequency: int = 2,
) -> list[CommandPattern]:
    """Find pairs of consecutive commands that recur in the history.

    Convenience wrapper around ``ngram_analysis`` with a window of two.

    Args:
        store: HistoryStore to analyze.
        min_frequency: Fewest occurrences a pair needs to be reported.

    Returns:
        List of CommandPattern objects, one per qualifying pair.
    """
    pair_size = 2
    return ngram_analysis(store, n=pair_size, min_frequency=min_frequency)
def detect_command_triplets(
    store: HistoryStore,
    min_frequency: int = 2,
) -> list[CommandPattern]:
    """Find runs of three consecutive commands that recur in the history.

    Convenience wrapper around ``ngram_analysis`` with a window of three.

    Args:
        store: HistoryStore to analyze.
        min_frequency: Fewest occurrences a triplet needs to be reported.

    Returns:
        List of CommandPattern objects, one per qualifying triplet.
    """
    triplet_size = 3
    return ngram_analysis(store, n=triplet_size, min_frequency=min_frequency)
def detect_common_sequences(
    store: HistoryStore,
    max_length: int = 5,
    min_occurrences: int = 2,
) -> list[CommandPattern]:
    """Detect common command sequences of varying lengths.

    Runs ``ngram_analysis`` for every window size from 2 up to
    ``max_length`` (inclusive) and merges the results.

    Args:
        store: HistoryStore to analyze.
        max_length: Maximum sequence length to check. Values below 2
            leave nothing to analyze and yield an empty list.
        min_occurrences: Minimum occurrences for a sequence.

    Returns:
        List of CommandPattern objects sorted by descending frequency.
        The sort is stable, so patterns with equal frequency keep the
        shorter-sequence-first order in which they were collected.
    """
    all_patterns = []
    for n in range(2, max_length + 1):
        all_patterns.extend(
            ngram_analysis(store, n=n, min_frequency=min_occurrences)
        )
    all_patterns.sort(key=lambda pattern: pattern.frequency, reverse=True)
    return all_patterns