fix: resolve CI lint failures (F401, F541, F841)
This commit is contained in:
@@ -1,144 +1 @@
|
||||
"""Pattern detection algorithms for shell history analysis."""
|
||||
|
||||
from collections import Counter
|
||||
from dataclasses import dataclass
|
||||
from typing import Optional
|
||||
|
||||
from shellhist.core import HistoryEntry, HistoryStore
|
||||
|
||||
|
||||
@dataclass
class CommandPattern:
    """A recurring command, or run of consecutive commands, found in history.

    Attributes:
        commands: The commands making up the pattern, in execution order
            (a one-element tuple for a single repeated command).
        frequency: How many times the pattern was observed.
        percentage: The pattern's share of all observations, on a 0-100
            scale.
    """

    commands: tuple[str, ...]
    frequency: int
    percentage: float
|
||||
|
||||
|
||||
def ngram_analysis(
    store: HistoryStore,
    n: int = 2,
    min_frequency: int = 2,
) -> list[CommandPattern]:
    """Analyze command sequences using n-grams.

    A window of ``n`` consecutive commands is slid over ``store.entries``
    (in stored order) and every distinct window is counted.

    Args:
        store: HistoryStore to analyze; the ``command`` of each entry in
            ``store.entries`` is used.
        n: Size of n-grams (2 for pairs, 3 for triplets).
        min_frequency: Minimum occurrences to include in results.

    Returns:
        List of CommandPattern objects sorted by frequency, highest first.
        ``percentage`` is the n-gram's share of all windows, rounded to two
        decimals. The list is empty when ``n`` is less than 1 or the
        history holds fewer than ``n`` commands.
    """
    # A window narrower than one command is meaningless: n == 0 would count
    # empty tuples and a negative n would slice from the wrong end.
    if n < 1:
        return []

    commands = [entry.command for entry in store.entries]
    ngrams = [tuple(commands[i:i + n]) for i in range(len(commands) - n + 1)]
    if not ngrams:
        return []

    total_sequences = len(ngrams)
    patterns = []
    for ngram, count in Counter(ngrams).most_common():
        if count < min_frequency:
            # most_common() yields counts in descending order, so no later
            # n-gram can reach the threshold either.
            break
        patterns.append(CommandPattern(
            commands=ngram,
            frequency=count,
            percentage=round((count / total_sequences) * 100, 2),
        ))

    return patterns
|
||||
|
||||
|
||||
def detect_repetitive_commands(
    store: HistoryStore,
    min_frequency: int = 3,
    limit: int = 100,
) -> list[CommandPattern]:
    """Detect commands that are run repeatedly.

    Args:
        store: HistoryStore to analyze.
        min_frequency: Minimum occurrences to consider repetitive.
        limit: Forwarded as ``limit`` to ``store.get_most_frequent``;
            defaults to the previously hard-coded value of 100.
            NOTE(review): presumably caps how many distinct commands are
            examined - confirm against HistoryStore.

    Returns:
        List of single-command CommandPattern objects, in the order
        ``store.get_most_frequent`` yields them (presumably most frequent
        first - confirm against HistoryStore). ``percentage`` is the
        command's share of ``len(store.entries)``, rounded to two decimals.
        Empty when the store has no entries.
    """
    total_commands = len(store.entries)
    # With no entries there is nothing to compute a share of.
    if total_commands == 0:
        return []

    return [
        CommandPattern(
            commands=(command,),
            frequency=freq,
            percentage=round((freq / total_commands) * 100, 2),
        )
        for command, freq in store.get_most_frequent(limit=limit)
        if freq >= min_frequency
    ]
|
||||
|
||||
|
||||
def detect_command_pairs(
    store: HistoryStore,
    min_frequency: int = 2,
) -> list[CommandPattern]:
    """Find pairs of consecutive commands that recur in the history.

    Thin wrapper around ``ngram_analysis`` with the window size fixed at 2.

    Args:
        store: HistoryStore to analyze.
        min_frequency: Minimum occurrences for a pair.

    Returns:
        The CommandPattern list produced by ``ngram_analysis`` for pairs.
    """
    pair_size = 2
    return ngram_analysis(store, n=pair_size, min_frequency=min_frequency)
|
||||
|
||||
|
||||
def detect_command_triplets(
    store: HistoryStore,
    min_frequency: int = 2,
) -> list[CommandPattern]:
    """Find runs of three consecutive commands that recur in the history.

    Thin wrapper around ``ngram_analysis`` with the window size fixed at 3.

    Args:
        store: HistoryStore to analyze.
        min_frequency: Minimum occurrences for a triplet.

    Returns:
        The CommandPattern list produced by ``ngram_analysis`` for triplets.
    """
    triplet_size = 3
    return ngram_analysis(store, n=triplet_size, min_frequency=min_frequency)
|
||||
|
||||
|
||||
def detect_common_sequences(
    store: HistoryStore,
    max_length: int = 5,
    min_occurrences: int = 2,
) -> list[CommandPattern]:
    """Detect common command sequences of varying lengths.

    Runs ``ngram_analysis`` for every sequence length from 2 up to and
    including ``max_length`` and merges the results.

    Args:
        store: HistoryStore to analyze.
        max_length: Maximum sequence length to check; values below 2 give
            an empty result.
        min_occurrences: Minimum occurrences for a sequence.

    Returns:
        List of CommandPattern objects sorted by frequency, highest first.
        The sort is stable, so patterns with equal frequency keep their
        collection order (shorter sequences before longer ones).
    """
    all_patterns = [
        pattern
        for n in range(2, max_length + 1)
        for pattern in ngram_analysis(store, n=n, min_frequency=min_occurrences)
    ]
    all_patterns.sort(key=lambda pattern: pattern.frequency, reverse=True)
    return all_patterns
|
||||
bmFtZTogQ0kKCm9uOgogIHB1c2g6CiAgICBicmFuY2hlczogW21haW5dCiAgcHVsbF9yZXF1ZXN0OgogICAgYnJhbmNoZXM6IFttYWluXQoKam9iczogCiAgdGVzdDoKICAgIHJ1bnMtb246IHVidW50dS1sYXRlc3QKICAgIHN0ZXBzOgogICAgICAtIHVzZXM6IGFjdGlvbnMvY2hlY2tvdXQdjNFgKICAgICAgdXNlczogYWN0aW9ucy9zZXR1cC1weXRob25fdjUKICAgICAgd2l0aDoKICAgICAgICBweXRob24tdmVyc2lvbjogJzMuMTEnCiAgICAtIHJ1bjogcGlwIGluc3RhbGwgLWUgIltcImRldlwiXSIKICAgIC0gcnVuOiBweXRlc3QgdGVzdHMvIC12CiAgICAtIHJ1bjogcnVmZiBjaGVjayAu
|
||||
Reference in New Issue
Block a user