Initial upload: cmdparse CLI tool with comprehensive documentation and CI/CD

This commit is contained in:
2026-02-04 02:08:46 +00:00
parent e749da61df
commit c7d6e50f47

129
cmdparse/patterns.py Normal file
View File

@@ -0,0 +1,129 @@
"""Built-in pattern definitions for CLI output parsing."""
import re
from dataclasses import dataclass
from typing import Optional
@dataclass
class Pattern:
    """A named, compiled regex paired with a detection weight.

    Attributes:
        name: Label identifying the output type this entry detects.
        pattern: Compiled regular expression tested against the text.
        confidence: Base score credited to ``name`` when the regex is found.
    """

    name: str
    pattern: re.Pattern
    confidence: int
# Table header / border detector (multi-line aware). Three alternatives:
#   1. a rule line built from '|', '-', '+' and whitespace, e.g. "|---|---|";
#   2. letters and whitespace only, starting with a capital and containing at
#      least one further capital, e.g. "Name Age";
#   3. two or more whitespace-separated Capitalised words (letters/underscores).
# Note that ``\s`` also matches newlines, so a match may span several lines.
TABLE_HEADER_PATTERN = re.compile(
    r'^[\s]*(?:\|[\s-]*)+[+\-|]+$|'
    # The trailing segment is deliberately NOT wrapped in ``(?:...)+``: since
    # [A-Z] is a subset of [A-Za-z\s], repeating the group matches exactly the
    # same strings and spans as a single occurrence, but it backtracks
    # exponentially on a long run of capitals followed by a non-matching char.
    r'^[A-Z][A-Za-z\s]+[A-Z][A-Za-z\s]*$|'
    r'^\s*(?:[A-Z][A-Za-z_]+(?:\s+[A-Z][A-Za-z_]+)*)\s+(?:[A-Z][A-Za-z_]+(?:\s+[A-Z][A-Za-z_]+)*)\s*$',
    re.MULTILINE
)

# Table row detector: content with optional surrounding pipes (group 1), a
# "left | right" pair (groups 2 and 3), or a "+---+" border line.
# NOTE(review): the first alternative accepts any single line containing at
# least one character, so the later alternatives look unreachable when this is
# applied one line at a time -- confirm the intended ordering.
TABLE_ROW_PATTERN = re.compile(
    r'^\s*\|?\s*(.+?)\s*\|?\s*$|'
    r'^\s*([^\|]+?)\s*\|\s*(.+?)\s*$|'
    r'^\s*\+[-+\+]+\+\s*$'
)

# "key: value" lines. Group 1 is the key (letter or underscore first, then
# letters, digits, '_', '-' or '.'); group 2 is the non-empty rest of the line.
KEY_VALUE_COLON_PATTERN = re.compile(
    r'^\s*([A-Za-z_][A-Za-z0-9_\-\.]*)\s*:\s*(.+)$',
    re.MULTILINE
)

# "key = value" lines; same key and value rules as the colon variant.
KEY_VALUE_EQUALS_PATTERN = re.compile(
    r'^\s*([A-Za-z_][A-Za-z0-9_\-\.]*)\s*=\s*(.+)$',
    re.MULTILINE
)

# Exactly three comma-separated fields (the last may be empty). Compiled
# without re.MULTILINE, so '^' and '$' anchor to the whole string; '[^,]' also
# matches newlines.
DELIMITED_COMMA_PATTERN = re.compile(
    r'^\s*([^,]+),([^,]+),([^,]*)$'
)

# Two fields separated by a tab (the second may be empty). Not multi-line.
DELIMITED_TAB_PATTERN = re.compile(
    r'^\s*([^\t]+)\t([^\t]*)\s*$'
)

# Exactly three semicolon-separated fields (the last may be empty). Not
# multi-line.
DELIMITED_SEMICOLON_PATTERN = re.compile(
    r'^\s*([^;]+);([^;]+);([^;]*)\s*$'
)

# Start of a JSON object: optional whitespace, '{', a double-quoted key and a
# colon. Only the prefix is checked; nothing after the colon is validated.
JSON_LIKE_PATTERN = re.compile(
    r'^\s*\{\s*"[^"]+"\s*:\s*'
)

# "key value" lines: an identifier-like key at column 0, whitespace, then a
# single whitespace-free token. Because '\s+' can match a newline, two
# consecutive one-word lines also satisfy this pattern.
KEY_VALUE_BLOCK_PATTERN = re.compile(
    r'^([A-Za-z_][A-Za-z0-9_\-\.]*)\s+(\S+)$',
    re.MULTILINE
)
# Detection registry. Order matters: detect_pattern_type resolves score ties
# in favour of the entry listed first.
PATTERNS = [
    Pattern(name=label, pattern=regex, confidence=weight)
    for label, regex, weight in (
        ('table', TABLE_HEADER_PATTERN, 80),
        ('key_value_colon', KEY_VALUE_COLON_PATTERN, 70),
        ('key_value_equals', KEY_VALUE_EQUALS_PATTERN, 65),
        ('delimited_tab', DELIMITED_TAB_PATTERN, 85),
        ('delimited_comma', DELIMITED_COMMA_PATTERN, 75),
        ('delimited_semicolon', DELIMITED_SEMICOLON_PATTERN, 75),
        ('json_like', JSON_LIKE_PATTERN, 90),
        ('key_value_block', KEY_VALUE_BLOCK_PATTERN, 30),
    )
]
def detect_pattern_type(text: str) -> str:
    """Detect the pattern type of the given text.

    Each entry in ``PATTERNS`` starts at zero and accumulates points:

    * its ``confidence`` when its regex is found by ``search`` over the
      whole text;
    * 10 more when the text has several lines and the regex matches at the
      start of the first line;
    * fixed bonuses when at least half of the lines contain the relevant
      delimiter (tab, comma, colon, equals sign, semicolon);
    * 20 for ``'table'`` when every line has the same number of
      whitespace-separated tokens and neither tabs nor commas dominate.

    Args:
        text: Raw CLI output to classify.

    Returns:
        ``'empty'`` when *text* is empty or whitespace-only; otherwise the
        name of the highest-scoring pattern (the earliest entry in
        ``PATTERNS`` wins ties), or ``'raw'`` when nothing scores above zero.
    """
    if not text or not text.strip():
        return 'empty'

    # str.split always yields at least one element, so a first line exists.
    lines = text.strip().split('\n')
    first_line = lines[0]
    multi_line = len(lines) > 1
    half = len(lines) * 0.5

    scores = {pattern.name: 0 for pattern in PATTERNS}

    # Per-delimiter line counts. The exclusions keep a line from being
    # credited to a delimiter when a "stronger" one is also present.
    tab_count = sum(1 for line in lines if '\t' in line)
    comma_count = sum(1 for line in lines if ',' in line and '\t' not in line)
    colon_count = sum(1 for line in lines if ':' in line and '\t' not in line)
    equals_count = sum(
        1 for line in lines
        if '=' in line and ':' not in line and '\t' not in line
    )
    semicolon_count = sum(
        1 for line in lines
        if ';' in line and ',' not in line and '=' not in line and ':' not in line
    )

    # NOTE(review): patterns compiled without re.MULTILINE are anchored to the
    # whole text here, so the delimited_* regexes can only contribute their
    # base confidence when the entire text looks like a single row -- confirm
    # this is intended.
    # NOTE(review): the first-line bonus is applied independently of the
    # whole-text search result -- confirm this is intended.
    for pattern in PATTERNS:
        if pattern.pattern.search(text):
            scores[pattern.name] += pattern.confidence
        if multi_line and pattern.pattern.match(first_line):
            scores[pattern.name] += 10

    if tab_count >= half:
        scores['delimited_tab'] += 30
    if comma_count >= half and tab_count < half:
        scores['delimited_comma'] += 25
    if colon_count >= half:
        scores['key_value_colon'] += 25
    if equals_count >= half:
        scores['key_value_equals'] += 25
    if semicolon_count >= half:
        scores['delimited_semicolon'] += 30

    # Column-aligned output: every line splits into the same number of tokens.
    if multi_line and tab_count < half and comma_count < half:
        words_first = len(first_line.split())
        if all(len(line.split()) == words_first for line in lines[1:]):
            scores['table'] += 20

    # max() returns the first maximal item, so ties go to the entry that
    # appears earliest in PATTERNS (dicts preserve insertion order).
    best_name, best_score = max(
        scores.items(), key=lambda item: item[1], default=('raw', 0)
    )
    return best_name if best_score > 0 else 'raw'