Initial upload: cmdparse CLI tool with comprehensive documentation and CI/CD

2026-02-04 02:08:46 +00:00
parent e749da61df
commit c7d6e50f47
1 changed files with 129 additions and 0 deletions
--- a/cmdparse/patterns.py
+++ b/cmdparse/patterns.py
@@ -0,0 +1,129 @@
+"""Built-in pattern definitions for CLI output parsing."""
+
+import re
+from dataclasses import dataclass
+from typing import Optional
+
+
+@dataclass
+class Pattern:
+    """Represents a regex pattern for detecting CLI output types.
+
+    Attributes:
+        name: Identifier of the output type. ``detect_pattern_type`` uses it
+            as the key of its score table and returns it as the detected
+            type.
+        pattern: Compiled regular expression that is searched for in the
+            whole text and matched against the first line.
+        confidence: Score added for this type when ``pattern`` is found in
+            the text.
+    """
+    name: str
+    pattern: re.Pattern
+    confidence: int
+
+
+# Heuristic for "this looks like a table header". Three alternatives:
+#   1. a pipe-led rule row built from '|', '-', '+' and whitespace,
+#      e.g. "|---|---|";
+#   2. text made only of letters and whitespace that starts with a capital
+#      letter and contains another capital later on, e.g. "NAME STATUS";
+#   3. two or more whitespace-separated capitalised words made of letters
+#      and underscores, e.g. "Container_ID Image".
+# re.MULTILINE lets ^ and $ anchor at every line, so a header anywhere in
+# the text is found by search().
+#
+# Alternative 2 intentionally ends in a single `[A-Z][A-Za-z\s]*` tail rather
+# than a repeated group. `[A-Z]` is a subset of `[A-Za-z\s]`, so repeating
+# the group accepts exactly the same strings but backtracks exponentially on
+# a long run of capitals that ultimately fails to match (e.g. "AAAA...A1").
+TABLE_HEADER_PATTERN = re.compile(
+    r'^[\s]*(?:\|[\s-]*)+[+\-|]+$|'
+    r'^[A-Z][A-Za-z\s]+[A-Z][A-Za-z\s]*$|'
+    r'^\s*(?:[A-Z][A-Za-z_]+(?:\s+[A-Z][A-Za-z_]+)*)\s+(?:[A-Z][A-Za-z_]+(?:\s+[A-Z][A-Za-z_]+)*)\s*$',
+    re.MULTILINE
+)
+
+# A table row in one of three shapes:
+#   - content optionally wrapped in a leading/trailing pipe (group 1);
+#   - "left | right" split on a pipe (groups 2 and 3);
+#   - a border line such as "+---+---+".
+# Compiled without re.MULTILINE, so ^ and $ anchor to the whole string.
+# NOTE(review): the first alternative appears to accept any non-empty single
+# line, which would shadow the other two for such input - confirm intent.
+TABLE_ROW_PATTERN = re.compile(
+    r'^\s*\|?\s*(.+?)\s*\|?\s*$|'
+    r'^\s*([^\|]+?)\s*\|\s*(.+?)\s*$|'
+    r'^\s*\+[-+\+]+\+\s*$'
+)
+
+# "key: value" lines. The key (group 1) starts with a letter or underscore
+# and may continue with letters, digits, '_', '-' or '.'; the value (group 2)
+# is the non-empty remainder of the line. re.MULTILINE lets any line match.
+KEY_VALUE_COLON_PATTERN = re.compile(
+    r'^\s*([A-Za-z_][A-Za-z0-9_\-\.]*)\s*:\s*(.+)$',
+    re.MULTILINE
+)
+
+# "key=value" lines; same key and value rules as the colon form above, with
+# '=' as the separator.
+KEY_VALUE_EQUALS_PATTERN = re.compile(
+    r'^\s*([A-Za-z_][A-Za-z0-9_\-\.]*)\s*=\s*(.+)$',
+    re.MULTILINE
+)
+
+# Exactly three comma-separated fields: the first two must be non-empty, the
+# third may be empty. No re.MULTILINE, so the anchors apply to the whole
+# string rather than to each line.
+DELIMITED_COMMA_PATTERN = re.compile(
+    r'^\s*([^,]+),([^,]+),([^,]*)$'
+)
+
+# Exactly two tab-separated fields; the second may be empty. Anchored to the
+# whole string (no re.MULTILINE).
+DELIMITED_TAB_PATTERN = re.compile(
+    r'^\s*([^\t]+)\t([^\t]*)\s*$'
+)
+
+# Exactly three semicolon-separated fields: the first two must be non-empty,
+# the third may be empty. Anchored to the whole string (no re.MULTILINE).
+DELIMITED_SEMICOLON_PATTERN = re.compile(
+    r'^\s*([^;]+);([^;]+);([^;]*)\s*$'
+)
+
+# Opening of a JSON-like object: '{', a double-quoted non-empty key, then a
+# colon. Only this prefix is checked; there is no end anchor.
+JSON_LIKE_PATTERN = re.compile(
+    r'^\s*\{\s*"[^"]+"\s*:\s*'
+)
+
+# A key (same character rules as the key/value patterns above) followed by
+# whitespace and a single whitespace-free token, filling the line.
+# NOTE(review): `\s+` also matches a newline, so two consecutive one-word
+# lines ("foo\nbar") match as key/value - confirm this is intended.
+KEY_VALUE_BLOCK_PATTERN = re.compile(
+    r'^([A-Za-z_][A-Za-z0-9_\-\.]*)\s+(\S+)$',
+    re.MULTILINE
+)
+
+
+# Registry of (type name, regex, confidence) entries scored by
+# detect_pattern_type. Order matters: that function seeds its score table in
+# this order and ranks it with a stable sort, so when two types tie the one
+# listed first here is returned.
+PATTERNS = [
+    Pattern('table', TABLE_HEADER_PATTERN, 80),
+    Pattern('key_value_colon', KEY_VALUE_COLON_PATTERN, 70),
+    Pattern('key_value_equals', KEY_VALUE_EQUALS_PATTERN, 65),
+    Pattern('delimited_tab', DELIMITED_TAB_PATTERN, 85),
+    Pattern('delimited_comma', DELIMITED_COMMA_PATTERN, 75),
+    Pattern('delimited_semicolon', DELIMITED_SEMICOLON_PATTERN, 75),
+    Pattern('json_like', JSON_LIKE_PATTERN, 90),
+    Pattern('key_value_block', KEY_VALUE_BLOCK_PATTERN, 30),
+]
+
+
+def detect_pattern_type(text: str) -> str:
+    """Classify CLI output by scoring it against the registered patterns.
+
+    Every entry in ``PATTERNS`` earns its confidence when its regex is found
+    anywhere in ``text``, plus 10 more when the input has several lines and
+    the regex also matches at the start of the first line. Fixed bonuses are
+    then added from per-line delimiter counts and from a uniform number of
+    whitespace-separated words per line.
+
+    Args:
+        text: Raw output to classify.
+
+    Returns:
+        ``'empty'`` for blank input; otherwise the name of the
+        highest-scoring pattern (the earliest ``PATTERNS`` entry on a tie),
+        or ``'raw'`` when no pattern scored above zero.
+    """
+    stripped = text.strip() if text else ''
+    if not stripped:
+        return 'empty'
+
+    lines = stripped.split('\n')
+    header = lines[0]
+    multiline = len(lines) > 1
+    # A delimiter "dominates" when at least half of the lines contain it.
+    half = len(lines) * 0.5
+
+    def lines_with(needle, excluded=''):
+        """Count lines containing ``needle`` and none of ``excluded``."""
+        return sum(
+            1
+            for line in lines
+            if needle in line and not any(ch in line for ch in excluded)
+        )
+
+    scores = {entry.name: 0 for entry in PATTERNS}
+    for entry in PATTERNS:
+        # The regex sees the original text, not the stripped copy.
+        if entry.pattern.search(text):
+            scores[entry.name] += entry.confidence
+        if multiline and entry.pattern.match(header):
+            scores[entry.name] += 10
+
+    tab_majority = lines_with('\t') >= half
+    comma_majority = lines_with(',', '\t') >= half
+
+    if tab_majority:
+        scores['delimited_tab'] += 30
+    # Commas inside tab-separated data must not promote the comma type.
+    if comma_majority and not tab_majority:
+        scores['delimited_comma'] += 25
+    if lines_with(':', '\t') >= half:
+        scores['key_value_colon'] += 25
+    if lines_with('=', ':\t') >= half:
+        scores['key_value_equals'] += 25
+    if lines_with(';', ',=:') >= half:
+        scores['delimited_semicolon'] += 30
+
+    # Same word count on every line suggests whitespace-aligned columns,
+    # unless tabs or commas already explain the structure.
+    if multiline and not tab_majority and not comma_majority:
+        width = len(header.split())
+        if all(len(line.split()) == width for line in lines[1:]):
+            scores['table'] += 20
+
+    # max() keeps the first maximal item, so ties go to the earliest entry.
+    best_name, best_score = max(
+        scores.items(), key=lambda item: item[1], default=('raw', 0)
+    )
+    return best_name if best_score > 0 else 'raw'