Compare commits
26 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 030c38a100 | |||
| 1076e8e41c | |||
| da49168a7d | |||
| 144d146593 | |||
| 9d0dd8b3b8 | |||
| 748420ba6f | |||
| 8c8ede0828 | |||
| e37e0ae595 | |||
| 99be0bfd71 | |||
| 5303e8f705 | |||
| c99c1da976 | |||
| 193c6e5ea6 | |||
| 05ee5b91c2 | |||
| 88fa08636e | |||
| 159abc8116 | |||
| da12cb1ef7 | |||
| dc6d344fe6 | |||
| 89b389f988 | |||
| 2f1045a0fc | |||
| d0e7f5c697 | |||
| b93d18462b | |||
| b99888fefa | |||
| 63f66c75fb | |||
| 21310b72fe | |||
| c7125b6a17 | |||
| 105727bf99 |
@@ -2,29 +2,32 @@ name: CI
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [main]
|
||||
branches: [ main, master ]
|
||||
pull_request:
|
||||
branches: [main]
|
||||
branches: [ main, master ]
|
||||
|
||||
jobs:
|
||||
test:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/setup-python@v5
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: '3.11'
|
||||
cache: 'pip'
|
||||
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
pip install -e ".[dev]"
|
||||
python -m pip install --upgrade pip
|
||||
pip install pytest pytest-cov
|
||||
pip install PyYAML click
|
||||
|
||||
- name: Run tests
|
||||
run: pytest tests/ -v
|
||||
- name: Run tests with coverage
|
||||
run: pytest tests/ --cov=cmdparse --cov-report=term-missing --cov-report=xml
|
||||
- name: Upload coverage
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: coverage-report
|
||||
path: coverage.xml
|
||||
if: always()
|
||||
run: |
|
||||
python -m pytest -v
|
||||
|
||||
- name: Run linting
|
||||
run: |
|
||||
pip install ruff
|
||||
python -m ruff check cmdparse/ tests/
|
||||
|
||||
19
app/.gitea/workflows/ci.yml
Normal file
19
app/.gitea/workflows/ci.yml
Normal file
@@ -0,0 +1,19 @@
|
||||
name: CI
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [main]
|
||||
pull_request:
|
||||
branches: [main]
|
||||
|
||||
jobs:
  test:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: '3.11'
      - run: pip install -e ".[dev]"
      - run: pytest tests/ -v
      # ruff is not listed in the "dev" extra of pyproject.toml, so it has to
      # be installed explicitly before the lint step can run.
      - run: pip install ruff
      - run: ruff check .
|
||||
27
app/.gitignore
vendored
Normal file
27
app/.gitignore
vendored
Normal file
@@ -0,0 +1,27 @@
|
||||
__pycache__/
|
||||
*.py[cod]
|
||||
*$py.class
|
||||
*.so
|
||||
.Python
|
||||
build/
|
||||
develop-eggs/
|
||||
dist/
|
||||
downloads/
|
||||
eggs/
|
||||
.eggs/
|
||||
lib/
|
||||
lib64/
|
||||
parts/
|
||||
sdist/
|
||||
var/
|
||||
wheels/
|
||||
*.egg-info/
|
||||
.installed.cfg
|
||||
*.egg
|
||||
.env
|
||||
.venv
|
||||
env/
|
||||
venv/
|
||||
ENV/
|
||||
*.log
|
||||
.DS_Store
|
||||
21
app/LICENSE
Normal file
21
app/LICENSE
Normal file
@@ -0,0 +1,21 @@
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2026
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
58
app/README.md
Normal file
58
app/README.md
Normal file
@@ -0,0 +1,58 @@
|
||||
# cmdparse
|
||||
|
||||
A CLI tool that parses unstructured CLI command output into structured formats.
|
||||
|
||||
## Features
|
||||
|
||||
- Auto-detect output format from CLI commands
|
||||
- Support for multiple output formats: JSON, YAML, CSV, raw
|
||||
- Extract specific fields using dot notation
|
||||
- Custom configuration support
|
||||
- Built-in patterns for common CLI outputs
|
||||
|
||||
## Installation
|
||||
|
||||
```bash
|
||||
pip install cmdparse
|
||||
```
|
||||
|
||||
## Usage
|
||||
|
||||
```bash
|
||||
# Parse CLI output with auto-detection
|
||||
cmdparse < input.txt
|
||||
|
||||
# Specify output format
|
||||
cmdparse -o yaml < input.txt
|
||||
|
||||
# Extract specific fields
|
||||
cmdparse -e field1 -e field2 < input.txt
|
||||
|
||||
# Use custom config
|
||||
cmdparse -c custom.yaml < input.txt
|
||||
```
|
||||
|
||||
## Supported Formats
|
||||
|
||||
- Tabular output (with/without borders)
|
||||
- Key-value pairs (colon or equals delimiter)
|
||||
- Delimited text (CSV, TSV, semicolon-separated)
|
||||
- JSON-like output
|
||||
- Raw/unstructured text
|
||||
|
||||
## Configuration
|
||||
|
||||
Create a `.cmdparse.yaml` file in your home directory or project root:
|
||||
|
||||
```yaml
|
||||
parsers:
|
||||
- name: custom
|
||||
pattern: your_regex
|
||||
fields:
|
||||
- field1
|
||||
- field2
|
||||
```
|
||||
|
||||
## License
|
||||
|
||||
MIT
|
||||
3
app/cmdparse/__init__.py
Normal file
3
app/cmdparse/__init__.py
Normal file
@@ -0,0 +1,3 @@
|
||||
"""cmdparse - Parse unstructured CLI output into structured formats."""
|
||||
|
||||
__version__ = "0.1.0"
|
||||
82
app/cmdparse/cli.py
Normal file
82
app/cmdparse/cli.py
Normal file
@@ -0,0 +1,82 @@
|
||||
import sys
|
||||
import click
|
||||
from typing import Optional
|
||||
|
||||
from .parser import parse_text
|
||||
from .extractors import extract_fields
|
||||
from .formatters import format_data
|
||||
|
||||
|
||||
@click.command()
@click.option(
    '--output', '-o',
    type=click.Choice(['json', 'yaml', 'csv', 'raw']),
    default='json',
    help='Output format (default: json)',
)
@click.option(
    '--format', '-f',
    type=click.Choice(['json', 'yaml', 'csv', 'raw', 'auto']),
    default='auto',
    help='Input format hint (auto-detect by default)',
)
@click.option(
    '--field', '-e',
    multiple=True,
    help='Fields to extract (supports dot notation)',
)
@click.option(
    '--config', '-c',
    type=click.Path(exists=True, readable=True),
    help='Path to custom config file',
)
@click.option(
    '--quiet', '-q',
    is_flag=True,
    help='Suppress pattern detection info',
)
@click.argument(
    'input_file',
    type=click.File('r'),
    default='-',
)
def main(
    output: str,
    format: str,
    field: tuple,
    config: Optional[str],
    quiet: bool,
    input_file,
) -> None:
    """Parse unstructured CLI output into structured formats."""
    # The docstring above doubles as the command's --help text, so it is kept
    # short. Flow: read input -> parse -> optionally project fields -> format.
    #
    # NOTE(review): ``config`` is accepted (and validated by click) but is not
    # consulted anywhere in this function.
    # NOTE(review): the ``--format`` choices are passed straight to
    # ``parse_text`` as the pattern type; confirm they are names it recognises.
    try:
        text = input_file.read()

        # Reject empty or whitespace-only input with a non-zero exit status.
        if not (text and text.strip()):
            click.echo("Error: No input provided", err=True)
            sys.exit(1)

        # 'auto' means "let the parser detect the pattern on its own".
        forced_type = None if format == 'auto' else format
        data, detected = parse_text(text, forced_type)

        # Diagnostics go to stderr so stdout carries only the formatted data.
        if not quiet and data:
            click.echo(f"Detected pattern: {detected}", err=True)

        selected = list(field)
        if selected:
            data = extract_fields(data, selected)

        click.echo(format_data(data, output))

    except Exception as e:
        # Top-level boundary: report any failure and exit with status 1.
        click.echo(f"Error: {e}", err=True)
        sys.exit(1)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
88
app/cmdparse/config.py
Normal file
88
app/cmdparse/config.py
Normal file
@@ -0,0 +1,88 @@
|
||||
from typing import Dict, Any, Optional
|
||||
from pathlib import Path
|
||||
import yaml
|
||||
|
||||
|
||||
def find_config_file(config_path: Optional[str] = None) -> Optional[Path]:
    """Locate the configuration file to use.

    Args:
        config_path: Explicit path to a config file. When given, only this
            path is considered; the default locations are not searched.

    Returns:
        The explicit path if it exists, otherwise the first existing default
        location, otherwise ``None``.
    """
    if config_path:
        explicit = Path(config_path)
        return explicit if explicit.exists() else None

    home, cwd = Path.home(), Path.cwd()
    # Checked in order: home-directory files first, then the working directory.
    candidates = (
        home / '.cmdparse.yaml',
        home / '.cmdparse' / 'config.yaml',
        cwd / '.cmdparse.yaml',
        cwd / 'cmdparse.yaml',
    )
    return next((candidate for candidate in candidates if candidate.exists()), None)
|
||||
|
||||
|
||||
def load_config(config_path: Optional[str] = None) -> Dict[str, Any]:
    """Load the user configuration from a YAML file.

    Args:
        config_path: Optional explicit path, forwarded to
            ``find_config_file``.

    Returns:
        The parsed configuration mapping. ``{'parsers': []}`` is returned
        when no file is found, the file is empty, it cannot be read or
        parsed, or its top-level document is not a mapping.
    """
    import sys

    config_file = find_config_file(config_path)
    if config_file is None:
        return {'parsers': []}

    try:
        # YAML streams are Unicode; do not depend on the platform's locale.
        with open(config_file, 'r', encoding='utf-8') as f:
            config = yaml.safe_load(f)
    except Exception as e:
        # Best effort: a broken config must not abort the tool. The warning
        # goes to stderr so it never mixes with formatted data on stdout.
        print(f"Warning: Could not load config file: {e}", file=sys.stderr)
        return {'parsers': []}

    if not config:
        return {'parsers': []}

    if not isinstance(config, dict):
        # A list or scalar at the top level would break callers using .get().
        print(
            "Warning: Could not load config file: top-level YAML value must be a mapping",
            file=sys.stderr,
        )
        return {'parsers': []}

    return config
|
||||
|
||||
|
||||
def get_builtin_config_path() -> Path:
    """Return the path of the built-in parser definitions file.

    The file lives in the ``config`` directory next to this module. The
    previous location (a ``config`` directory one level above the package)
    is still honoured when only that one exists.

    Returns:
        Path to ``default_parsers.yaml``; the file is not guaranteed to exist.
    """
    module_dir = Path(__file__).parent
    packaged = module_dir / 'config' / 'default_parsers.yaml'
    legacy = module_dir.parent / 'config' / 'default_parsers.yaml'

    # Fall back to the old location only if it is the one actually present.
    if not packaged.exists() and legacy.exists():
        return legacy
    return packaged
|
||||
|
||||
|
||||
def load_builtin_config() -> Dict[str, Any]:
    """Load the built-in parser configuration.

    Returns:
        The parsed mapping from the built-in YAML file, or
        ``{'parsers': []}`` when the file is missing, empty, unreadable,
        unparsable, or does not contain a mapping at the top level.
    """
    config_path = get_builtin_config_path()
    if not config_path.exists():
        return {'parsers': []}

    try:
        # YAML streams are Unicode; do not depend on the platform's locale.
        with open(config_path, 'r', encoding='utf-8') as f:
            config = yaml.safe_load(f)
    except Exception:
        # Deliberately best effort: the tool still works without built-ins.
        return {'parsers': []}

    # Guard against a list/scalar document so callers can rely on .get().
    if config and isinstance(config, dict):
        return config
    return {'parsers': []}
|
||||
|
||||
|
||||
def get_custom_parser(config: Dict[str, Any], pattern_name: str) -> Optional[Dict[str, Any]]:
    """Look up a parser definition by name.

    Args:
        config: Configuration mapping; its ``'parsers'`` entry is searched.
        pattern_name: Value to match against each parser's ``'name'``.

    Returns:
        The first parser whose name matches, or ``None`` if there is none.
    """
    matching = (
        entry
        for entry in config.get('parsers', [])
        if entry.get('name') == pattern_name
    )
    return next(matching, None)
|
||||
|
||||
|
||||
def get_all_parsers() -> Dict[str, Any]:
    """Merge built-in parser definitions with the user's custom ones.

    A custom parser replaces any built-in parser that has the same name.

    Returns:
        A mapping with a single ``'parsers'`` list: the built-ins that were
        not overridden, followed by all custom parsers.
    """
    builtin_parsers = load_builtin_config().get('parsers', [])
    custom_parsers = load_config().get('parsers', [])

    overridden = {entry.get('name') for entry in custom_parsers}
    surviving_builtin = [
        entry for entry in builtin_parsers
        if entry.get('name') not in overridden
    ]

    return {'parsers': surviving_builtin + custom_parsers}
|
||||
35
app/cmdparse/config/default_parsers.yaml
Normal file
35
app/cmdparse/config/default_parsers.yaml
Normal file
@@ -0,0 +1,35 @@
|
||||
parsers:
|
||||
- name: docker_ps
|
||||
pattern: docker_ps
|
||||
fields:
|
||||
- CONTAINER ID
|
||||
- IMAGE
|
||||
- COMMAND
|
||||
- CREATED
|
||||
- STATUS
|
||||
- PORTS
|
||||
- NAMES
|
||||
|
||||
- name: docker_images
|
||||
pattern: docker_images
|
||||
fields:
|
||||
- REPOSITORY
|
||||
- TAG
|
||||
- IMAGE ID
|
||||
- CREATED
|
||||
- SIZE
|
||||
|
||||
- name: ps aux
|
||||
pattern: ps_aux
|
||||
fields:
|
||||
- USER
|
||||
- PID
|
||||
- %CPU
|
||||
- %MEM
|
||||
- VSZ
|
||||
- RSS
|
||||
- TTY
|
||||
- STAT
|
||||
- START
|
||||
- TIME
|
||||
- COMMAND
|
||||
25
app/cmdparse/extractors.py
Normal file
25
app/cmdparse/extractors.py
Normal file
@@ -0,0 +1,25 @@
|
||||
"""Field extraction utilities."""
|
||||
|
||||
from typing import Any, Dict, List
|
||||
|
||||
|
||||
def extract_fields(data: List[Dict[str, Any]], fields: List[str]) -> List[Dict[str, Any]]:
    """Project each record onto the requested fields.

    Args:
        data: Parsed records.
        fields: Field names to keep. A name is first looked up as a literal
            key; if no such key exists and the name contains dots, it is
            treated as a path through nested dictionaries.

    Returns:
        One dictionary per input record, keyed by the requested field names
        exactly as given. Fields that cannot be resolved map to ``None``.
    """
    result = []
    for item in data:
        extracted = {}
        for field in fields:
            # Literal keys win: parsed keys may themselves contain dots
            # (e.g. "net.ipv4.ip_forward"), which a path walk would miss.
            if field in item or '.' not in field:
                extracted[field] = item.get(field)
                continue

            value = item
            for part in field.split('.'):
                if not isinstance(value, dict):
                    # Path runs deeper than the data does.
                    value = None
                    break
                value = value.get(part)
            extracted[field] = value
        result.append(extracted)
    return result
|
||||
26
app/cmdparse/formatters.py
Normal file
26
app/cmdparse/formatters.py
Normal file
@@ -0,0 +1,26 @@
|
||||
"""Output formatting utilities."""
|
||||
|
||||
from typing import Any, Dict, List
|
||||
import json
|
||||
import yaml
|
||||
|
||||
|
||||
def format_data(data: List[Dict[str, Any]], format: str) -> str:
    """Render parsed records in the requested output format.

    Args:
        data: Parsed records.
        format: ``'json'``, ``'yaml'`` or ``'csv'``; any other value renders
            one ``str(record)`` per line.

    Returns:
        The formatted text, or an empty string when ``data`` is empty.
        CSV output uses the keys of the first record as the header row and
        has no trailing newline.
    """
    if not data:
        return ''

    if format == 'json':
        return json.dumps(data, indent=2)

    if format == 'yaml':
        return yaml.safe_dump(data, default_flow_style=False)

    if format == 'csv':
        import csv
        import io

        headers = list(data[0].keys())
        buffer = io.StringIO()
        # csv.writer quotes values containing commas, quotes or newlines,
        # which a plain ','.join would emit as corrupt rows.
        writer = csv.writer(buffer, lineterminator='\n')
        writer.writerow(headers)
        for row in data:
            writer.writerow([str(row.get(header, '')) for header in headers])

        rendered = buffer.getvalue()
        # Drop only the final line terminator to keep the original shape.
        return rendered[:-1] if rendered.endswith('\n') else rendered

    return '\n'.join(str(row) for row in data)
|
||||
140
app/cmdparse/parser.py
Normal file
140
app/cmdparse/parser.py
Normal file
@@ -0,0 +1,140 @@
|
||||
import re
|
||||
import csv
|
||||
from typing import List, Dict, Any, Optional, Tuple
|
||||
from .patterns import detect_pattern_type
|
||||
|
||||
|
||||
def parse_table(text: str) -> List[Dict[str, str]]:
    """Parse tabulated CLI output into a list of row dictionaries.

    If the text contains a ``|`` anywhere, it is treated as a pipe-delimited
    table and lines without a pipe are ignored. Otherwise columns are split
    on runs of whitespace. The first line with at least two non-empty cells
    becomes the header; border/separator lines are skipped.

    Args:
        text: Raw table text.

    Returns:
        One dictionary per data row, keyed by header name. Cells missing
        from a short row are filled with ``''``.
    """
    separator_pattern = re.compile(r'^[\s]*[+|][-+|]+[+.]$|^[\s]*[+|]([-+|]+)[+|].*$')
    space_separator_pattern = re.compile(r'^[\s]*[-=]{10,}[\s]*$')

    has_pipe = '|' in text

    # (cell index, header name) for every named column.
    columns: List[Tuple[int, str]] = []
    rows: List[Dict[str, str]] = []

    for line in text.strip().split('\n'):
        if separator_pattern.match(line):
            continue

        if has_pipe:
            if '|' not in line:
                continue
            cells = [cell.strip() for cell in line.split('|')]
            # Only the empties produced by the outer border pipes are
            # dropped; interior empty cells must keep their position so
            # later values stay in the right column.
            if cells[0] == '':
                cells = cells[1:]
            if cells and cells[-1] == '':
                cells = cells[:-1]
        else:
            if space_separator_pattern.match(line):
                continue
            cells = line.split()
            if not cells:
                # Blank line inside the table: not a data row.
                continue

        if not columns:
            named = [(index, name) for index, name in enumerate(cells) if name]
            if len(named) >= 2:
                columns = named
            continue

        rows.append({
            name: (cells[index] if index < len(cells) else '')
            for index, name in columns
        })

    return rows
|
||||
|
||||
|
||||
def parse_key_value(text: str, delimiter: str = ':') -> List[Dict[str, str]]:
    """Parse ``key<delimiter>value`` lines into a single dictionary.

    Keys must start with a letter or underscore and may contain letters,
    digits, ``_``, ``-`` and ``.``. Lines that do not match are ignored;
    when a key repeats, the last value wins.

    Args:
        text: Raw text, one pair per line.
        delimiter: ``':'`` selects colon-separated pairs; any other value
            selects ``'='``-separated pairs.

    Returns:
        A one-element list holding the dictionary, or ``[]`` if no line
        matched.
    """
    separator = ':' if delimiter == ':' else '='

    # ``[^\S\n]`` is "whitespace except newline": with plain ``\s`` a key
    # whose value is empty would swallow the following line as its value.
    pattern = re.compile(
        r'^[^\S\n]*([A-Za-z_][A-Za-z0-9_\-\.]*)[^\S\n]*'
        + separator
        + r'[^\S\n]*(.+)$',
        re.MULTILINE,
    )

    result = {}
    for match in pattern.finditer(text):
        result[match.group(1).strip()] = match.group(2).strip()

    return [result] if result else []
|
||||
|
||||
|
||||
def parse_delimited(text: str, delimiter: str = ',') -> List[Dict[str, str]]:
    """Parse delimiter-separated text using its first line as the header.

    Args:
        text: Raw delimited text (CSV, TSV, ...).
        delimiter: Single-character field separator.

    Returns:
        One dictionary per data line, as produced by ``csv.DictReader``.
    """
    records = text.strip().split('\n')
    return list(csv.DictReader(records, delimiter=delimiter))
|
||||
|
||||
|
||||
def parse_raw(text: str) -> List[Dict[str, str]]:
    """Fallback parser: keep every non-blank line with its line number.

    Args:
        text: Raw text in no recognised format.

    Returns:
        A list of ``{'line': n, 'content': stripped_line}`` entries, where
        ``n`` is the 1-based position within the stripped text. Blank lines
        are skipped but still counted.
    """
    stripped_lines = (raw.strip() for raw in text.strip().split('\n'))
    return [
        {'line': number, 'content': content}
        for number, content in enumerate(stripped_lines, start=1)
        if content
    ]
|
||||
|
||||
|
||||
def _parse_whitespace_pairs(text: str) -> List[Dict[str, str]]:
    """Collect ``key value`` lines (two tokens split by spaces/tabs) into one dict."""
    pair = re.compile(
        r'^([A-Za-z_][A-Za-z0-9_\-\.]*)[^\S\n]+(\S+)[^\S\n]*$',
        re.MULTILINE,
    )
    pairs = {match.group(1): match.group(2) for match in pair.finditer(text)}
    return [pairs] if pairs else []


def parse_text(text: str, pattern_type: Optional[str] = None) -> Tuple[List[Dict[str, str]], str]:
    """
    Parse unstructured CLI output into structured format.

    Args:
        text: The unstructured CLI output text
        pattern_type: Optional explicit pattern type, auto-detected if None

    Returns:
        Tuple of (parsed data as list of dicts, detected pattern type).
        Empty or whitespace-only text yields ``([], 'empty')``. Pattern types
        without a dedicated parser fall back to ``parse_raw``.
    """
    if not text or not text.strip():
        return [], 'empty'

    detected_type = pattern_type or detect_pattern_type(text)

    if detected_type == 'table':
        data = parse_table(text)
    elif detected_type in ('key_value_colon', 'key_value_equals'):
        data = parse_key_value(text, ':' if detected_type == 'key_value_colon' else '=')
    elif detected_type in ('delimited_comma', 'delimited_tab', 'delimited_semicolon'):
        delim_map = {
            'delimited_comma': ',',
            'delimited_tab': '\t',
            'delimited_semicolon': ';'
        }
        data = parse_delimited(text, delim_map.get(detected_type, ','))
    elif detected_type == 'json_like':
        # The text is passed through untouched under a single 'raw' key.
        data = [{'raw': text}]
    elif detected_type == 'key_value_block':
        # This type is for whitespace-separated pairs, which the colon
        # parser can never match; using it alone dropped the whole input.
        # Colon parsing is kept as a fallback for text without such pairs.
        data = _parse_whitespace_pairs(text) or parse_key_value(text, ':')
    else:
        data = parse_raw(text)

    return data, detected_type
|
||||
126
app/cmdparse/patterns.py
Normal file
126
app/cmdparse/patterns.py
Normal file
@@ -0,0 +1,126 @@
|
||||
import re
|
||||
from dataclasses import dataclass
|
||||
|
||||
|
||||
@dataclass
class Pattern:
    """A named, compiled regex together with the score it contributes on a match."""

    # Identifier of the output type this pattern detects.
    name: str
    # Compiled regular expression used for detection.
    pattern: re.Pattern
    # Score weight associated with this pattern.
    confidence: int
|
||||
|
||||
|
||||
# Table header: a pipe/dash border line, or a line made of capitalised words.
TABLE_HEADER_PATTERN = re.compile(
    r'^[\s]*(?:\|[\s-]*)+[+\-|]+$|'
    r'^[A-Z][A-Za-z\s]+(?:[A-Z][A-Za-z\s]*)+$|'
    r'^\s*(?:[A-Z][A-Za-z_]+(?:\s+[A-Z][A-Za-z_]+)*)\s+(?:[A-Z][A-Za-z_]+(?:\s+[A-Z][A-Za-z_]+)*)\s*$',
    flags=re.MULTILINE,
)

# Table row shapes. NOTE(review): this pattern is not registered in PATTERNS.
TABLE_ROW_PATTERN = re.compile(
    r'^\s*\|?\s*(.+?)\s*\|?\s*$|'
    r'^\s*([^\|]+?)\s*\|\s*(.+?)\s*$|'
    r'^\s*\+[-+\+]+\+\s*$'
)

# "key: value" lines.
KEY_VALUE_COLON_PATTERN = re.compile(
    r'^\s*([A-Za-z_][A-Za-z0-9_\-\.]*)\s*:\s*(.+)$',
    flags=re.MULTILINE,
)

# "key = value" lines.
KEY_VALUE_EQUALS_PATTERN = re.compile(
    r'^\s*([A-Za-z_][A-Za-z0-9_\-\.]*)\s*=\s*(.+)$',
    flags=re.MULTILINE,
)

# The three delimited patterns below are compiled without re.MULTILINE, so
# ``^`` and ``$`` anchor to the start and end of the string being searched.

# Exactly three comma-separated fields.
DELIMITED_COMMA_PATTERN = re.compile(
    r'^\s*([^,]+),([^,]+),([^,]*)$'
)

# Exactly two tab-separated fields.
DELIMITED_TAB_PATTERN = re.compile(
    r'^\s*([^\t]+)\t([^\t]*)\s*$'
)

# Exactly three semicolon-separated fields.
DELIMITED_SEMICOLON_PATTERN = re.compile(
    r'^\s*([^;]+);([^;]+);([^;]*)\s*$'
)

# Text that opens with ``{ "key":`` like a JSON object.
JSON_LIKE_PATTERN = re.compile(
    r'^\s*\{\s*"[^"]+"\s*:\s*'
)

# An identifier followed by whitespace and a single token.
KEY_VALUE_BLOCK_PATTERN = re.compile(
    r'^([A-Za-z_][A-Za-z0-9_\-\.]*)\s+(\S+)$',
    flags=re.MULTILINE,
)


# Detection candidates with their base scores. Order matters: when scores
# tie, the entry listed first wins in detect_pattern_type.
PATTERNS = [
    Pattern(name='table', pattern=TABLE_HEADER_PATTERN, confidence=80),
    Pattern(name='key_value_colon', pattern=KEY_VALUE_COLON_PATTERN, confidence=70),
    Pattern(name='key_value_equals', pattern=KEY_VALUE_EQUALS_PATTERN, confidence=65),
    Pattern(name='delimited_tab', pattern=DELIMITED_TAB_PATTERN, confidence=85),
    Pattern(name='delimited_comma', pattern=DELIMITED_COMMA_PATTERN, confidence=75),
    Pattern(name='delimited_semicolon', pattern=DELIMITED_SEMICOLON_PATTERN, confidence=75),
    Pattern(name='json_like', pattern=JSON_LIKE_PATTERN, confidence=90),
    Pattern(name='key_value_block', pattern=KEY_VALUE_BLOCK_PATTERN, confidence=30),
]
|
||||
|
||||
|
||||
def detect_pattern_type(text: str) -> str:
    """Guess which kind of CLI output ``text`` is.

    Every entry in ``PATTERNS`` starts at zero and collects points from
    regex hits and from per-line delimiter statistics; the highest-scoring
    name is returned.

    Args:
        text: Raw CLI output.

    Returns:
        ``'empty'`` for blank input, the best-scoring pattern name, or
        ``'raw'`` when nothing scored above zero.
    """
    if not (text and text.strip()):
        return 'empty'

    lines = text.strip().split('\n')
    first_line = lines[0]
    half = len(lines) * 0.5
    multi_line = len(lines) > 1

    scores = {candidate.name: 0 for candidate in PATTERNS}

    # How many lines contain each delimiter. The exclusions keep a line from
    # being counted for more than one competing delimiter family.
    tab_count = sum('\t' in line for line in lines)
    comma_count = sum(',' in line and '\t' not in line for line in lines)
    colon_count = sum(':' in line and '\t' not in line for line in lines)
    equals_count = sum(
        '=' in line and ':' not in line and '\t' not in line
        for line in lines
    )
    semicolon_count = sum(
        ';' in line and ',' not in line and '=' not in line and ':' not in line
        for line in lines
    )

    # Regex evidence: base confidence on any hit in the text, plus a bonus
    # when the first line of a multi-line text matches as well.
    for candidate in PATTERNS:
        if not candidate.pattern.search(text):
            continue
        scores[candidate.name] += candidate.confidence
        if multi_line and candidate.pattern.match(first_line):
            scores[candidate.name] += 10

    # Statistical evidence: a delimiter present on at least half the lines.
    if tab_count >= half:
        scores['delimited_tab'] += 30

    if comma_count >= half and tab_count < half:
        scores['delimited_comma'] += 25

    if colon_count >= half:
        scores['key_value_colon'] += 25

    if equals_count >= half:
        scores['key_value_equals'] += 25

    if semicolon_count >= half:
        scores['delimited_semicolon'] += 30

    # Every line having the same word count as the first suggests a
    # whitespace-aligned table, unless tabs or commas dominate.
    if len(lines) >= 2:
        words_first = len(first_line.split())
        uniform = all(len(line.split()) == words_first for line in lines[1:])
        if uniform and tab_count < half and comma_count < half:
            scores['table'] += 20

    # max() returns the first of several equal maxima, i.e. the earliest
    # PATTERNS entry, matching a stable descending sort.
    best_name, best_score = max(
        scores.items(), key=lambda entry: entry[1], default=('raw', 0)
    )
    return best_name if best_score > 0 else 'raw'
|
||||
39
app/pyproject.toml
Normal file
39
app/pyproject.toml
Normal file
@@ -0,0 +1,39 @@
|
||||
[project]
|
||||
name = "cmdparse"
|
||||
version = "0.1.0"
|
||||
description = "A CLI tool that parses unstructured CLI command output into structured formats"
|
||||
readme = "README.md"
|
||||
requires-python = ">=3.8"
|
||||
classifiers = [
|
||||
"Development Status :: 4 - Beta",
|
||||
"Intended Audience :: Developers",
|
||||
"License :: OSI Approved :: MIT License",
|
||||
"Programming Language :: Python :: 3",
|
||||
"Programming Language :: Python :: 3.8",
|
||||
"Programming Language :: Python :: 3.9",
|
||||
"Programming Language :: Python :: 3.10",
|
||||
"Programming Language :: Python :: 3.11",
|
||||
"Programming Language :: Python :: 3.12",
|
||||
]
|
||||
dependencies = [
|
||||
"PyYAML>=6.0",
|
||||
"click>=8.0",
|
||||
]
|
||||
|
||||
[project.optional-dependencies]
|
||||
dev = [
|
||||
"pytest>=7.0",
|
||||
"pytest-cov>=4.0",
|
||||
"types-PyYAML>=6.0",
|
||||
]
|
||||
|
||||
[project.scripts]
|
||||
cmdparse = "cmdparse.cli:main"
|
||||
|
||||
[build-system]
|
||||
requires = ["setuptools>=61.0"]
|
||||
build-backend = "setuptools.build_meta"
|
||||
|
||||
[tool.pytest.ini_options]
|
||||
testpaths = ["tests"]
|
||||
python_files = ["test_*.py"]
|
||||
0
app/tests/__init__.py
Normal file
0
app/tests/__init__.py
Normal file
42
app/tests/test_cli.py
Normal file
42
app/tests/test_cli.py
Normal file
@@ -0,0 +1,42 @@
|
||||
import pytest
|
||||
from click.testing import CliRunner
|
||||
from cmdparse.cli import main
|
||||
|
||||
|
||||
def test_cli_basic():
|
||||
"""Test basic CLI functionality."""
|
||||
runner = CliRunner()
|
||||
result = runner.invoke(main, ['-q'], input='test input')
|
||||
assert result.exit_code == 0
|
||||
|
||||
|
||||
def test_cli_output_format():
|
||||
"""Test different output formats."""
|
||||
runner = CliRunner()
|
||||
for fmt in ['json', 'yaml', 'csv', 'raw']:
|
||||
result = runner.invoke(main, ['-o', fmt, '-q'], input='test input')
|
||||
assert result.exit_code == 0
|
||||
|
||||
|
||||
def test_cli_extract_fields():
    """Test that -e keeps the requested field and drops the others."""
    runner = CliRunner()
    input_text = 'key1: value1\nkey2: value2'
    result = runner.invoke(main, ['-e', 'key1', '-q'], input=input_text)
    assert result.exit_code == 0
    # Checking only the exit code would also pass if extraction did nothing.
    assert 'value1' in result.output
    assert 'value2' not in result.output
|
||||
|
||||
|
||||
def test_cli_empty_input():
|
||||
"""Test handling of empty input."""
|
||||
runner = CliRunner()
|
||||
result = runner.invoke(main, ['-q'], input='')
|
||||
assert result.exit_code == 1
|
||||
assert 'No input' in result.output
|
||||
|
||||
|
||||
def test_cli_quiet_flag():
    """Test quiet flag suppresses pattern detection."""
    runner = CliRunner()
    result_quiet = runner.invoke(main, ['-q'], input='test')
    result_normal = runner.invoke(main, [], input='test')
    assert result_quiet.exit_code == 0
    assert result_normal.exit_code == 0
    # The detection notice must appear only when -q is absent.
    assert 'Detected pattern' not in result_quiet.output
    assert 'Detected pattern' in result_normal.output
|
||||
21
app/tests/test_config.py
Normal file
21
app/tests/test_config.py
Normal file
@@ -0,0 +1,21 @@
|
||||
import pytest
|
||||
from cmdparse.config import find_config_file, load_config, get_builtin_config_path
|
||||
|
||||
|
||||
def test_find_config_file_nonexistent():
|
||||
"""Test finding config file when none exists."""
|
||||
result = find_config_file('/nonexistent/path.yaml')
|
||||
assert result is None
|
||||
|
||||
|
||||
def test_load_config_nonexistent():
|
||||
"""Test loading config when file doesn't exist."""
|
||||
result = load_config('/nonexistent/path.yaml')
|
||||
assert result == {'parsers': []}
|
||||
|
||||
|
||||
def test_get_builtin_config_path():
|
||||
"""Test getting built-in config path."""
|
||||
path = get_builtin_config_path()
|
||||
assert 'config' in str(path)
|
||||
assert 'default_parsers.yaml' in str(path)
|
||||
72
app/tests/test_parsers.py
Normal file
72
app/tests/test_parsers.py
Normal file
@@ -0,0 +1,72 @@
|
||||
import pytest
|
||||
from cmdparse.parser import parse_table, parse_key_value, parse_delimited, parse_raw, parse_text
|
||||
|
||||
|
||||
def test_parse_table_with_pipes():
|
||||
"""Test parsing table with pipe separators."""
|
||||
text = "| Name | Age | City |\n| Alice | 30 | NYC |\n| Bob | 25 | LA |"
|
||||
result = parse_table(text)
|
||||
assert len(result) == 2
|
||||
assert result[0]['Name'] == 'Alice'
|
||||
assert result[1]['Name'] == 'Bob'
|
||||
|
||||
|
||||
def test_parse_table_without_pipes():
|
||||
"""Test parsing table without pipe separators."""
|
||||
text = "Name Age City\nAlice 30 NYC\nBob 25 LA"
|
||||
result = parse_table(text)
|
||||
assert len(result) == 2
|
||||
assert result[0]['Name'] == 'Alice'
|
||||
|
||||
|
||||
def test_parse_key_value_colon():
|
||||
"""Test parsing key-value with colon delimiter."""
|
||||
text = "key1: value1\nkey2: value2"
|
||||
result = parse_key_value(text, ':')
|
||||
assert len(result) == 1
|
||||
assert result[0]['key1'] == 'value1'
|
||||
|
||||
|
||||
def test_parse_key_value_equals():
|
||||
"""Test parsing key-value with equals delimiter."""
|
||||
text = "key1=value1\nkey2=value2"
|
||||
result = parse_key_value(text, '=')
|
||||
assert len(result) == 1
|
||||
assert result[0]['key1'] == 'value1'
|
||||
|
||||
|
||||
def test_parse_delimited_comma():
|
||||
"""Test parsing comma-delimited text."""
|
||||
text = "name,age,city\nalice,30,nyc\nbob,25,la"
|
||||
result = parse_delimited(text, ',')
|
||||
assert len(result) == 2
|
||||
assert result[0]['name'] == 'alice'
|
||||
|
||||
|
||||
def test_parse_raw():
|
||||
"""Test parsing raw text."""
|
||||
text = "line1\nline2\nline3"
|
||||
result = parse_raw(text)
|
||||
assert len(result) == 3
|
||||
assert result[0]['content'] == 'line1'
|
||||
|
||||
|
||||
def test_parse_text_auto_detect_table():
|
||||
"""Test auto-detection of table format."""
|
||||
text = "| Header |\n| Value |"
|
||||
data, detected = parse_text(text)
|
||||
assert detected == 'table'
|
||||
|
||||
|
||||
def test_parse_text_auto_detect_key_value():
|
||||
"""Test auto-detection of key-value format."""
|
||||
text = "key: value"
|
||||
data, detected = parse_text(text)
|
||||
assert detected in ('key_value_colon', 'key_value_block')
|
||||
|
||||
|
||||
def test_parse_text_empty_input():
|
||||
"""Test handling of empty input."""
|
||||
data, detected = parse_text("")
|
||||
assert data == []
|
||||
assert detected == 'empty'
|
||||
36
app/tests/test_patterns.py
Normal file
36
app/tests/test_patterns.py
Normal file
@@ -0,0 +1,36 @@
|
||||
import pytest
|
||||
from cmdparse.patterns import detect_pattern_type, PATTERNS
|
||||
|
||||
|
||||
def test_detect_table_with_pipes():
|
||||
"""Test detection of table format with pipe separators."""
|
||||
text = "| Name | Age |\n| Alice | 30 |"
|
||||
assert detect_pattern_type(text) == 'table'
|
||||
|
||||
|
||||
def test_detect_key_value_colon():
|
||||
"""Test detection of key-value with colon."""
|
||||
text = "key1: value1\nkey2: value2"
|
||||
assert detect_pattern_type(text) == 'key_value_colon'
|
||||
|
||||
|
||||
def test_detect_delimited_tab():
|
||||
"""Test detection of tab-delimited format."""
|
||||
text = "name\tage\tcity\nalice\t30\tnyc"
|
||||
assert detect_pattern_type(text) == 'delimited_tab'
|
||||
|
||||
|
||||
def test_detect_empty():
|
||||
"""Test detection of empty input."""
|
||||
assert detect_pattern_type("") == 'empty'
|
||||
|
||||
|
||||
def test_detect_raw():
|
||||
"""Test detection of raw/unstructured text."""
|
||||
text = "some random text without pattern"
|
||||
assert detect_pattern_type(text) == 'raw'
|
||||
|
||||
|
||||
def test_patterns_not_empty():
|
||||
"""Test that PATTERNS list is not empty."""
|
||||
assert len(PATTERNS) > 0
|
||||
@@ -1,6 +1,5 @@
|
||||
"""Tests for field extraction module."""
|
||||
|
||||
import pytest
|
||||
from cmdparse.extractors import (
|
||||
extract_fields,
|
||||
get_nested_value,
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
"""Tests for output formatting module."""
|
||||
|
||||
import pytest
|
||||
from cmdparse.formatters import (
|
||||
format_json,
|
||||
format_yaml,
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
"""End-to-end integration tests for cmdparse CLI."""
|
||||
|
||||
import pytest
|
||||
from click.testing import CliRunner
|
||||
from cmdparse.cli import main
|
||||
|
||||
@@ -88,7 +87,7 @@ class TestIntegrationScenarios:
|
||||
def test_docker_ps_style_output(self):
|
||||
runner = CliRunner()
|
||||
input_text = """CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES
|
||||
abc123 nginx:1 "nginx" 1h ago Up 80/tcp nginx"""
|
||||
abc123 nginx:1 \"nginx\" 1h ago Up 80/tcp nginx"""
|
||||
result = runner.invoke(main, ['-o', 'json', '-q'], input=input_text)
|
||||
assert result.exit_code == 0
|
||||
assert 'nginx' in result.output
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
"""Tests for pattern detection module."""
|
||||
|
||||
import pytest
|
||||
from cmdparse.patterns import detect_pattern_type
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user