Initial upload: man-card CLI tool with PDF/PNG generation, templates, and tests
Some checks failed
CI / test (push) Has been cancelled
Some checks failed
CI / test (push) Has been cancelled
This commit is contained in:
138
man_card/man_parser.py
Normal file
138
man_card/man_parser.py
Normal file
@@ -0,0 +1,138 @@
|
|||||||
|
"""Man page parser module."""
|
||||||
|
|
||||||
|
import os
import re
import subprocess
from dataclasses import dataclass, field
from typing import Optional
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class Option:
    """One command-line option taken from a man page.

    Attributes:
        flag: The option's flag text, e.g. ``-a, --all``.
        description: Help text associated with the flag.
        argument: Optional argument text for the option; ``None`` unless
            explicitly provided.
    """

    flag: str
    description: str
    argument: Optional[str] = None
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class CommandInfo:
    """Structured record of what was extracted from one man page.

    Only ``name`` is required; every other field starts empty (or at its
    default) and is filled in as the matching section is parsed.
    """

    # Command name.
    name: str
    # Text of the SYNOPSIS section, joined onto one line.
    synopsis: str = ""
    # Text of the DESCRIPTION section, joined onto one line.
    description: str = ""
    # Options collected from the OPTIONS section.
    options: list[Option] = field(default_factory=list)
    # Non-comment lines collected from the EXAMPLES section.
    examples: list[str] = field(default_factory=list)
    # Manual section number, kept as a string; defaults to "1".
    section: str = "1"
|
||||||
|
|
||||||
|
|
||||||
|
class ManPageParser:
    """Parser for Unix man pages.

    ``parse`` runs ``man`` for a command and passes the rendered text to
    ``_parse_content``, which walks it line by line, groups lines under the
    section headers it recognises and fills a ``CommandInfo``.
    """

    # A stripped line made up only of capitals and whitespace is a header.
    SECTION_PATTERN = re.compile(r'^([A-Z][A-Z\s]+)$')
    # "<name> - <summary>" as found in the NAME section.
    NAME_PATTERN = re.compile(r'^([a-zA-Z0-9_-]+)\s*-\s*(.+)$')
    SYNOPSIS_PATTERN = re.compile(r'^\s*(?:\\fB)?(.+?)(?:\\fR)?\s*$')
    # Option header line.  Group 1 is the flag spec (``-a``, ``--all``,
    # ``-a, --all``, optionally with an attached ``=ARG`` / ``[=ARG]``),
    # group 2 the long option if present, group 3 the optional same-line
    # description.  The long form is tried first so that ``-a, --all`` on a
    # line of its own is not split into flag ``-a,`` + description ``--all``.
    OPTION_PATTERN = re.compile(
        r'^((?:-[a-zA-Z0-9],?\s*)?(--[a-zA-Z0-9][a-zA-Z0-9-]*)(?:\[?=[^\s\]]+\]?)?'
        r'|-[a-zA-Z0-9])'
        r'(?:\s+(.+))?$'
    )
    BLANK_LINE = re.compile(r'^\s*$')
    FONT_PATTERN = re.compile(r'\\f[BIRP]')

    # Overstrike pairs ("X<backspace>X", "_<backspace>X") and ANSI SGR
    # sequences that some man implementations emit even when piped.
    _TERMINAL_FORMATTING = re.compile(r'.\x08|\x1b\[[0-9;]*m')
    # Section headers whose content is collected; all others are skipped.
    _KNOWN_SECTIONS = frozenset(
        {"NAME", "SYNOPSIS", "OPTIONS", "DESCRIPTION", "EXAMPLES"}
    )

    def parse(self, command: str, section: str = "1") -> "CommandInfo":
        """Fetch and parse the man page for ``command``.

        Args:
            command: Name of the page to look up.
            section: Manual section passed to ``man``; also stored on the
                returned ``CommandInfo``.

        Returns:
            The ``CommandInfo`` built from the rendered page.

        Raises:
            ValueError: If ``man`` is not installed, exits with a non-zero
                status, or does not finish within 30 seconds.
        """
        env = {**os.environ, "PAGER": "", "COLUMNS": "200"}
        try:
            result = subprocess.run(
                ["man", "-P", "cat", section, command],
                capture_output=True,
                text=True,
                # Odd bytes in a page must not abort the whole lookup.
                errors="replace",
                env=env,
                timeout=30,
            )
        except subprocess.TimeoutExpired as exc:
            raise ValueError(
                f"man: {command}: timeout while fetching man page"
            ) from exc
        except FileNotFoundError as exc:
            raise ValueError(
                f"man: {command}: 'man' executable not found"
            ) from exc

        if result.returncode != 0:
            raise ValueError(f"man: {command}: command not found")

        info = self._parse_content(result.stdout, command)
        # Record the section that was actually requested instead of leaving
        # the dataclass default in place.
        info.section = section
        return info

    @classmethod
    def _strip_formatting(cls, content: str) -> str:
        """Remove overstrike pairs and SGR escape sequences from ``content``."""
        return cls._TERMINAL_FORMATTING.sub('', content)

    def _parse_content(self, content: str, command: str) -> "CommandInfo":
        """Parse rendered man page text into a ``CommandInfo``.

        Args:
            content: Full text of the page.
            command: Initial value for ``CommandInfo.name``; replaced if the
                NAME section yields a name.

        Returns:
            The populated ``CommandInfo``.
        """
        info = CommandInfo(name=command)
        lines = self._strip_formatting(content).split('\n')
        current_section = ""
        buffer: list[str] = []

        for line in lines:
            stripped = line.strip()
            header = self.SECTION_PATTERN.match(stripped)

            if header:
                # A new header closes whatever section was being collected.
                if buffer and current_section:
                    self._process_section(info, current_section, buffer, [])
                section_name = header.group(1).strip()
                current_section = (
                    section_name if section_name in self._KNOWN_SECTIONS else ""
                )
                buffer = []
            elif current_section:
                text = self.FONT_PATTERN.sub('', stripped)
                if text.strip():
                    buffer.append(text)
                elif buffer:
                    # Keep paragraph breaks, but never a leading blank line.
                    buffer.append("")

        if buffer and current_section:
            self._process_section(info, current_section, buffer, [])

        if not info.synopsis:
            # No SYNOPSIS section: fall back to the name taken from the first
            # "<name> - <summary>" line anywhere in the page.
            for line in lines:
                name_match = self.NAME_PATTERN.match(line.strip())
                if name_match:
                    info.synopsis = name_match.group(1)
                    break

        return info

    def _parse_option_lines(
        self, lines: list[str]
    ) -> list[tuple[str, Optional[str], str]]:
        """Turn OPTIONS section lines into ``(flag, argument, description)``.

        A line matching ``OPTION_PATTERN`` starts a new option.  Following
        non-blank lines are appended to its description until a blank line or
        the next option header.  An attached ``=ARG`` / ``[=ARG]`` is split
        off the flag and returned as the argument (``None`` when absent).
        """
        collected: list[list] = []
        current = None  # entry still accepting continuation lines

        for raw in lines:
            line = raw.strip()
            match = self.OPTION_PATTERN.match(line)
            if match:
                spec, _, arg = match.group(1).partition('=')
                parts: list[str] = []
                inline = (match.group(3) or "").strip()
                if inline:
                    parts.append(inline)
                current = [spec.rstrip('[').strip(), arg.rstrip(']') or None, parts]
                collected.append(current)
            elif not line:
                # A blank line ends the description of the current option.
                current = None
            elif current is not None:
                current[2].append(line)

        return [
            (flag, argument, ' '.join(parts))
            for flag, argument, parts in collected
        ]

    def _process_section(
        self,
        info: "CommandInfo",
        section: str,
        buffer: list[str],
        options_buffer: list[tuple],
    ):
        """Store the collected lines of one section on ``info``.

        Args:
            info: Object to update in place.
            section: Section header the lines belong to; unrecognised names
                are ignored.
            buffer: Cleaned lines of the section; ``""`` marks a blank line.
            options_buffer: Unused; kept so existing callers keep working.
        """
        if section == "NAME":
            for line in buffer:
                match = self.NAME_PATTERN.match(line.strip())
                if match:
                    info.name = match.group(1)
                    break
        elif section == "SYNOPSIS":
            synopsis_lines = [line for line in buffer if line.strip()]
            if synopsis_lines:
                info.synopsis = ' '.join(synopsis_lines)
        elif section == "OPTIONS":
            info.options.extend(
                Option(flag=flag, description=description, argument=argument)
                for flag, argument, description in self._parse_option_lines(buffer)
            )
        elif section == "DESCRIPTION":
            info.description = ' '.join(line for line in buffer if line.strip())
        elif section == "EXAMPLES":
            info.examples.extend(
                line
                for line in buffer
                if line.strip() and not line.strip().startswith('#')
            )
|
||||||
Reference in New Issue
Block a user