# Files
# man-card/man_card/man_parser.py
# 2026-01-31 21:39:48 +00:00
#
# 139 lines
# 5.3 KiB
# Python

"""Man page parser module."""
import os
import re
import subprocess
from dataclasses import dataclass, field
from typing import Optional
@dataclass
class Option:
    """Represents a single command option/flag taken from a man page."""

    # Flag text, e.g. "-a" or "-a, --all".
    flag: str
    # Free-text explanation of what the flag does.
    description: str
    # Argument associated with the flag; None when there is none.
    argument: Optional[str] = None
@dataclass
class CommandInfo:
    """Structured information extracted from a man page."""

    # Command name.
    name: str
    # Usage line(s) of the command; empty when none was found.
    synopsis: str = ""
    # Text of the description; empty when none was found.
    description: str = ""
    # Parsed options; each instance gets its own list.
    options: list["Option"] = field(default_factory=list)
    # Example lines; each instance gets its own list.
    examples: list[str] = field(default_factory=list)
    # Manual section, kept as a string; defaults to "1".
    section: str = "1"
class ManPageParser:
    """Parser for Unix man pages.

    ``parse`` renders a page with the system ``man`` command and
    ``_parse_content`` turns the rendered text into a ``CommandInfo``.  Only
    the NAME, SYNOPSIS, DESCRIPTION, OPTIONS and EXAMPLES sections are read;
    text under any other heading is skipped.
    """

    # A heading line consists solely of capital letters and whitespace.
    SECTION_PATTERN = re.compile(r'^([A-Z][A-Z\s]+)$')
    # "name - summary".  The dash must be surrounded by whitespace so that
    # hyphenated words ("built-in ...") are not split in two; a few Unicode
    # dashes are accepted next to the ASCII hyphen.
    NAME_PATTERN = re.compile(
        r'^([a-zA-Z0-9_.+-]+)\s+[-\u2010-\u2015\u2212]+\s+(.+)$'
    )
    # Not referenced inside this class.
    SYNOPSIS_PATTERN = re.compile(r'^\s*(?:\\fB)?(.+?)(?:\\fR)?\s*$')
    # No longer used by the parser (it mis-split "-a, --all" and ignored
    # long-only options); kept so external references keep working.
    OPTION_PATTERN = re.compile(
        r'^(\-[a-zA-Z0-9],?\s*(--[a-zA-Z0-9-]+)?)\s+(.+)$'
    )
    BLANK_LINE = re.compile(r'^\s*$')
    # roff font escapes such as \fB, \fI, \fR, \fP.
    FONT_PATTERN = re.compile(r'\\f[BIRP]')

    # Headings whose content is collected.
    _KNOWN_SECTIONS = frozenset(
        {"NAME", "SYNOPSIS", "OPTIONS", "DESCRIPTION", "EXAMPLES"}
    )

    # One flag name: "-a", "--all", "-name", "-?".
    _OPTION_NAME = r'--?[A-Za-z0-9?#][A-Za-z0-9_-]*'
    # Argument attached to a flag: "[=WHEN]", "=NUM", or " NUM" (a single
    # space followed by an all-caps token of at least two characters).
    _OPTION_ARG = (
        r'(?:\[=[^\]\s]+\]|=[^\s,]+| [A-Z][A-Z0-9_]+(?=[\s,]|$))'
    )
    # A flag line: comma-separated flags, then an optional inline description.
    _OPTION_LINE = re.compile(
        rf'^({_OPTION_NAME}{_OPTION_ARG}?'
        rf'(?:,\s*{_OPTION_NAME}{_OPTION_ARG}?)*)(?:\s+(.+))?$'
    )
    # One flag with its argument split out (groups 2-4 are the alternatives).
    _OPTION_TOKEN = re.compile(
        rf'({_OPTION_NAME})'
        r'(?:\[=([^\]\s]+)\]|=([^\s,]+)| ([A-Z][A-Z0-9_]+)(?=[\s,]|$))?'
    )

    def __init__(self):
        """Create a parser; it holds no per-instance state."""
        pass

    def parse(self, command: str, section: str = "1") -> "CommandInfo":
        """Fetch and parse the man page for ``command``.

        Args:
            command: Name of the page to look up.
            section: Manual section passed to ``man``; also stored on the
                returned ``CommandInfo``.

        Returns:
            The ``CommandInfo`` built from the rendered page.

        Raises:
            ValueError: If ``man`` cannot be started, exits with a non-zero
                status, or does not finish within 30 seconds.
        """
        # "-P cat" and an empty PAGER avoid an interactive pager; COLUMNS
        # requests wide output.
        env = {**os.environ, "PAGER": "", "COLUMNS": "200"}
        try:
            result = subprocess.run(
                ["man", "-P", "cat", section, command],
                capture_output=True,
                text=True,
                env=env,
                timeout=30,
            )
        except subprocess.TimeoutExpired as e:
            raise ValueError(
                f"man: {command}: timeout while fetching man page"
            ) from e
        except FileNotFoundError as e:
            # Report a missing man binary the same way as other failures.
            raise ValueError(
                f"man: {command}: 'man' executable not found"
            ) from e
        if result.returncode != 0:
            raise ValueError(f"man: {command}: command not found")
        info = self._parse_content(result.stdout, command)
        # Record the section that was actually requested.
        info.section = section
        return info

    def _parse_content(self, content: str, command: str) -> "CommandInfo":
        """Parse rendered man page text into a ``CommandInfo``.

        Args:
            content: Full text of the page.
            command: Initial value for ``CommandInfo.name``; replaced when
                the NAME section yields a name.

        Returns:
            The populated ``CommandInfo``.
        """
        info = CommandInfo(name=command)
        lines = content.split('\n')
        current_section = ""
        buffer: list[str] = []
        for line in lines:
            heading = self.SECTION_PATTERN.match(line.strip())
            if heading:
                # A new heading closes whatever section was being collected.
                if buffer:
                    self._process_section(info, current_section, buffer, [])
                title = heading.group(1).strip()
                current_section = (
                    title if title in self._KNOWN_SECTIONS else ""
                )
                buffer = []
                continue
            if not current_section:
                continue
            processed = self.FONT_PATTERN.sub('', line).strip()
            if processed:
                buffer.append(processed)
            elif buffer:
                # Keep paragraph breaks, but never lead with a blank entry.
                buffer.append("")
        if buffer:
            self._process_section(info, current_section, buffer, [])
        if not info.synopsis:
            # No SYNOPSIS text: fall back to the name on the first
            # "name - summary" line of the page.
            for line in lines:
                name_match = self.NAME_PATTERN.match(line.strip())
                if name_match:
                    info.synopsis = name_match.group(1)
                    break
        return info

    def _process_section(
        self,
        info: "CommandInfo",
        section: str,
        buffer: list[str],
        options_buffer: list[tuple],
    ):
        """Store one collected section on ``info``.

        Args:
            info: Object to update in place.
            section: Heading the lines were collected under.
            buffer: Stripped lines of the section; "" marks a blank line.
            options_buffer: Unused; kept for signature compatibility.
        """
        if section == "NAME":
            for line in buffer:
                match = self.NAME_PATTERN.match(line.strip())
                if match:
                    info.name = match.group(1)
                    break
        elif section == "SYNOPSIS":
            synopsis = ' '.join(line for line in buffer if line.strip())
            if synopsis:
                info.synopsis = synopsis
        elif section == "OPTIONS":
            info.options.extend(
                Option(flag=flag, description=desc, argument=argument)
                for flag, desc, argument in self._parse_options(buffer)
            )
        elif section == "DESCRIPTION":
            info.description = ' '.join(
                line for line in buffer if line.strip()
            )
        elif section == "EXAMPLES":
            # Lines starting with '#' are dropped.
            info.examples.extend(
                line for line in buffer
                if line.strip() and not line.strip().startswith('#')
            )

    @classmethod
    def _parse_options(
        cls, buffer: list[str]
    ) -> list[tuple[str, str, Optional[str]]]:
        """Split OPTIONS lines into (flag, description, argument) triples.

        A line that starts with one or more comma-separated flags opens an
        entry.  Its description is any text after the flags plus the
        following lines, up to the next blank line or flag line.  ``flag`` is
        the flag names joined by ", "; ``argument`` is the first argument
        placeholder found on the flag line, or None.
        """
        entries: list[tuple[str, list[str], Optional[str]]] = []
        open_parts: Optional[list[str]] = None
        for raw in buffer:
            line = raw.strip()
            if not line:
                open_parts = None  # a blank line ends the current entry
                continue
            spec = cls._OPTION_LINE.match(line)
            if spec is None:
                if open_parts is not None:
                    open_parts.append(line)
                continue
            names: list[str] = []
            argument: Optional[str] = None
            for token in cls._OPTION_TOKEN.finditer(spec.group(1)):
                names.append(token.group(1))
                if argument is None:
                    argument = (
                        token.group(2) or token.group(3) or token.group(4)
                    )
            open_parts = [spec.group(2)] if spec.group(2) else []
            entries.append((", ".join(names), open_parts, argument))
        return [
            (flag, " ".join(parts), argument)
            for flag, parts, argument in entries
        ]