"""Man page parser module.""" import subprocess import re from dataclasses import dataclass, field from typing import Optional @dataclass class Option: """Represents a command option/flag.""" flag: str description: str argument: Optional[str] = None @dataclass class CommandInfo: """Structured information extracted from a man page.""" name: str synopsis: str = "" description: str = "" options: list[Option] = field(default_factory=list) examples: list[str] = field(default_factory=list) section: str = "1" class ManPageParser: """Parser for Unix man pages.""" SECTION_PATTERN = re.compile(r'^([A-Z][A-Z\s]+)$') NAME_PATTERN = re.compile(r'^([a-zA-Z0-9_-]+)\s*-\s*(.+)$') SYNOPSIS_PATTERN = re.compile(r'^\s*(?:\\fB)?(.+?)(?:\\fR)?\s*$') OPTION_PATTERN = re.compile(r'^(\-[a-zA-Z0-9],?\s*(--[a-zA-Z0-9-]+)?)\s+(.+)$') BLANK_LINE = re.compile(r'^\s*$') FONT_PATTERN = re.compile(r'\\f[BIRP]') def __init__(self): pass def parse(self, command: str, section: str = "1") -> CommandInfo: """Parse a man page for the given command.""" try: env = {"PAGER": "", "COLUMNS": "200"} result = subprocess.run( ["man", "-P", "cat", section, command], capture_output=True, text=True, env={**dict(__import__("os").environ), **env}, timeout=30 ) if result.returncode != 0: raise ValueError(f"man: {command}: command not found") return self._parse_content(result.stdout, command) except subprocess.CalledProcessError as e: raise ValueError(f"man: {command}: command not found") from e except subprocess.TimeoutExpired: raise ValueError(f"man: {command}: timeout while fetching man page") def _parse_content(self, content: str, command: str) -> CommandInfo: """Parse man page content into structured CommandInfo.""" info = CommandInfo(name=command) lines = content.split('\n') current_section = "" buffer: list[str] = [] options_buffer: list[tuple[str, str, str]] = [] for i, line in enumerate(lines): stripped = line.strip() section_match = self.SECTION_PATTERN.match(stripped) if section_match: section_name = section_match.group(1).strip() if section_name == "NAME": if buffer: self._process_section(info, current_section, buffer, options_buffer) current_section = "NAME" buffer = [] options_buffer = [] elif section_name in ("SYNOPSIS", "OPTIONS", "DESCRIPTION", "EXAMPLES"): if buffer and current_section: self._process_section(info, current_section, buffer, options_buffer) current_section = section_name buffer = [] options_buffer = [] else: if buffer and current_section: self._process_section(info, current_section, buffer, options_buffer) current_section = "" buffer = [] options_buffer = [] elif current_section: processed = self.FONT_PATTERN.sub('', stripped) if processed and not self.BLANK_LINE.match(processed): buffer.append(processed) elif buffer and self.BLANK_LINE.match(processed): buffer.append("") if buffer and current_section: self._process_section(info, current_section, buffer, options_buffer) if not info.synopsis and buffer: for line in lines: name_match = self.NAME_PATTERN.match(line.strip()) if name_match: info.synopsis = name_match.group(1) break return info def _process_section(self, info: CommandInfo, section: str, buffer: list[str], options_buffer: list[tuple]): """Process a parsed section and populate CommandInfo.""" text = ' '.join(line for line in buffer if line.strip()) if section == "NAME": for line in buffer: match = self.NAME_PATTERN.match(line.strip()) if match: info.name = match.group(1) break elif section == "SYNOPSIS": synopsis_lines = [line for line in buffer if line.strip()] if synopsis_lines: info.synopsis = ' '.join(synopsis_lines) elif section == "OPTIONS": for line in buffer: match = self.OPTION_PATTERN.match(line) if match: flag = match.group(1).strip() long_opt = match.group(2) if match.group(2) else "" desc = match.group(3).strip() if match.group(3) else "" info.options.append(Option(flag=flag, description=desc)) elif section == "DESCRIPTION": info.description = text elif section == "EXAMPLES": examples = [line for line in buffer if line.strip() and not line.strip().startswith('#')] info.examples.extend(examples)