# man_card/man_parser.py
# Reconstructed from patch ad11005c46d82f7bdcfb65496a18ff074ef19e49
# ("Initial upload: man-card CLI tool with PDF/PNG generation, templates,
# and tests").
"""Man page parser module."""

import os
import re
import subprocess
from dataclasses import dataclass, field
from typing import Optional


@dataclass
class Option:
    """Represents a command option/flag."""

    # Flag text as captured from the OPTIONS section, e.g. "-a, --all".
    flag: str
    # Text following the flag on the same line.
    description: str
    # Not populated by ManPageParser; available for callers to fill in.
    argument: Optional[str] = None


@dataclass
class CommandInfo:
    """Structured information extracted from a man page."""

    # Command name; replaced by the name found in the NAME section, if any.
    name: str
    # Non-blank SYNOPSIS lines joined with single spaces.
    synopsis: str = ""
    # Non-blank DESCRIPTION lines joined with single spaces.
    description: str = ""
    # One entry per OPTIONS line that matches ManPageParser.OPTION_PATTERN.
    options: list[Option] = field(default_factory=list)
    # Non-blank EXAMPLES lines that do not start with '#'.
    examples: list[str] = field(default_factory=list)
    section: str = "1"


class ManPageParser:
    """Parser for Unix man pages.

    ``parse`` runs ``man`` for a command and hands the captured text to
    ``_parse_content``, which splits it into sections and fills a
    ``CommandInfo``.
    """

    SECTION_PATTERN = re.compile(r'^([A-Z][A-Z\s]+)$')
    NAME_PATTERN = re.compile(r'^([a-zA-Z0-9_-]+)\s*-\s*(.+)$')
    SYNOPSIS_PATTERN = re.compile(r'^\s*(?:\\fB)?(.+?)(?:\\fR)?\s*$')
    OPTION_PATTERN = re.compile(r'^(\-[a-zA-Z0-9],?\s*(--[a-zA-Z0-9-]+)?)\s+(.+)$')
    BLANK_LINE = re.compile(r'^\s*$')
    FONT_PATTERN = re.compile(r'\\f[BIRP]')

    # Section headings whose bodies are collected; text under any other
    # heading is skipped.
    _KNOWN_SECTIONS = frozenset(
        {"NAME", "SYNOPSIS", "OPTIONS", "DESCRIPTION", "EXAMPLES"}
    )

    def parse(self, command: str, section: str = "1") -> CommandInfo:
        """Parse a man page for the given command.

        Args:
            command: Name of the command whose man page is fetched.
            section: Manual section passed to ``man`` (default ``"1"``).

        Returns:
            The ``CommandInfo`` built from the output of ``man``.

        Raises:
            ValueError: If ``man`` cannot be started, exits with a non-zero
                status, or does not finish within 30 seconds.
        """
        # Inherit the caller's environment, overriding only the pager and
        # the column width.
        env = {**os.environ, "PAGER": "", "COLUMNS": "200"}
        try:
            result = subprocess.run(
                ["man", "-P", "cat", section, command],
                capture_output=True,
                text=True,
                env=env,
                timeout=30,
            )
        except subprocess.TimeoutExpired as e:
            raise ValueError(
                f"man: {command}: timeout while fetching man page"
            ) from e
        except FileNotFoundError as e:
            # The `man` binary itself is missing; report it through the same
            # exception type as the other failures.
            raise ValueError(
                f"man: {command}: 'man' executable not found"
            ) from e

        if result.returncode != 0:
            raise ValueError(f"man: {command}: command not found")
        return self._parse_content(result.stdout, command)

    def _parse_content(self, content: str, command: str) -> CommandInfo:
        """Parse man page content into structured CommandInfo.

        Args:
            content: Full text of the man page.
            command: Command name used as the initial ``CommandInfo.name``.

        Returns:
            A ``CommandInfo`` populated from the NAME, SYNOPSIS, DESCRIPTION,
            OPTIONS and EXAMPLES sections found in ``content``.
        """
        info = CommandInfo(name=command)
        lines = content.split('\n')
        current_section = ""
        buffer: list[str] = []

        for line in lines:
            stripped = line.strip()
            section_match = self.SECTION_PATTERN.match(stripped)

            if section_match:
                # A heading closes whatever section was being collected.
                if buffer and current_section:
                    self._process_section(info, current_section, buffer)
                section_name = section_match.group(1).strip()
                current_section = (
                    section_name if section_name in self._KNOWN_SECTIONS else ""
                )
                buffer = []
            elif current_section:
                processed = self.FONT_PATTERN.sub('', stripped)
                if not self.BLANK_LINE.match(processed):
                    buffer.append(processed)
                elif buffer:
                    # Keep blank lines only after the first content line.
                    buffer.append("")

        if buffer and current_section:
            self._process_section(info, current_section, buffer)

        # Without a SYNOPSIS, fall back to the command name from the first
        # "name - description" line anywhere in the page. This must not
        # depend on what the last section happened to leave in `buffer`.
        if not info.synopsis:
            for line in lines:
                name_match = self.NAME_PATTERN.match(line.strip())
                if name_match:
                    info.synopsis = name_match.group(1)
                    break

        return info

    def _process_section(
        self,
        info: CommandInfo,
        section: str,
        buffer: list[str],
        options_buffer: Optional[list[tuple]] = None,
    ) -> None:
        """Process a parsed section and populate CommandInfo.

        Args:
            info: Object updated in place.
            section: Heading of the section being processed.
            buffer: Lines collected for that section (may contain "" entries
                standing for blank lines).
            options_buffer: Unused; kept so existing four-argument calls
                continue to work.
        """
        content_lines = [line for line in buffer if line.strip()]

        if section == "NAME":
            for line in content_lines:
                match = self.NAME_PATTERN.match(line.strip())
                if match:
                    info.name = match.group(1)
                    break
        elif section == "SYNOPSIS":
            if content_lines:
                info.synopsis = ' '.join(content_lines)
        elif section == "OPTIONS":
            for line in content_lines:
                match = self.OPTION_PATTERN.match(line)
                if match:
                    info.options.append(
                        Option(
                            flag=match.group(1).strip(),
                            description=match.group(3).strip(),
                        )
                    )
        elif section == "DESCRIPTION":
            info.description = ' '.join(content_lines)
        elif section == "EXAMPLES":
            info.examples.extend(
                line for line in content_lines
                if not line.strip().startswith('#')
            )