# Origin: git patch c3b8ef79d48adc4471ae47fd7fe07baa46e6d7c1
# From: 7000pctAUTO, Sat, 31 Jan 2026 00:57:32 +0000
# Subject: [PATCH] Add generators: man, markdown, and HTML generators
#          with validators
# Target: doc2man/generators/html.py (new file, 133 insertions)
"""HTML generator for Doc2Man."""

import re
from pathlib import Path
from typing import Any, Dict, List, Optional

from jinja2 import Environment, FileSystemLoader, TemplateSyntaxError

# Structural markers every generated page is expected to contain, paired
# with the warning emitted when the marker is absent. Opening tags are
# matched as "<name" followed by whitespace or ">" so that tags carrying
# attributes (e.g. <body class="x">) count, while look-alikes such as
# <header> do not satisfy the <head> check.
_REQUIRED_MARKUP = (
    (re.compile(r"<!doctype\s+html", re.IGNORECASE),
     "Missing DOCTYPE declaration"),
    (re.compile(r"<html[\s>]", re.IGNORECASE), "Missing <html> tag"),
    (re.compile(r"<head[\s>]", re.IGNORECASE), "Missing <head> tag"),
    (re.compile(r"<body[\s>]", re.IGNORECASE), "Missing <body> tag"),
    (re.compile(r"<title[\s>]", re.IGNORECASE), "Missing <title> tag"),
    (re.compile(r"</html\s*>", re.IGNORECASE), "Missing closing </html> tag"),
)

# Double-quoted href attribute values.
_HREF_PATTERN = re.compile(r'href="([^"]*)"', re.IGNORECASE)

# Double-quoted id/name attribute values. The look-behind rejects
# attributes that merely end in "id"/"name" (data-id, valid, filename, ...).
_ANCHOR_TARGET_PATTERN = re.compile(
    r'(?<![\w-])(?:id|name)="([^"]*)"', re.IGNORECASE
)


def generate_html(
    parsed_data: List[Dict[str, Any]],
    output_path: Path,
    template_path: Optional[Path] = None,
) -> str:
    """Generate HTML documentation from parsed data.

    Args:
        parsed_data: List of parsed documentation dictionaries.
        output_path: Path to write the HTML file. Parent directories are
            created as needed. A falsy value (e.g. ``None``) skips writing.
        template_path: Optional custom template file. When given, its
            parent directory is used as the template search path; otherwise
            ``html.j2`` is loaded from :func:`get_template_dir`.

    Returns:
        The generated HTML content.

    Raises:
        ValueError: If the template contains a syntax error.
    """
    search_dir = template_path.parent if template_path else get_template_dir()
    env = Environment(
        loader=FileSystemLoader(str(search_dir)),
        autoescape=True,
    )
    env.filters["first_line"] = first_line_filter

    try:
        if template_path:
            # Read explicitly as UTF-8 so the result does not depend on the
            # platform's locale encoding (the output is written as UTF-8).
            source = template_path.read_text(encoding="utf-8")
            template = env.from_string(source)
        else:
            template = env.get_template("html.j2")
    except TemplateSyntaxError as e:
        # Chain the original error so the Jinja traceback is preserved.
        raise ValueError(f"Template syntax error: {e}") from e

    html_content = template.render(
        data=parsed_data,
        title=get_html_title(parsed_data),
    )

    if output_path:
        output_path.parent.mkdir(parents=True, exist_ok=True)
        output_path.write_text(html_content, encoding="utf-8")

    return html_content


def first_line_filter(text: str) -> str:
    """Return the first line of *text*, or ``""`` when *text* is empty.

    A trailing carriage return is dropped so CRLF-terminated text does not
    leak a stray ``\\r`` into the rendered page.
    """
    if not text:
        return ""
    return text.split("\n", 1)[0].rstrip("\r")


def get_template_dir() -> Path:
    """Get the directory containing templates."""
    return Path(__file__).parent.parent / "templates"


def get_html_title(parsed_data: List[Dict[str, Any]]) -> str:
    """Extract a title for the HTML document.

    Items are scanned in order. For each item the ``data["title"]`` value
    wins if it is truthy; otherwise the first truthy ``name`` among
    ``data["functions"]`` is used. Missing or ``None`` ``data`` and
    ``functions`` entries are treated as empty.

    Returns:
        The first title found, or ``"Documentation"`` if there is none.
    """
    for item in parsed_data:
        data = item.get("data") or {}
        if data.get("title"):
            return data["title"]
        for func in data.get("functions") or []:
            if func.get("name"):
                return func["name"]
    return "Documentation"


class HTMLValidator:
    """Validator for HTML format."""

    @staticmethod
    def validate(content: str) -> List[str]:
        """Validate HTML content.

        Checks, case-insensitively, for a DOCTYPE declaration and for the
        ``<html>``, ``<head>``, ``<body>``, ``<title>`` and ``</html>``
        tags.

        Args:
            content: The HTML content to validate.

        Returns:
            List of validation warnings, in the order the checks are
            listed above; empty when every marker is present.
        """
        return [
            message
            for pattern, message in _REQUIRED_MARKUP
            if not pattern.search(content)
        ]

    @staticmethod
    def validate_links(content: str) -> List[str]:
        """Validate HTML links.

        Every double-quoted ``href`` that starts with ``#`` must point at
        an element whose ``id`` or ``name`` attribute equals the fragment.
        A bare ``#`` is skipped because it names no element.

        Args:
            content: The HTML content to validate.

        Returns:
            List of validation errors, one per offending link occurrence.
        """
        # Collect all targets once instead of rescanning per link.
        targets = set(_ANCHOR_TARGET_PATTERN.findall(content))

        errors = []
        for match in _HREF_PATTERN.finditer(content):
            href = match.group(1)
            if not href.startswith("#"):
                continue
            anchor = href[1:]
            if anchor and anchor not in targets:
                errors.append(f"Orphan anchor reference: #{anchor}")

        return errors