Add generators: man, markdown, and HTML generators with validators

2026-01-31 00:57:32 +00:00
parent ac7f44005c
commit c3b8ef79d4
1 changed files with 133 additions and 0 deletions
--- a/doc2man/generators/html.py
+++ b/doc2man/generators/html.py
@@ -0,0 +1,133 @@
 """HTML generator for Doc2Man."""
 import re
 from pathlib import Path
 from typing import Any, Dict, List, Optional

 from jinja2 import Environment, FileSystemLoader, TemplateSyntaxError
 def generate_html(
    parsed_data: List[Dict[str, Any]],
    output_path: Path,
    template_path: Optional[Path] = None,
 ) -> str:
    """Render parsed documentation to HTML and optionally write it to disk.

    Args:
        parsed_data: List of parsed documentation dictionaries; passed to the
            template as ``data``.
        output_path: Path to write the HTML file. When falsy (e.g. ``None``)
            nothing is written and the content is only returned.
        template_path: Optional custom template file. When omitted, the
            bundled ``html.j2`` from the package template directory is used.

    Returns:
        The generated HTML content.

    Raises:
        ValueError: If the template contains a Jinja2 syntax error. The
            original ``TemplateSyntaxError`` is attached as ``__cause__``.
    """
    # A custom template resolves includes/extends relative to its own
    # directory; otherwise the package template directory is searched.
    search_dir = str(template_path.parent) if template_path else get_template_dir()
    env = Environment(
        loader=FileSystemLoader(search_dir),
        autoescape=True,
    )
    env.filters['first_line'] = first_line_filter
    try:
        if template_path:
            # Read as UTF-8 explicitly: the output is written as UTF-8 and
            # the platform default encoding is not reliable.
            template = env.from_string(template_path.read_text(encoding="utf-8"))
        else:
            template = env.get_template("html.j2")
    except TemplateSyntaxError as e:
        # Chain the cause so the template line/position stays in the traceback.
        raise ValueError(f"Template syntax error: {e}") from e
    html_content = template.render(
        data=parsed_data,
        title=get_html_title(parsed_data),
    )
    if output_path:
        output_path.parent.mkdir(parents=True, exist_ok=True)
        output_path.write_text(html_content, encoding="utf-8")
    return html_content
 def first_line_filter(text: str) -> str:
    """Return everything before the first newline of ``text``.

    Falsy input (empty string, ``None``) yields an empty string. Only
    ``'\\n'`` is treated as a line separator.
    """
    if text:
        head, _sep, _rest = text.partition('\n')
        return head
    return ""
 def get_template_dir() -> Path:
    """Return the ``templates`` directory next to this module's package directory."""
    module_dir = Path(__file__).parent
    return module_dir.parent.joinpath("templates")
 def get_html_title(parsed_data: List[Dict[str, Any]]) -> str:
    """Pick a title for the HTML document.

    Items are scanned in order. For each item, a non-empty ``data["title"]``
    wins; otherwise the first non-empty ``name`` among ``data["functions"]``
    is used. Items whose ``data`` is missing or ``None`` are skipped.

    Args:
        parsed_data: List of parsed documentation dictionaries.

    Returns:
        The chosen title, or ``"Documentation"`` when nothing suitable exists.
    """
    for item in parsed_data:
        # ``or {}`` also covers an explicit ``"data": None`` entry, which
        # ``item.get("data", {})`` would pass through and crash on.
        data = item.get("data") or {}
        title = data.get("title")
        if title:
            return title
        for func in data.get("functions") or []:
            name = func.get("name")
            if name:
                return name
    return "Documentation"
 class HTMLValidator:
    """Lightweight, regex-based sanity checks for generated HTML.

    These checks look for textual markers only; they do not parse the
    document and therefore cannot detect malformed nesting.
    """
    # (pattern, warning) pairs, evaluated in order. All patterns are
    # case-insensitive and accept attributes on the tag; the lookahead keeps
    # ``<head`` from matching ``<header>``.
    _STRUCTURE_CHECKS = (
        (re.compile(r"<!doctype\s+html\b", re.IGNORECASE), "Missing DOCTYPE declaration"),
        (re.compile(r"<html(?=[\s/>])", re.IGNORECASE), "Missing <html> tag"),
        (re.compile(r"<head(?=[\s/>])", re.IGNORECASE), "Missing <head> tag"),
        (re.compile(r"<body(?=[\s/>])", re.IGNORECASE), "Missing <body> tag"),
        (re.compile(r"<title(?=[\s/>])", re.IGNORECASE), "Missing <title> tag"),
        (re.compile(r"</html\s*>", re.IGNORECASE), "Missing closing </html> tag"),
    )
    # href attribute with a double- or single-quoted value.
    _HREF_RE = re.compile(r"""href\s*=\s*(?:"([^"]*)"|'([^']*)')""", re.IGNORECASE)
    # id/name attribute with a quoted value; the lookbehind rejects
    # attributes that merely end in "id"/"name" (``data-id``, ``valid``).
    _TARGET_RE = re.compile(
        r"""(?<![\w-])(?:id|name)\s*=\s*(?:"([^"]*)"|'([^']*)')""",
        re.IGNORECASE,
    )

    @staticmethod
    def _quoted_value(match) -> str:
        """Return whichever quoted alternative (double or single) matched."""
        double_quoted = match.group(1)
        return double_quoted if double_quoted is not None else match.group(2)

    @staticmethod
    def validate(content: str) -> List[str]:
        """Validate HTML content.

        Args:
            content: The HTML content to validate.

        Returns:
            List of validation warnings, in the fixed order DOCTYPE, <html>,
            <head>, <body>, <title>, closing </html>. Empty when all markers
            are present.
        """
        return [
            warning
            for pattern, warning in HTMLValidator._STRUCTURE_CHECKS
            if not pattern.search(content)
        ]

    @staticmethod
    def validate_links(content: str) -> List[str]:
        """Validate in-page anchor links.

        Every ``href`` beginning with ``#`` must have a matching ``id`` or
        ``name`` attribute somewhere in the document. A bare ``#`` is not
        reported, since it has no fragment to resolve. External and relative
        links are not checked.

        Args:
            content: The HTML content to validate.

        Returns:
            List of validation errors, one per unresolved reference (repeated
            references are reported repeatedly).
        """
        value_of = HTMLValidator._quoted_value
        # Collect all targets once instead of rescanning the document per link.
        targets = {value_of(m) for m in HTMLValidator._TARGET_RE.finditer(content)}
        errors = []
        for match in HTMLValidator._HREF_RE.finditer(content):
            href = value_of(match)
            if not href.startswith("#"):
                continue
            anchor = href[1:]
            if anchor and anchor not in targets:
                errors.append(f"Orphan anchor reference: #{anchor}")
        return errors