From c3b8ef79d48adc4471ae47fd7fe07baa46e6d7c1 Mon Sep 17 00:00:00 2001
From: 7000pctAUTO <lukems3823@gmail.com>
Date: Sat, 31 Jan 2026 00:57:32 +0000
Subject: [PATCH] Add generators: man, markdown, and HTML generators with
 validators

---
 doc2man/generators/html.py | 133 +++++++++++++++++++++++++++++++++++++
 1 file changed, 133 insertions(+)
 create mode 100644 doc2man/generators/html.py
diff --git a/doc2man/generators/html.py b/doc2man/generators/html.py
new file mode 100644
index 0000000..25cd85c
--- /dev/null
+++ b/doc2man/generators/html.py
@@ -0,0 +1,133 @@
+"""HTML generator for Doc2Man."""
+
+from pathlib import Path
+from typing import Any, Dict, List, Optional
+
+from jinja2 import Environment, FileSystemLoader, TemplateSyntaxError
+
+
+def generate_html(
+    parsed_data: List[Dict[str, Any]],
+    output_path: Path,
+    template_path: Optional[Path] = None,
+) -> str:
+    """Render parsed documentation to HTML and write it to ``output_path``.
+
+    Args:
+        parsed_data: List of parsed documentation dictionaries.
+        output_path: File to write (parent directories are created as needed).
+        template_path: Optional custom template file, read as UTF-8.
+
+    Returns:
+        The generated HTML content.
+
+    Raises:
+        ValueError: If the template contains a Jinja2 syntax error.
+    """
+    # The loader is rooted at the custom template's directory when one is given.
+    search_dir = template_path.parent if template_path else get_template_dir()
+    env = Environment(loader=FileSystemLoader(str(search_dir)), autoescape=True)
+    env.filters["first_line"] = first_line_filter
+
+    try:
+        if template_path:
+            # Decode explicitly so reading does not depend on the platform locale.
+            template = env.from_string(template_path.read_text(encoding="utf-8"))
+        else:
+            template = env.get_template("html.j2")
+    except TemplateSyntaxError as e:
+        raise ValueError(f"Template syntax error: {e}") from e
+
+    html_content = template.render(data=parsed_data, title=get_html_title(parsed_data))
+
+    if output_path:
+        output_path.parent.mkdir(parents=True, exist_ok=True)
+        output_path.write_text(html_content, encoding="utf-8")
+
+    return html_content
+
+
+def first_line_filter(text: str) -> str:
+    """Return the first line of ``text`` (an empty string for falsy input)."""
+    if not text:
+        return ""
+    # Cut at the first newline, then drop the "\r" a CRLF line ending leaves behind.
+    return text.split("\n", 1)[0].rstrip("\r")
+
+
+def get_template_dir() -> Path:
+    """Return the ``templates`` directory one level above this module's directory."""
+    return Path(__file__).parent.parent.joinpath("templates")
+
+
+def get_html_title(parsed_data: List[Dict[str, Any]]) -> str:
+    """Return the first title or function name found, else "Documentation"."""
+    for item in parsed_data:
+        # ``or {}`` also covers an entry whose "data" value is explicitly None.
+        data = item.get("data") or {}
+        if data.get("title"):
+            return data["title"]
+        for func in data.get("functions") or ():
+            if func.get("name"):
+                return func["name"]
+    return "Documentation"
+
+
+class HTMLValidator:
+    """Lightweight, text-based sanity checks for HTML output.
+
+    Both checks scan the raw string; the markup is never parsed.
+    """
+
+    @staticmethod
+    def validate(content: str) -> List[str]:
+        """Check that the basic HTML document skeleton is present.
+
+        Matching is case-insensitive and tolerates attributes on the tags.
+
+        Args:
+            content: The HTML content to validate.
+
+        Returns:
+            List of validation warnings (empty when nothing is missing).
+        """
+        import re
+
+        lowered = content.lower()  # lower-case once instead of once per check
+        # ``\b`` accepts ``<body class="x">`` but keeps ``<header>`` from passing as ``<head>``.
+        required = (
+            (r"<!doctype html\b", "Missing DOCTYPE declaration"),
+            (r"<html\b", "Missing <html> tag"),
+            (r"<head\b", "Missing <head> tag"),
+            (r"<body\b", "Missing <body> tag"),
+            (r"<title\b", "Missing <title> tag"),
+            (r"</html\s*>", "Missing closing </html> tag"),
+        )
+        return [
+            message for pattern, message in required if not re.search(pattern, lowered)
+        ]
+
+    @staticmethod
+    def validate_links(content: str) -> List[str]:
+        """Report in-page ``href="#..."`` links whose target anchor is missing.
+
+        A bare ``href="#"`` (top of the page) is valid and is never reported.
+
+        Args:
+            content: The HTML content to validate.
+
+        Returns:
+            List of validation errors, one per orphan reference.
+        """
+        import re
+
+        # Collect every id/name value once rather than rescanning per link.
+        targets = set(re.findall(r'(?:id|name)="([^"]*)"', content))
+        errors = []
+        for match in re.finditer(r'href="([^"]*)"', content, re.IGNORECASE):
+            href = match.group(1)
+            anchor = href[1:]
+            if href.startswith("#") and anchor and anchor not in targets:
+                errors.append(f"Orphan anchor reference: #{anchor}")
+
+        return errors