Add generators: man, markdown, and HTML generators with validators
Some checks failed
CI / build (push) Has been cancelled
CI / test (push) Has been cancelled

This commit is contained in:
2026-01-31 00:57:32 +00:00
parent ac7f44005c
commit c3b8ef79d4

133
doc2man/generators/html.py Normal file
View File

@@ -0,0 +1,133 @@
"""HTML generator for Doc2Man."""
from pathlib import Path
from typing import Any, Dict, List, Optional
from jinja2 import Environment, FileSystemLoader, TemplateSyntaxError
def generate_html(
    parsed_data: List[Dict[str, Any]],
    output_path: Path,
    template_path: Optional[Path] = None,
) -> str:
    """Render parsed documentation to HTML and optionally write it to disk.

    Args:
        parsed_data: List of parsed documentation dictionaries. Passed to the
            template as ``data`` and used to derive the page ``title``.
        output_path: Destination file for the rendered HTML. Missing parent
            directories are created. A falsy value skips writing.
        template_path: Optional custom template file. When omitted, the
            bundled ``html.j2`` template is loaded from the template directory.

    Returns:
        The generated HTML content.

    Raises:
        ValueError: If the template contains a Jinja2 syntax error. The
            original ``TemplateSyntaxError`` is attached as ``__cause__``.
    """
    # A custom template resolves its includes/extends relative to its own
    # directory; otherwise fall back to the packaged template directory.
    search_dir = template_path.parent if template_path else get_template_dir()
    env = Environment(
        loader=FileSystemLoader(str(search_dir)),
        autoescape=True,
    )
    env.filters['first_line'] = first_line_filter

    try:
        if template_path:
            # Read explicitly as UTF-8 so the result does not depend on the
            # platform locale and matches the encoding used for the output.
            template = env.from_string(template_path.read_text(encoding="utf-8"))
        else:
            template = env.get_template("html.j2")
    except TemplateSyntaxError as e:
        # Chain the cause so the template line/name stay in the traceback.
        raise ValueError(f"Template syntax error: {e}") from e

    html_content = template.render(
        data=parsed_data,
        title=get_html_title(parsed_data),
    )

    if output_path:
        output_path.parent.mkdir(parents=True, exist_ok=True)
        output_path.write_text(html_content, encoding="utf-8")

    return html_content
def first_line_filter(text: str) -> str:
    """Return the part of ``text`` that precedes its first newline.

    Falsy input (for example an empty string or ``None``) yields ``""``.
    Text without any newline is returned unchanged.
    """
    # partition() gives back the whole string as the head when "\n" is absent.
    return text.partition('\n')[0] if text else ""
def get_template_dir() -> Path:
    """Return the ``templates`` directory located two levels above this file's path."""
    # Directory holding this module, then its parent.
    module_dir = Path(__file__).parent
    package_root = module_dir.parent
    return package_root.joinpath("templates")
def get_html_title(parsed_data: List[Dict[str, Any]]) -> str:
    """Extract a title for the HTML document.

    Items are examined in order. For each item the ``data["title"]`` value is
    preferred; failing that, the first non-empty ``name`` among
    ``data["functions"]`` is used. An earlier item's function name therefore
    wins over a later item's title.

    Args:
        parsed_data: List of parsed documentation dictionaries.

    Returns:
        The first title or function name found, or ``"Documentation"`` when
        no item provides one.
    """
    for item in parsed_data:
        # ``or {}`` also covers an explicit ``"data": None`` entry, which
        # ``item.get("data", {})`` would pass through and crash on.
        data = item.get("data") or {}

        title = data.get("title")
        if title:
            return title

        for func in data.get("functions") or []:
            name = func.get("name")
            if name:
                return name

    return "Documentation"
class HTMLValidator:
    """Validator for HTML format.

    Both checks are lightweight text scans over the raw markup; the content
    is not parsed into a DOM.
    """

    @staticmethod
    def validate(content: str) -> List[str]:
        """Check that the basic HTML document skeleton is present.

        Matching is case-insensitive, and opening tags may carry attributes
        (for example ``<body class="x">``).

        Args:
            content: The HTML content to validate.

        Returns:
            List of validation warnings, in the order DOCTYPE, ``<html>``,
            ``<head>``, ``<body>``, ``<title>``, closing ``</html>``. Empty
            when nothing is missing.
        """
        import re

        # Lower-case once; tag names and the DOCTYPE keyword are
        # case-insensitive in HTML.
        lowered = content.lower()
        warnings: List[str] = []

        # Accept any casing and legacy doctypes such as
        # ``<!DOCTYPE html PUBLIC "...">``.
        if not re.search(r"<!doctype\s+html[\s>]", lowered):
            warnings.append("Missing DOCTYPE declaration")

        required_tags = (
            ("html", "Missing <html> tag"),
            ("head", "Missing <head> tag"),
            ("body", "Missing <body> tag"),
            ("title", "Missing <title> tag"),
        )
        for tag, message in required_tags:
            # The lookahead requires the tag name to end here, so ``<header>``
            # does not count as ``<head>`` while ``<head lang="en">`` does.
            if not re.search(rf"<{tag}(?=[\s/>])", lowered):
                warnings.append(message)

        if not re.search(r"</html\s*>", lowered):
            warnings.append("Missing closing </html> tag")

        return warnings

    @staticmethod
    def validate_links(content: str) -> List[str]:
        """Report in-page links whose target anchor is not defined.

        Only ``href`` values starting with ``#`` are checked. A link is
        considered resolved when the content contains a matching ``id`` or
        ``name`` attribute, in single or double quotes. Bare ``#`` and
        ``#top`` are skipped because they refer to the top of the document.

        Args:
            content: The HTML content to validate.

        Returns:
            List of validation errors, one per unresolved link occurrence.
        """
        import re

        # Group 1 holds a double-quoted value, group 2 a single-quoted one.
        href_pattern = re.compile(
            r'href=(?:"([^"]*)"|' r"'([^']*)')",
            re.IGNORECASE,
        )

        errors: List[str] = []
        for match in href_pattern.finditer(content):
            href = match.group(1) if match.group(1) is not None else match.group(2)
            if not href.startswith("#"):
                continue

            anchor = href[1:]
            # An empty fragment or "top" points at the document start and
            # needs no matching element.
            if not anchor or anchor.lower() == "top":
                continue

            targets = (
                f'id="{anchor}"',
                f"id='{anchor}'",
                f'name="{anchor}"',
                f"name='{anchor}'",
            )
            if not any(target in content for target in targets):
                errors.append(f"Orphan anchor reference: #{anchor}")

        return errors