roadwave/docs/scripts/generate-pdf-docs.py

#!/usr/bin/env python3
"""
Script pour générer un PDF complet de toute la documentation MkDocs.
Utilise weasyprint pour convertir le HTML en PDF.
"""

import os
import re
import subprocess
import tempfile
import shutil
from pathlib import Path
from typing import List, Dict, Any
import yaml


class SafeLineLoader(yaml.SafeLoader):
    """Safe YAML loader dedicated to reading the MkDocs configuration.

    The class adds no behaviour of its own; it exists so that constructors
    for Python-specific tags can be registered on it without modifying
    ``yaml.SafeLoader`` itself.
    """


# Resolve every tag in the Python-specific YAML namespace to None so that
# configuration entries using such tags (for MkDocs extensions) do not make
# the safe loader fail.
SafeLineLoader.add_multi_constructor(
    'tag:yaml.org,2002:python/',
    lambda loader, suffix, node: None,
)


def _collect_nav_target(target: str, docs_dir: Path) -> List[Path]:
    """Resolve one nav target, relative to ``docs_dir``, to Markdown files."""
    # pathlib drops a trailing slash when joining, so a directory reference
    # written as "bdd/" resolves to the same path as "bdd" and needs no
    # special handling.
    path = docs_dir / target
    if path.is_file():
        return [path] if path.suffix == '.md' else []
    if path.is_dir():
        return sorted(path.rglob('*.md'))
    # Missing files, external URLs, etc. contribute nothing.
    return []


def parse_nav(nav: List, docs_dir: Path, prefix: str = "") -> List[Path]:
    """Flatten a MkDocs ``nav`` structure into an ordered list of Markdown files.

    Args:
        nav: The ``nav`` value from the MkDocs configuration: a list whose
            items are either strings (paths) or single-entry dicts mapping a
            title to a path or to a nested list. ``None`` (an empty ``nav:``
            key) is treated as an empty list.
        docs_dir: Directory that nav paths are relative to.
        prefix: Unused; only extended on recursion. Kept for backward
            compatibility with existing callers.

    Returns:
        The Markdown files in navigation order. A ``.md`` file is returned
        as-is; a directory expands to all ``*.md`` files below it, sorted.
        Entries that match neither (missing paths, URLs, other file types)
        are skipped. Files referenced more than once appear more than once.
    """
    files: List[Path] = []
    for item in nav or []:
        if isinstance(item, str):
            files.extend(_collect_nav_target(item, docs_dir))
        elif isinstance(item, dict):
            # The titles (dict keys) are irrelevant for file collection.
            for value in item.values():
                if isinstance(value, str):
                    files.extend(_collect_nav_target(value, docs_dir))
                elif isinstance(value, list):
                    # Nested section
                    files.extend(parse_nav(value, docs_dir, prefix + "  "))
    return files


def get_all_md_files(docs_dir: Path, mkdocs_config: Dict[str, Any]) -> List[Path]:
    """Return the Markdown files to include, in navigation order.

    When the configuration has a ``nav`` key, the order comes from
    ``parse_nav``; otherwise every ``*.md`` file below ``docs_dir`` is
    returned in sorted order.
    """
    if 'nav' not in mkdocs_config:
        # No explicit navigation: fall back to a recursive, sorted listing.
        discovered = docs_dir.rglob('*.md')
        return sorted(discovered)

    nav_entries = mkdocs_config['nav']
    return parse_nav(nav_entries, docs_dir)


def preprocess_markdown(content: str, file_path: Path, docs_dir: Path) -> str:
    """Adapt one Markdown document for inclusion in the single-file PDF source.

    Two transformations are applied:

    * Relative links are turned into in-document anchors: ``page.md`` becomes
      ``#page`` and ``page.md#section`` becomes ``#section``. Absolute URLs
      (any scheme, e.g. ``https:`` or ``mailto:``), protocol-relative URLs,
      existing ``#anchor`` links and images are left untouched.
    * MkDocs admonitions (``!!! type "title"``) and collapsible blocks
      (``??? type "title"`` / ``???+ type "title"``) that carry a quoted
      title are converted into blockquotes headed by the bold title.

    Args:
        content: Markdown source text.
        file_path: Path of the file the text came from (currently unused).
        docs_dir: Documentation root directory (currently unused).

    Returns:
        The transformed Markdown text.
    """
    def fix_link(match):
        text, target = match.group(1), match.group(2)
        # Already an anchor, or points outside the documentation: keep as is.
        if target.startswith(('#', '//')) or re.match(r'[A-Za-z][A-Za-z0-9+.-]*:', target):
            return match.group(0)
        page, _, fragment = target.partition('#')
        if fragment:
            # All pages end up in one document, so only the fragment can
            # still be a meaningful anchor.
            return f'[{text}](#{fragment})'
        if page.endswith('.md'):
            page = page[:-3]
        return f'[{text}](#{page})'

    # The negative lookbehind skips images (``![alt](src)``), whose source
    # must not be rewritten into an anchor.
    content = re.sub(r'(?<!!)\[([^\]]+)\]\(([^)]+)\)', fix_link, content)

    def to_blockquote(match):
        title, body = match.group(1), match.group(2)
        # Body lines are indented by four spaces; strip that indent.
        quoted = '\n'.join(f'> {line[4:]}' for line in body.split('\n') if line)
        # Give back the newline consumed with the last body line; without it
        # the text following the admonition would be glued to the blockquote.
        trailing = '\n' if body.endswith('\n') else ''
        return f'> **{title}**\n>\n{quoted}{trailing}'

    # One pass handles "!!!" and "??"/"???"(+) markers. The blank line between
    # the header and the indented body is optional, and the last body line
    # may end the text without a newline.
    content = re.sub(
        r'(?:!!!|\?{2,3}\+?) \w+ "([^"]+)"\n\n?((?:    .+(?:\n|\Z))+)',
        to_blockquote,
        content,
    )

    return content


def _heading_anchor(title: str) -> str:
    """Build the anchor id for a heading the way Python-Markdown's toc does by default."""
    import unicodedata

    # Fold accented characters to ASCII, drop punctuation, then collapse runs
    # of whitespace/hyphens into a single hyphen.
    # NOTE(review): the toc extension also appends _1, _2, ... to duplicate
    # ids; that de-duplication is not reproduced here.
    ascii_title = unicodedata.normalize('NFKD', title).encode('ascii', 'ignore').decode('ascii')
    cleaned = re.sub(r'[^\w\s-]', '', ascii_title).strip().lower()
    return re.sub(r'[-\s]+', '-', cleaned)


def create_combined_markdown(files: List[Path], docs_dir: Path, output_path: Path) -> None:
    """Concatenate all Markdown files into one document with a title page.

    The output starts with a fixed title, a table of contents built from the
    first level-1 heading of each file (files without one get no entry), and
    a page break. Each file then follows, run through
    ``preprocess_markdown`` and terminated by a page break.

    Args:
        files: Markdown files to include, in output order.
        docs_dir: Documentation root, forwarded to ``preprocess_markdown``.
        output_path: Destination of the combined Markdown file (UTF-8).
    """
    # Read every file once; the text is needed for both the table of
    # contents and the body.
    documents = [(Path(f), Path(f).read_text(encoding='utf-8')) for f in files]

    combined = []

    # Title page
    combined.append("# Documentation RoadWave\n\n")
    combined.append("---\n\n")
    combined.append("## Table des matières\n\n")

    # Table of contents: one entry per file that has a level-1 heading
    toc_entries = []
    for _, text in documents:
        title_match = re.search(r'^# (.+)$', text, re.MULTILINE)
        if title_match:
            title = title_match.group(1)
            toc_entries.append(f"- [{title}](#{_heading_anchor(title)})")

    combined.append('\n'.join(toc_entries))
    combined.append("\n\n---\n\n")
    combined.append('<div style="page-break-after: always;"></div>\n\n')

    # Body: each preprocessed file followed by a page break
    for path, text in documents:
        combined.append(preprocess_markdown(text, path, docs_dir))
        combined.append("\n\n")
        combined.append('<div style="page-break-after: always;"></div>\n\n')

    with open(output_path, 'w', encoding='utf-8') as out:
        out.write('\n'.join(combined))


def markdown_to_html(md_path: Path, html_path: Path) -> None:
    """Render a Markdown file to a standalone, print-styled HTML page.

    The Markdown is converted with Python-Markdown using the ``tables``,
    ``fenced_code``, ``toc``, ``attr_list`` and ``md_in_html`` extensions,
    then embedded in an HTML template whose stylesheet defines the A4 page
    layout, page numbering and element styles used for the PDF.

    Args:
        md_path: Markdown file to read (UTF-8).
        html_path: HTML file to write (UTF-8).
    """
    # Imported at call time rather than at module import. Extensions are
    # selected by name below, so no extension classes need importing.
    import markdown

    md_content = Path(md_path).read_text(encoding='utf-8')

    # Convert to HTML
    md = markdown.Markdown(extensions=[
        'tables',
        'fenced_code',
        'toc',
        'attr_list',
        'md_in_html'
    ])
    html_content = md.convert(md_content)

    # HTML template with styles; CSS braces are doubled because this is an
    # f-string and only {html_content} is interpolated.
    html_template = f'''<!DOCTYPE html>
<html lang="fr">
<head>
    <meta charset="UTF-8">
    <title>Documentation RoadWave</title>
    <style>
        @page {{
            size: A4;
            margin: 2cm;
            @bottom-center {{
                content: counter(page);
            }}
        }}

        body {{
            font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, sans-serif;
            font-size: 11pt;
            line-height: 1.6;
            color: #333;
            max-width: 100%;
        }}

        h1 {{
            color: #1a237e;
            border-bottom: 3px solid #3f51b5;
            padding-bottom: 10px;
            page-break-after: avoid;
            font-size: 24pt;
        }}

        h2 {{
            color: #303f9f;
            border-bottom: 1px solid #7986cb;
            padding-bottom: 5px;
            margin-top: 30px;
            page-break-after: avoid;
            font-size: 16pt;
        }}

        h3 {{
            color: #3949ab;
            page-break-after: avoid;
            font-size: 13pt;
        }}

        /* Couleurs Gherkin */
        span[style*="#2196F3"] {{ color: #1565c0 !important; font-weight: bold; }}  /* Étant donné - Bleu */
        span[style*="#FF9800"] {{ color: #e65100 !important; font-weight: bold; }}  /* Quand - Orange */
        span[style*="#4CAF50"] {{ color: #2e7d32 !important; font-weight: bold; }}  /* Alors - Vert */
        span[style*="#9E9E9E"] {{ color: #616161 !important; }}                      /* Et - Gris */
        span[style*="#F44336"] {{ color: #c62828 !important; font-weight: bold; }}  /* Mais - Rouge */

        table {{
            border-collapse: collapse;
            width: 100%;
            margin: 15px 0;
            font-size: 10pt;
        }}

        th, td {{
            border: 1px solid #ddd;
            padding: 8px;
            text-align: left;
        }}

        th {{
            background-color: #3f51b5;
            color: white;
        }}

        tr:nth-child(even) {{
            background-color: #f5f5f5;
        }}

        blockquote {{
            border-left: 4px solid #3f51b5;
            margin: 15px 0;
            padding: 10px 20px;
            background-color: #e8eaf6;
            font-style: italic;
        }}

        code {{
            background-color: #f5f5f5;
            padding: 2px 6px;
            border-radius: 3px;
            font-family: "Fira Code", "Consolas", monospace;
            font-size: 10pt;
        }}

        pre {{
            background-color: #263238;
            color: #aed581;
            padding: 15px;
            border-radius: 5px;
            overflow-x: auto;
            font-size: 9pt;
        }}

        pre code {{
            background-color: transparent;
            padding: 0;
            color: inherit;
        }}

        hr {{
            border: none;
            border-top: 1px solid #e0e0e0;
            margin: 30px 0;
        }}

        a {{
            color: #1976d2;
            text-decoration: none;
        }}

        /* Info box (contexte) */
        .info-box {{
            background-color: #e3f2fd;
            border-left: 4px solid #2196f3;
            padding: 15px;
            margin: 15px 0;
        }}

        /* Page breaks */
        .page-break {{
            page-break-after: always;
        }}

        /* Cover page */
        .cover {{
            text-align: center;
            padding-top: 200px;
        }}

        .cover h1 {{
            font-size: 36pt;
            border: none;
        }}

        /* TOC */
        .toc {{
            page-break-after: always;
        }}

        .toc ul {{
            list-style: none;
            padding-left: 20px;
        }}

        .toc li {{
            margin: 5px 0;
        }}
    </style>
</head>
<body>
{html_content}
</body>
</html>'''

    Path(html_path).write_text(html_template, encoding='utf-8')


def html_to_pdf(html_path: Path, pdf_path: Path) -> None:
    """Render an HTML file to PDF with WeasyPrint.

    Args:
        html_path: HTML file to read.
        pdf_path: PDF file to write.
    """
    # Imported at call time rather than at module import; only HTML is needed.
    from weasyprint import HTML

    print("  Conversion HTML → PDF...")
    HTML(filename=str(html_path)).write_pdf(str(pdf_path))


def main():
    """Generate the documentation PDF from the MkDocs project.

    Steps: load ``mkdocs.yml``, collect the Markdown files in navigation
    order, combine them into one Markdown file, convert that file to HTML
    and finally to PDF. The project root is taken to be two directories
    above this script; all outputs are written to ``docs/generated/pdf``
    below it. The intermediate Markdown and HTML files are left in place.
    """
    project_root = Path(__file__).parent.parent
    docs_dir = project_root / 'docs'
    mkdocs_path = project_root / 'mkdocs.yml'
    output_dir = project_root / 'docs' / 'generated' / 'pdf'

    output_dir.mkdir(parents=True, exist_ok=True)

    print("📄 Génération du PDF de la documentation RoadWave...")

    # Load the MkDocs configuration. SafeLineLoader derives from
    # yaml.SafeLoader, so no arbitrary Python objects are constructed.
    # An empty file loads as None; treat it as an empty configuration.
    with open(mkdocs_path, 'r', encoding='utf-8') as f:
        mkdocs_config = yaml.load(f, Loader=SafeLineLoader) or {}

    # Collect the Markdown files
    print("  Collecte des fichiers Markdown...")
    md_files = get_all_md_files(docs_dir, mkdocs_config)
    # The output directory lives inside docs/, so the combined Markdown of a
    # previous run could be picked up again; never feed it back in.
    md_files = [md for md in md_files if output_dir not in md.parents]
    print(f"  → {len(md_files)} fichiers trouvés")

    # Build the combined Markdown file
    combined_md = output_dir / 'documentation_complete.md'
    print("  Combinaison des fichiers...")
    create_combined_markdown(md_files, docs_dir, combined_md)

    # Convert to HTML
    html_path = output_dir / 'documentation_complete.html'
    print("  Conversion Markdown → HTML...")
    markdown_to_html(combined_md, html_path)

    # Convert to PDF
    pdf_path = output_dir / 'RoadWave_Documentation.pdf'
    html_to_pdf(html_path, pdf_path)

    print(f"\n✅ PDF généré: {pdf_path}")
    print(f"   Taille: {pdf_path.stat().st_size / 1024 / 1024:.2f} MB")


# Run the generator only when this file is executed as a script.
if __name__ == '__main__':
    main()