Files
mcp-ue/docgen/generate.py
Pierre-Marie Charavel 3d075cea20 Skip empty doc files and prune dead type-index entries
- render_header() returns "" when a header has no documented content
  (no /** */ comments on any class, property, function, enum, or delegate)
- generate.py skips writing those files and tracks which were written
- type-index.txt is filtered to only include types from written files,
  preventing dead entries that would cause get_class_overview to fail
- Summary line now reports how many files were skipped

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-02-27 10:38:29 -05:00

166 lines
5.6 KiB
Python

#!/usr/bin/env python3
"""
generate.py — CLI for UnrealDocGenerator.
Usage:
python generate.py <input> [input2 ...] <output_dir>
Each <input> can be a single .h file or a directory (processed recursively).
The last argument is always the output directory.
Two-pass pipeline:
Pass 1 — parse every header, build a corpus-wide type index
Pass 2 — render each header with cross-reference links injected
"""
import sys
import os
import re
from pathlib import Path
from ue_parser import parse_header, ParsedHeader
from ue_markdown import render_header
# ---------------------------------------------------------------------------
# Input collection
# ---------------------------------------------------------------------------
def collect_headers(input_arg: Path) -> list[tuple[Path, Path]]:
    """
    Collect (header_path, base_path) pairs for one CLI input.

    base_path is the root later used to compute a header's relative
    output path.  Generated headers (*.generated.h) and anything under
    an Intermediate/ directory are excluded.  Returns [] (after a
    stderr message) for generated headers and nonexistent inputs.
    """
    if input_arg.is_file():
        # Single header: its own parent directory serves as the base.
        if input_arg.name.endswith('.generated.h'):
            print(f"Skipping generated header: {input_arg}", file=sys.stderr)
            return []
        return [(input_arg, input_arg.parent)]
    if input_arg.is_dir():
        # Recurse; sorted() keeps the ordering deterministic across runs.
        return [
            (header, input_arg)
            for header in sorted(input_arg.rglob('*.h'))
            if 'Intermediate' not in header.parts
            and not header.name.endswith('.generated.h')
        ]
    print(f"Error: {input_arg} is not a file or directory", file=sys.stderr)
    return []
# ---------------------------------------------------------------------------
# Type index
# ---------------------------------------------------------------------------
def build_type_index(parsed_list: list[tuple[Path, Path, ParsedHeader]]) -> dict[str, str]:
    """
    Map every indexed type name to the .md file that documents it.

    Returns {TypeName: md_path_relative_to_docs_root} covering the
    classes, enums, and delegates of every parsed header.  Namespace
    names are deliberately excluded — they are scopes, not types.
    A name appearing in several headers keeps the last one seen.
    """
    index: dict[str, str] = {}
    for header, base, parsed in parsed_list:
        md_rel = _md_rel(header, base)
        # Same order as the member lists: classes, then enums, then delegates.
        for member in (*parsed.classes, *parsed.enums, *parsed.delegates):
            index[member.name] = md_rel
    return index
def _md_rel(h: Path, base: Path) -> str:
"""Relative .md path for header h given its input base."""
try:
rel = h.relative_to(base)
except ValueError:
rel = Path(h.name)
return str(rel.with_suffix('.md'))
# ---------------------------------------------------------------------------
# Type index file
# ---------------------------------------------------------------------------
def write_type_index(type_index: dict[str, str], output_dir: Path) -> None:
    """
    Emit type-index.txt: one "TypeName: path/to/File.md" per line,
    alphabetically sorted.  Agents can grep this file to resolve a type
    name to its documentation path.  Names that are not valid C-style
    identifiers are dropped to keep the file grep-friendly.
    """
    identifier = re.compile(r'^[A-Za-z_][A-Za-z0-9_]*$')
    entries = [
        f"{name}: {path}"
        for name, path in type_index.items()
        if identifier.match(name)
    ]
    entries.sort()
    target = output_dir / "type-index.txt"
    target.write_text('\n'.join(entries) + '\n', encoding='utf-8')
    print(f"Written {target}")
# ---------------------------------------------------------------------------
# Main
# ---------------------------------------------------------------------------
def main():
    """CLI entry point: collect headers, parse, index, render.

    Usage: python generate.py <input> [input2 ...] <output_dir>

    Exits 1 on a usage error or when no headers are found.  Headers
    that fail to parse or render are reported on stderr and skipped;
    headers whose rendering is empty (no documented content) produce no
    output file and are pruned from the type index so agents never
    resolve a type to a nonexistent .md file.
    """
    if len(sys.argv) < 3:
        print("Usage: python generate.py <input> [input2 ...] <output_dir>", file=sys.stderr)
        sys.exit(1)
    *input_args, output_arg = sys.argv[1:]
    output_dir = Path(output_arg)
    output_dir.mkdir(parents=True, exist_ok=True)

    # Collect (header, base) pairs from all inputs
    header_pairs: list[tuple[Path, Path]] = []
    for arg in input_args:
        pairs = collect_headers(Path(arg))
        if not pairs:
            print(f"Warning: no .h files found in {arg}", file=sys.stderr)
        header_pairs.extend(pairs)
    if not header_pairs:
        print("No .h files found.", file=sys.stderr)
        sys.exit(1)

    # --- Pass 1: parse all ---
    parsed_list: list[tuple[Path, Path, ParsedHeader]] = []
    for h, base in header_pairs:
        print(f"Parsing {h} ...")
        try:
            parsed = parse_header(str(h))
            parsed_list.append((h, base, parsed))
        except Exception as exc:
            print(f" ERROR parsing {h}: {exc}", file=sys.stderr)

    # --- Corpus-wide type index (drives cross-reference links in pass 2) ---
    type_index = build_type_index(parsed_list)

    # --- Pass 2: render all ---
    success = 0
    skipped = 0
    written_mds: set[str] = set()
    for h, base, parsed in parsed_list:
        print(f"Rendering {h} ...")
        current_md = _md_rel(h, base)
        try:
            md = render_header(parsed, type_index=type_index, current_md=current_md)
            if not md:
                # No documented content in this header — write nothing.
                skipped += 1
                continue
            # Create parent directories only for files we actually write;
            # doing this earlier left empty directory trees behind for
            # skipped or failed headers.
            out_path = output_dir / current_md
            out_path.parent.mkdir(parents=True, exist_ok=True)
            out_path.write_text(md, encoding='utf-8')
            written_mds.add(current_md)
            success += 1
        except Exception as exc:
            print(f" ERROR rendering {h}: {exc}", file=sys.stderr)

    # Remove type-index entries whose files were not written (no documented
    # content), so the index never points at a nonexistent .md file.
    type_index = {name: path for name, path in type_index.items() if path in written_mds}
    write_type_index(type_index, output_dir)
    print(f"\nGenerated {success}/{len(parsed_list)} files "
          f"({skipped} skipped — no documented content) + type-index.txt in {output_dir}/")
if __name__ == '__main__':
main()