Files
mcp-ue/docgen/generate.py
Pierre-Marie Charavel 3d075cea20 Skip empty doc files and prune dead type-index entries
- render_header() returns "" when a header has no documented content
  (no /** */ comments on any class, property, function, enum, or delegate)
- generate.py skips writing those files and tracks which were written
- type-index.txt is filtered to only include types from written files,
  preventing dead entries that would cause get_class_overview to fail
- Summary line now reports how many files were skipped

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-02-27 10:38:29 -05:00

166 lines
5.6 KiB
Python

#!/usr/bin/env python3
"""
generate.py — CLI for UnrealDocGenerator.
Usage:
python generate.py <input> [input2 ...] <output_dir>
Each <input> can be a single .h file or a directory (processed recursively).
The last argument is always the output directory.
Two-pass pipeline:
Pass 1 — parse every header, build a corpus-wide type index
Pass 2 — render each header with cross-reference links injected
"""
import sys
import os
import re
from pathlib import Path
from ue_parser import parse_header, ParsedHeader
from ue_markdown import render_header
# ---------------------------------------------------------------------------
# Input collection
# ---------------------------------------------------------------------------
def collect_headers(input_arg: Path) -> list[tuple[Path, Path]]:
    """
    Collect (header_path, base_path) pairs for one CLI input.

    base_path is the root later used to compute a header's relative
    output path.  Generated headers (*.generated.h) and anything under
    an Intermediate/ directory are excluded.  Returns [] (after a
    stderr message) for generated headers and nonexistent inputs.
    """
    if input_arg.is_file():
        # Single header: its own parent directory serves as the base.
        if input_arg.name.endswith('.generated.h'):
            print(f"Skipping generated header: {input_arg}", file=sys.stderr)
            return []
        return [(input_arg, input_arg.parent)]
    if input_arg.is_dir():
        # Recurse; sorted() keeps the ordering deterministic across runs.
        return [
            (header, input_arg)
            for header in sorted(input_arg.rglob('*.h'))
            if 'Intermediate' not in header.parts
            and not header.name.endswith('.generated.h')
        ]
    print(f"Error: {input_arg} is not a file or directory", file=sys.stderr)
    return []
# ---------------------------------------------------------------------------
# Type index
# ---------------------------------------------------------------------------
def build_type_index(parsed_list: list[tuple[Path, Path, ParsedHeader]]) -> dict[str, str]:
    """
    Map every indexed type name to the .md file that documents it.

    Returns {TypeName: md_path_relative_to_docs_root} covering the
    classes, enums, and delegates of every parsed header.  Namespace
    names are deliberately excluded — they are scopes, not types.
    A name appearing in several headers keeps the last one seen.
    """
    index: dict[str, str] = {}
    for header, base, parsed in parsed_list:
        md_rel = _md_rel(header, base)
        # Same order as the member lists: classes, then enums, then delegates.
        for member in (*parsed.classes, *parsed.enums, *parsed.delegates):
            index[member.name] = md_rel
    return index
def _md_rel(h: Path, base: Path) -> str:
"""Relative .md path for header h given its input base."""
try:
rel = h.relative_to(base)
except ValueError:
rel = Path(h.name)
return str(rel.with_suffix('.md'))
# ---------------------------------------------------------------------------
# Type index file
# ---------------------------------------------------------------------------
def write_type_index(type_index: dict[str, str], output_dir: Path) -> None:
    """
    Emit type-index.txt: one "TypeName: path/to/File.md" per line,
    alphabetically sorted.  Agents can grep this file to resolve a type
    name to its documentation path.  Names that are not valid C-style
    identifiers are dropped to keep the file grep-friendly.
    """
    identifier = re.compile(r'^[A-Za-z_][A-Za-z0-9_]*$')
    entries = [
        f"{name}: {path}"
        for name, path in type_index.items()
        if identifier.match(name)
    ]
    entries.sort()
    target = output_dir / "type-index.txt"
    target.write_text('\n'.join(entries) + '\n', encoding='utf-8')
    print(f"Written {target}")
# ---------------------------------------------------------------------------
# Main
# ---------------------------------------------------------------------------
def main():
    """CLI entry point: collect headers, parse, index, render.

    Usage: python generate.py <input> [input2 ...] <output_dir>

    Exits 1 on a usage error or when no headers are found.  Headers
    that fail to parse or render are reported on stderr and skipped;
    headers whose rendering is empty (no documented content) produce no
    output file and are pruned from the type index so agents never
    resolve a type to a nonexistent .md file.
    """
    if len(sys.argv) < 3:
        print("Usage: python generate.py <input> [input2 ...] <output_dir>", file=sys.stderr)
        sys.exit(1)
    *input_args, output_arg = sys.argv[1:]
    output_dir = Path(output_arg)
    output_dir.mkdir(parents=True, exist_ok=True)

    # Collect (header, base) pairs from all inputs
    header_pairs: list[tuple[Path, Path]] = []
    for arg in input_args:
        pairs = collect_headers(Path(arg))
        if not pairs:
            print(f"Warning: no .h files found in {arg}", file=sys.stderr)
        header_pairs.extend(pairs)
    if not header_pairs:
        print("No .h files found.", file=sys.stderr)
        sys.exit(1)

    # --- Pass 1: parse all ---
    parsed_list: list[tuple[Path, Path, ParsedHeader]] = []
    for h, base in header_pairs:
        print(f"Parsing {h} ...")
        try:
            parsed = parse_header(str(h))
            parsed_list.append((h, base, parsed))
        except Exception as exc:
            print(f" ERROR parsing {h}: {exc}", file=sys.stderr)

    # --- Corpus-wide type index (drives cross-reference links in pass 2) ---
    type_index = build_type_index(parsed_list)

    # --- Pass 2: render all ---
    success = 0
    skipped = 0
    written_mds: set[str] = set()
    for h, base, parsed in parsed_list:
        print(f"Rendering {h} ...")
        current_md = _md_rel(h, base)
        try:
            md = render_header(parsed, type_index=type_index, current_md=current_md)
            if not md:
                # No documented content in this header — write nothing.
                skipped += 1
                continue
            # Create parent directories only for files we actually write;
            # doing this earlier left empty directory trees behind for
            # skipped or failed headers.
            out_path = output_dir / current_md
            out_path.parent.mkdir(parents=True, exist_ok=True)
            out_path.write_text(md, encoding='utf-8')
            written_mds.add(current_md)
            success += 1
        except Exception as exc:
            print(f" ERROR rendering {h}: {exc}", file=sys.stderr)

    # Remove type-index entries whose files were not written (no documented
    # content), so the index never points at a nonexistent .md file.
    type_index = {name: path for name, path in type_index.items() if path in written_mds}
    write_type_index(type_index, output_dir)
    print(f"\nGenerated {success}/{len(parsed_list)} files "
          f"({skipped} skipped — no documented content) + type-index.txt in {output_dir}/")
if __name__ == '__main__':
main()