- ue_parser.py: position-based UE C++ header parser - ue_markdown.py: compact agent-optimised Markdown renderer - generate.py: two-pass CLI (parse-all → type index → render-all) - samples/: representative UE headers (GeomUtils, AIController, GameplayTagsManager) - .claude/skills/ue-api/: Claude Code skill for querying UE docs + source headers - CLAUDE.md: architecture notes, usage, critical gotchas Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
1184 lines
39 KiB
Python
1184 lines
39 KiB
Python
"""
|
|
ue_parser.py — Parse Unreal Engine C++ header files into Python dataclasses.
|
|
"""
|
|
from __future__ import annotations
|
|
import re
|
|
from dataclasses import dataclass, field
|
|
from pathlib import Path
|
|
from typing import Optional
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Data model
|
|
# ---------------------------------------------------------------------------
|
|
|
|
@dataclass
class DocComment:
    """Structured form of a comment attached to a declaration."""

    # Free text of the comment; parse_doc_comment also folds the text of
    # @note / @see / @todo tags into it.
    description: str = ""
    params: dict[str, str] = field(default_factory=dict)  # name -> desc
    # Text of the @return / @returns tag, if any.
    returns: str = ""
|
|
|
|
|
|
@dataclass
class PropertyInfo:
    """One UPROPERTY-annotated member found in a class/struct body."""

    name: str = ""
    # Declared type; a bit-field width is appended as " : N".
    type: str = ""
    specifiers: str = ""  # raw UPROPERTY(...) content
    # Access level in effect where the property was declared.
    access: str = "public"
    # True when declared inside a WITH_EDITOR / WITH_EDITORONLY_DATA block.
    editor_only: bool = False
    comment: Optional[DocComment] = None
|
|
|
|
|
|
@dataclass
class FunctionInfo:
    """One member function (UFUNCTION or plain) found in a class/struct body."""

    name: str = ""
    return_type: str = ""
    # Text between the parameter parentheses, unsplit.
    raw_params: str = ""
    # Signature text with the *_API export macro removed.
    full_signature: str = ""
    uf_specifiers: str = ""  # raw UFUNCTION(...) content
    comment: Optional[DocComment] = None
    # Lower-cased leading keywords such as "virtual" or "static".
    modifiers: list[str] = field(default_factory=list)
    access: str = "public"
    # Set from a preceding UE_DEPRECATED / UE_DEPRECATED_FORGAME macro.
    is_deprecated: bool = False
    deprecated_version: str = ""
    deprecated_msg: str = ""
    # True when declared inside a WITH_EDITOR / WITH_EDITORONLY_DATA block.
    editor_only: bool = False
|
|
|
|
|
|
@dataclass
class EnumValue:
    """A single enumerator inside an enum body."""

    name: str = ""
    # Text after "=", or "" when the enumerator has no explicit value.
    value: str = ""
    # Trailing // comment on the enumerator's line, if any.
    comment: str = ""
|
|
|
|
|
|
@dataclass
class EnumInfo:
    """An enum declaration together with its enumerators."""

    name: str = ""
    # Type named after ":" in the enum header, or "" when absent.
    underlying_type: str = ""
    # Raw UENUM(...) content, or "" for a plain enum.
    ue_specifiers: str = ""
    comment: Optional[DocComment] = None
    # EnumValue entries in declaration order.
    values: list = field(default_factory=list)
    editor_only: bool = False
|
|
|
|
|
|
@dataclass
class ClassInfo:
    """A class or struct declaration and the members parsed from its body."""

    name: str = ""
    # "class" or "struct", taken from the declaration keyword.
    kind: str = "class"
    # Base class names with the access specifier stripped.
    bases: list[str] = field(default_factory=list)
    # Raw UCLASS(...) / USTRUCT(...) content, or "" for a bare declaration.
    ue_specifiers: str = ""
    # The *_API export macro in the declaration, or "" when absent.
    module_api: str = ""
    comment: Optional[DocComment] = None
    properties: list = field(default_factory=list)  # PropertyInfo entries
    functions: list = field(default_factory=list)  # FunctionInfo entries
    nested_enums: list = field(default_factory=list)  # EnumInfo entries
|
|
|
|
|
|
@dataclass
class DelegateInfo:
    """A delegate declared through one of the DECLARE_*DELEGATE* macros."""

    name: str = ""
    # The macro identifier that declared the delegate.
    macro: str = ""
    params: list = field(default_factory=list)  # list[(type, name)]
    comment: Optional[DocComment] = None
    # True when the macro name contains "MULTICAST".
    is_multicast: bool = False
    # True when the macro name contains "DYNAMIC".
    is_dynamic: bool = False
|
|
|
|
|
|
@dataclass
class FreeFunction:
    """A non-member function found inside a namespace body."""

    name: str = ""
    # Signature text up to the closing parenthesis, *_API macro removed.
    full_signature: str = ""
    return_type: str = ""
    # Text between the parameter parentheses, unsplit.
    raw_params: str = ""
    comment: Optional[DocComment] = None
    # Lower-cased leading keywords such as "static" or "inline".
    modifiers: list[str] = field(default_factory=list)
|
|
|
|
|
|
@dataclass
class NamespaceInfo:
    """A named namespace and the free functions parsed from its body."""

    name: str = ""
    functions: list = field(default_factory=list)  # FreeFunction entries
|
|
|
|
|
|
@dataclass
class ParsedHeader:
    """Everything parse_header extracts from a single header file."""

    filepath: str = ""
    # Final path component of filepath.
    filename: str = ""
    # Inferred from the first *_API macro, else the file name stem.
    module_name: str = ""
    classes: list = field(default_factory=list)  # ClassInfo entries
    enums: list = field(default_factory=list)  # EnumInfo entries
    delegates: list = field(default_factory=list)  # DelegateInfo entries
    namespaces: list = field(default_factory=list)  # NamespaceInfo entries
    free_functions: list = field(default_factory=list)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Core utilities
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def find_matching_close(text: str, open_pos: int, open_ch: str, close_ch: str) -> int:
    """
    Given text[open_pos] == open_ch, return index of matching close_ch.

    Skips // comments, /* */ comments, string literals and character
    literals, so delimiters inside them are not counted.
    Returns -1 if not found.
    """
    depth = 0
    i = open_pos
    n = len(text)
    while i < n:
        ch = text[i]
        # Line comment: jump to the terminating newline (or end of text).
        if ch == '/' and i + 1 < n and text[i + 1] == '/':
            nl = text.find('\n', i)
            i = nl if nl != -1 else n
            continue
        # Block comment: jump past the closing */ (or to end of text).
        if ch == '/' and i + 1 < n and text[i + 1] == '*':
            end = text.find('*/', i + 2)
            i = (end + 2) if end != -1 else n
            continue
        # String literal
        if ch == '"':
            i += 1
            while i < n:
                if text[i] == '\\':
                    i += 2
                    continue
                if text[i] == '"':
                    break
                i += 1
            i += 1
            continue
        # Character literal such as '}' or '"'. A quote that directly follows
        # an alphanumeric character is treated as a C++14 digit separator
        # (1'000'000) and left alone; so is a quote with no closing partner
        # on the same line.
        if ch == "'" and not (i > 0 and (text[i - 1].isalnum() or text[i - 1] == '_')):
            j = i + 1
            while j < n and text[j] not in "'\n":
                j += 2 if text[j] == '\\' else 1
            if j < n and text[j] == "'":
                i = j + 1
                continue
        if ch == open_ch:
            depth += 1
        elif ch == close_ch:
            depth -= 1
            if depth == 0:
                return i
        i += 1
    return -1
|
|
|
|
|
|
def extract_balanced(text: str, start: int, open_ch: str, close_ch: str):
    """
    Locate the first open_ch at or after start and its matching close_ch.

    Returns (inner_content, end_pos) where inner_content excludes both
    delimiters and end_pos is the index just past the closing one.
    Returns ("", start) when there is no opener or it is never closed.
    """
    opener = text.find(open_ch, start)
    closer = -1
    if opener != -1:
        closer = find_matching_close(text, opener, open_ch, close_ch)
    if closer == -1:
        return ("", start)
    inner = text[opener + 1:closer]
    return (inner, closer + 1)
|
|
|
|
|
|
def _split_params(raw_params: str) -> list:
|
|
"""Split param string respecting <> and () nesting."""
|
|
params = []
|
|
depth = 0
|
|
current = []
|
|
for ch in raw_params:
|
|
if ch in '<({':
|
|
depth += 1
|
|
current.append(ch)
|
|
elif ch in '>)}':
|
|
depth -= 1
|
|
current.append(ch)
|
|
elif ch == ',' and depth == 0:
|
|
p = ''.join(current).strip()
|
|
if p:
|
|
params.append(p)
|
|
current = []
|
|
else:
|
|
current.append(ch)
|
|
p = ''.join(current).strip()
|
|
if p:
|
|
params.append(p)
|
|
return params
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Doc comment parsing
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def parse_doc_comment(raw: str) -> DocComment:
    """
    Turn the raw text of a doc comment into a DocComment.

    The /** and */ delimiters and leading '*' decoration are removed.
    @param and @return(s) tags (with their continuation lines) fill
    params / returns, @note / @see / @todo text is folded into the
    description, and any other @tag line is dropped.
    """
    body = raw.strip()
    if body.startswith('/**'):
        body = body[3:]
    if body.endswith('*/'):
        body = body[:-2]

    rows = [row.strip().lstrip('*').strip() for row in body.split('\n')]
    total = len(rows)

    def gather(first: str, idx: int):
        """Append continuation rows (up to a blank row or the next @tag)."""
        pieces = [first]
        while idx < total and rows[idx] and not rows[idx].startswith('@'):
            pieces.append(rows[idx])
            idx += 1
        return ' '.join(pieces).strip(' -'), idx

    result = DocComment()
    prose = []
    pos = 0
    while pos < total:
        row = rows[pos]
        pos += 1

        # @param
        hit = re.match(r'@param(?:\[[\w,]+\])?\s+(\w+)\s*(.*)', row, re.IGNORECASE)
        if hit:
            text, pos = gather(hit.group(2).strip(), pos)
            result.params[hit.group(1)] = text
            continue

        # @return / @returns
        hit = re.match(r'@returns?\s*(.*)', row, re.IGNORECASE)
        if hit:
            text, pos = gather(hit.group(1).strip(), pos)
            result.returns = text
            continue

        # @note, @see, @todo: keep the text as part of the description
        hit = re.match(r'@(?:note|see|todo)\s*(.*)', row, re.IGNORECASE)
        if hit:
            remark = hit.group(1).strip()
            if remark:
                prose.append(remark)
            continue

        # Any other @tag is ignored
        if re.match(r'@\w+', row):
            continue

        prose.append(row)

    result.description = ' '.join(filter(None, prose)).strip()
    return result
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Preprocessor tracking
|
|
# ---------------------------------------------------------------------------
|
|
|
|
class EditorOnlyTracker:
    """
    Track whether the scanner is inside an editor-only preprocessor block.

    Feed every preprocessor line to handle_line(); editor_only is True while
    any enclosing conditional branch is guarded by one of EDITOR_GUARDS.
    """

    EDITOR_GUARDS = {'WITH_EDITOR', 'WITH_EDITORONLY_DATA'}

    # Conditional directives, tolerating "# if" and "#if(" spellings.
    _DIRECTIVE_RE = re.compile(r'#\s*(ifndef|ifdef|if|elif|else|endif)\b(.*)', re.DOTALL)
    # A negated operand such as "!WITH_EDITOR" or "!defined(WITH_EDITOR)".
    _NEGATED_RE = re.compile(r'!\s*(?:defined\b\s*\(?\s*)?(\w+)')

    def __init__(self):
        # One entry per open conditional: is the current branch editor-only?
        self._stack: list[bool] = []
        # Parallel stack: would the remaining #elif/#else branches be
        # editor-only (true after "#if !WITH_EDITOR" / "#ifndef WITH_EDITOR")?
        self._else_stack: list[bool] = []

    def _classify(self, cond: str, negate: bool = False):
        """Return (branch_is_editor_only, else_branch_is_editor_only)."""
        negated_names = self._NEGATED_RE.findall(cond)
        remainder = self._NEGATED_RE.sub(' ', cond)
        guarded = any(g in remainder for g in self.EDITOR_GUARDS)
        excluded = any(g in name for name in negated_names
                       for g in self.EDITOR_GUARDS)
        if negate:  # #ifndef inverts the whole condition
            guarded, excluded = excluded, guarded
        return guarded, (excluded and not guarded)

    def handle_line(self, line: str):
        """Update the conditional stack from one preprocessor line."""
        m = self._DIRECTIVE_RE.match(line.strip())
        if not m:
            return
        directive = m.group(1)
        # Ignore a trailing // comment so "#if 0 // WITH_EDITOR" is not a guard.
        cond = m.group(2).split('//', 1)[0]
        if directive in ('if', 'ifdef', 'ifndef'):
            # #ifndef must push a frame too, otherwise its #endif would pop
            # an unrelated outer frame.
            now, later = self._classify(cond, negate=(directive == 'ifndef'))
            self._stack.append(now)
            self._else_stack.append(later)
        elif directive == 'elif':
            if self._stack:
                now, _ = self._classify(cond)
                # Reaching #elif after a negated guard implies the guard holds.
                self._stack[-1] = now or self._else_stack[-1]
        elif directive == 'else':
            if self._stack:
                self._stack[-1] = self._else_stack[-1]
        elif directive == 'endif':
            if self._stack:
                self._stack.pop()
                self._else_stack.pop()

    @property
    def editor_only(self) -> bool:
        """True while any enclosing conditional branch is editor-only."""
        return any(self._stack)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Function signature parsing
|
|
# ---------------------------------------------------------------------------
|
|
|
|
# An upper-case export macro ending in _API (e.g. ENGINE_API), together with
# any whitespace that follows it.
_API_RE = re.compile(r'\b[A-Z][A-Z0-9_]+_API\b\s*')
# The `mutable` keyword together with any whitespace that follows it.
_MUTABLE_RE = re.compile(r'\bmutable\b\s*')

# Lower-cased leading words that _parse_function_signature records as
# modifiers instead of treating them as part of the return type.
_PREFIX_MODS = {'virtual', 'static', 'inline', 'explicit', 'friend',
                'forceinline', 'forcenoinline', 'ue_nodiscard', 'constexpr',
                'extern'}
|
|
|
|
|
|
def _clean_type(t: str) -> str:
|
|
return ' '.join(t.split())
|
|
|
|
|
|
def _parse_function_signature(sig: str):
    """
    Break "[modifiers] [return_type] name(params)" into its parts.

    The parameter list is the parenthesised group ending at the last ')' in
    sig, so trailing qualifiers after it are ignored.
    Returns (return_type, name, raw_params, modifiers); when no balanced
    parameter list exists the whole text is returned as the name.
    """
    sig = _API_RE.sub('', sig).strip()

    # Locate the last ')' and walk left to the '(' that balances it.
    close_idx = sig.rfind(')')
    if close_idx == -1:
        return "", sig.strip(), "", []

    open_idx = -1
    nesting = 0
    for idx in range(close_idx, -1, -1):
        c = sig[idx]
        if c == ')':
            nesting += 1
        elif c == '(':
            nesting -= 1
            if nesting == 0:
                open_idx = idx
                break
    if open_idx == -1:
        return "", sig.strip(), "", []

    raw_params = sig[open_idx + 1:close_idx].strip()
    head_words = sig[:open_idx].strip().split()

    # Leading words found in _PREFIX_MODS are modifiers, recorded lower-cased.
    split_at = 0
    for word in head_words:
        if word.lower() not in _PREFIX_MODS:
            break
        split_at += 1
    modifiers = [word.lower() for word in head_words[:split_at]]
    remainder = ' '.join(head_words[split_at:])

    # The final whitespace-separated token is the name; the rest is the type.
    parts = remainder.rsplit(None, 1)
    if len(parts) == 2:
        return_type, name = _clean_type(parts[0]), parts[1]
    elif parts:
        return_type, name = "", parts[0]
    else:
        return_type, name = "", ""

    # "Type *Name" style: move leading * / & from the name onto the type.
    while name.startswith(('*', '&')):
        return_type += name[0]
        name = name[1:]

    return _clean_type(return_type), name.strip(), raw_params, modifiers
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Enum body parsing
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def _parse_enum_body(body: str) -> list:
    """
    Parse the text between an enum's braces into a list of EnumValue.

    Block comments and UMETA(...) annotations are removed. A line may hold
    several comma-separated enumerators; a trailing // comment is attached
    to the last enumerator on its line.
    """
    # UMETA(...) whose arguments may contain quoted strings (which can hold
    # parentheses) and one level of nested parentheses.
    umeta_re = re.compile(
        r'UMETA\s*\((?:[^()"]|"(?:\\.|[^"\\])*"|\([^()]*\))*\)')

    def split_entries(segment: str) -> list:
        """Split on commas that are not nested inside () or {}."""
        pieces = []
        depth = 0
        start = 0
        for idx, c in enumerate(segment):
            if c in '({':
                depth += 1
            elif c in ')}' and depth > 0:
                depth -= 1
            elif c == ',' and depth == 0:
                pieces.append(segment[start:idx])
                start = idx + 1
        pieces.append(segment[start:])
        return [p.strip() for p in pieces if p.strip()]

    # Remove /* ... */ comments first so their text is never mistaken for
    # enumerators once lines are split on commas.
    body = re.sub(r'/\*.*?\*/', ' ', body, flags=re.DOTALL)

    values = []
    for line in body.split('\n'):
        comment = ""
        ci = line.find('//')
        if ci != -1:
            # Also drop the extra markers of "///" and "//<" style comments.
            comment = line[ci + 2:].lstrip('/<').strip()
            line = line[:ci]
        line = umeta_re.sub('', line)
        if 'UMETA' in line:
            # Fallback for annotations the pattern above could not balance.
            line = re.sub(r'UMETA\s*\([^)]*\)', '', line)
        entries = split_entries(line)
        last = len(entries) - 1
        for idx, entry in enumerate(entries):
            entry_comment = comment if idx == last else ""
            m = re.match(r'(\w+)\s*=\s*(.*)', entry)
            if m:
                values.append(EnumValue(name=m.group(1).strip(),
                                        value=m.group(2).strip(),
                                        comment=entry_comment))
            elif re.match(r'\w+\s*$', entry):
                values.append(EnumValue(name=entry.strip(), value="",
                                        comment=entry_comment))
    return values
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Delegate parsing
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def _parse_delegate(macro: str, args_str: str,
                    comment: Optional[DocComment]) -> DelegateInfo:
    """
    Build a DelegateInfo from a DECLARE_*DELEGATE* macro and its arguments.

    Argument layout depends on the macro family:
      * *_RetVal* macros put the return type before the delegate name;
      * DYNAMIC macros list parameters as (type, name) pairs;
      * all other macros list parameter types only; a name written as an
        inline /*Name*/ comment is used when present.
    """
    is_dynamic = 'DYNAMIC' in macro
    is_multicast = 'MULTICAST' in macro
    inline_name_re = re.compile(r'/\*\s*(.*?)\s*\*/', re.DOTALL)

    # Template types contain commas, so split on top-level commas only.
    args = _split_params(args_str)
    if '_RetVal' in macro and args:
        # The leading return type has no field in DelegateInfo; skip it so
        # it is not mistaken for the delegate name.
        args = args[1:]
    name = inline_name_re.sub('', args[0]).strip() if args else ""
    rest = args[1:]

    params = []
    if is_dynamic:
        for k in range(0, len(rest), 2):
            ptype = rest[k]
            pname = rest[k + 1] if k + 1 < len(rest) else ""
            if ptype:
                params.append((ptype, pname))
    else:
        for arg in rest:
            hint = inline_name_re.search(arg)
            ptype = inline_name_re.sub('', arg).strip()
            pname = hint.group(1) if hint else ""
            if ptype:
                params.append((ptype, pname))
    return DelegateInfo(name=name, macro=macro, params=params, comment=comment,
                        is_multicast=is_multicast, is_dynamic=is_dynamic)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Class body parsing — line-based approach to avoid regex backtracking
|
|
# ---------------------------------------------------------------------------
|
|
|
|
# Macros to skip entirely (skip line when encountered)
|
|
_SKIP_MACROS = frozenset({
|
|
'GENERATED_BODY', 'GENERATED_UCLASS_BODY', 'GENERATED_USTRUCT_BODY',
|
|
'GENERATED_USTRUCT_BODY', 'check', 'checkSlow', 'ensure', 'checkf',
|
|
'UE_LOG', 'DECLARE_LOG_CATEGORY_EXTERN', 'DECLARE_STATS_GROUP',
|
|
})
|
|
|
|
# Macros we handle explicitly (don't treat as regular lines)
|
|
_HANDLED_MACROS = frozenset({
|
|
'UPROPERTY', 'UFUNCTION', 'UENUM', 'UE_DEPRECATED', 'UE_DEPRECATED_FORGAME',
|
|
'DECLARE_DELEGATE', 'DECLARE_MULTICAST_DELEGATE', 'DECLARE_DYNAMIC_DELEGATE',
|
|
'DECLARE_DYNAMIC_MULTICAST_DELEGATE', 'DECLARE_TS_MULTICAST_DELEGATE',
|
|
'DECLARE_MULTICAST_DELEGATE_OneParam', 'DECLARE_MULTICAST_DELEGATE_TwoParams',
|
|
'DECLARE_MULTICAST_DELEGATE_ThreeParams',
|
|
})
|
|
|
|
# C++ keywords that start non-function statements
|
|
_SKIP_KEYWORDS = frozenset({
|
|
'if', 'else', 'while', 'for', 'do', 'switch', 'return', 'break',
|
|
'continue', 'typedef', 'using', 'namespace', 'template',
|
|
'throw', 'try', 'catch', 'static_assert', 'static_cast',
|
|
'reinterpret_cast', 'dynamic_cast', 'const_cast',
|
|
})
|
|
|
|
|
|
def _parse_class_body(body: str, class_kind: str = 'class') -> tuple:
    """
    Parse class/struct body. Returns (properties, functions, nested_enums).

    Uses a position-based scanner; falls back to line-skip for unrecognized patterns.

    body is the text between the class braces. class_kind selects the
    initial access level ('class' starts private, anything else public).
    Nested classes/structs are skipped, not parsed.
    """
    properties: list = []
    functions: list = []
    nested_enums: list = []

    access = 'private' if class_kind == 'class' else 'public'
    editor_tracker = EditorOnlyTracker()
    pending_comment: Optional[DocComment] = None
    pending_deprecated = False
    pending_dep_version = ""
    pending_dep_msg = ""

    i = 0
    n = len(body)
    # Matching at an offset avoids copying the rest of the body (body[i:])
    # for every token, which made scanning quadratic.
    ident_re = re.compile(r'\w+')

    def advance_line() -> int:
        """Return position after the next newline from i."""
        end = body.find('\n', i)
        return (end + 1) if end != -1 else n

    def skip_block(pos: int) -> int:
        """Skip a {…} block starting at or after pos."""
        bp = body.find('{', pos)
        if bp == -1:
            return pos
        cl = find_matching_close(body, bp, '{', '}')
        if cl == -1:
            return pos
        ep = cl + 1
        while ep < n and body[ep] in ' \t\r\n':
            ep += 1
        if ep < n and body[ep] == ';':
            ep += 1
        return ep

    def macro_args(pos: int, name: str):
        """
        Return (inner, end) for the (...) list of the macro `name` at pos.

        extract_balanced reports failure by returning its start position;
        in that case step past the macro name so the scanner always moves
        forward instead of re-reading the same identifier forever.
        """
        inner, end = extract_balanced(body, pos, '(', ')')
        if end <= pos:
            return "", pos + len(name)
        return inner, end

    while i < n:
        # Skip pure whitespace
        while i < n and body[i] in ' \t\r\n':
            i += 1
        if i >= n:
            break

        ch = body[i]

        # --- Preprocessor ---
        if ch == '#':
            end = body.find('\n', i)
            pp_line = body[i:(end if end != -1 else n)]
            editor_tracker.handle_line(pp_line)
            i = (end + 1) if end != -1 else n
            continue

        # --- Doc comment /** */ ---
        if body[i:i+3] == '/**':
            end = body.find('*/', i)
            if end == -1:
                i += 3
                continue
            pending_comment = parse_doc_comment(body[i:end + 2])
            i = end + 2
            continue

        # --- Block comment /* */ ---
        if body[i:i+2] == '/*':
            end = body.find('*/', i)
            i = (end + 2) if end != -1 else n
            continue

        # --- Line comment // ---
        if body[i:i+2] == '//':
            end = body.find('\n', i)
            line_text = body[i:(end if end != -1 else n)].lstrip('/').strip()
            # Only the first line of a // run becomes the pending comment.
            if pending_comment is None and line_text:
                pending_comment = DocComment(description=line_text)
            i = (end + 1) if end != -1 else n
            continue

        # --- Read identifier at current position ---
        m = ident_re.match(body, i)
        if not m:
            # Not an identifier; skip character
            i += 1
            continue
        ident = m.group(0)

        # --- Access specifier: public: protected: private: ---
        if ident in ('public', 'protected', 'private'):
            colon_pos = i + len(ident)
            while colon_pos < n and body[colon_pos] in ' \t':
                colon_pos += 1
            if colon_pos < n and body[colon_pos] == ':':
                access = ident
                i = colon_pos + 1
                pending_comment = None
                continue

        # --- GENERATED_BODY etc ---
        if ident in _SKIP_MACROS or (ident.startswith('GENERATED_') and ident.endswith('BODY')):
            # Skip to end of line
            end = body.find('\n', i)
            i = (end + 1) if end != -1 else n
            pending_comment = None
            continue

        # --- UE_DEPRECATED / UE_DEPRECATED_FORGAME ---
        if ident in ('UE_DEPRECATED', 'UE_DEPRECATED_FORGAME'):
            inner, ep = macro_args(i, ident)
            parts = inner.split(',', 1)
            pending_dep_version = parts[0].strip().strip('"')
            pending_dep_msg = parts[1].strip().strip('"') if len(parts) > 1 else ""
            pending_deprecated = True
            i = ep
            continue

        # --- UPROPERTY ---
        if ident == 'UPROPERTY':
            inner, ep = macro_args(i, ident)
            specifiers = inner.strip()
            comment = pending_comment
            pending_comment = None
            # Advance to property declaration
            j = ep
            while j < n and body[j] in ' \t\r\n':
                j += 1
            semi = body.find(';', j)
            if semi == -1:
                i = ep
                continue
            decl = _API_RE.sub('', body[j:semi]).strip()
            decl = _MUTABLE_RE.sub('', decl).strip()
            # Drop a default member initializer ("= value" or "{value}") so
            # it is not mistaken for part of the type or name.
            decl = re.split(r'=|\{', decl, maxsplit=1)[0].strip()
            # Parse: TYPE NAME [array extents] [: bits]
            m2 = re.match(
                r'(.*?)\s+(\w+)\s*((?:\[[^\]]*\]\s*)*)(?::\s*(\d+))?\s*$',
                decl, re.DOTALL)
            if m2:
                ptype = _clean_type(m2.group(1))
                pname = m2.group(2)
                extents = m2.group(3).strip()
                if extents:
                    ptype += extents
                if m2.group(4):
                    ptype += ' : ' + m2.group(4)
            else:
                ptype = decl
                pname = ""
            prop = PropertyInfo(name=pname, type=ptype, specifiers=specifiers,
                                access=access, editor_only=editor_tracker.editor_only,
                                comment=comment)
            properties.append(prop)
            i = semi + 1
            continue

        # --- UFUNCTION ---
        if ident == 'UFUNCTION':
            inner, ep = macro_args(i, ident)
            uf_specs = inner.strip()
            comment = pending_comment
            pending_comment = None
            j = ep
            while j < n and body[j] in ' \t\r\n':
                j += 1
            # Read function signature until ; or { outside parentheses
            k = j
            depth = 0
            while k < n:
                if body[k] == '(':
                    depth += 1
                elif body[k] == ')':
                    depth -= 1
                elif body[k] == '{' and depth == 0:
                    break
                elif body[k] == ';' and depth == 0:
                    break
                k += 1
            sig_text = _API_RE.sub('', body[j:k]).strip()
            # Skip body if inline
            end_fn = k
            if k < n and body[k] == '{':
                cl = find_matching_close(body, k, '{', '}')
                end_fn = (cl + 1) if cl != -1 else k + 1
            elif k < n and body[k] == ';':
                end_fn = k + 1
            ret, name, raw_params, mods = _parse_function_signature(sig_text)
            fn = FunctionInfo(
                name=name, return_type=ret, raw_params=raw_params,
                full_signature=sig_text.strip(), uf_specifiers=uf_specs,
                comment=comment, modifiers=mods, access=access,
                is_deprecated=pending_deprecated, deprecated_version=pending_dep_version,
                deprecated_msg=pending_dep_msg, editor_only=editor_tracker.editor_only,
            )
            functions.append(fn)
            pending_deprecated = False
            pending_dep_version = ""
            pending_dep_msg = ""
            i = end_fn
            continue

        # --- UENUM ---
        if ident == 'UENUM':
            inner, ep = macro_args(i, ident)
            ue_specs = inner.strip()
            comment = pending_comment
            pending_comment = None
            j = ep
            while j < n and body[j] in ' \t\r\n':
                j += 1
            result = _parse_enum_at(body, j, ue_specs, comment, editor_tracker.editor_only)
            if result:
                nested_enums.append(result[0])
                i = result[1]
            else:
                i = j
            continue

        # --- DECLARE_*_DELEGATE inside class (inline delegate typedef) ---
        if ident.startswith('DECLARE_') and 'DELEGATE' in ident:
            # Skip to end of line
            end = body.find('\n', i)
            i = (end + 1) if end != -1 else n
            pending_comment = None
            continue

        # --- Nested struct/class ---
        if ident in ('struct', 'class'):
            # Find the next { - check if this is a definition (not forward decl)
            bp = body.find('{', i)
            semi = body.find(';', i)
            if bp != -1 and (semi == -1 or bp < semi):
                # Has a body - skip the whole nested class
                i = skip_block(bp)
                pending_comment = None
            else:
                # Forward declaration - skip to ;
                i = (semi + 1) if semi != -1 else advance_line()
                pending_comment = None
            continue

        # --- Nested enum ---
        if ident == 'enum':
            result = _parse_enum_at(body, i, "", pending_comment, editor_tracker.editor_only)
            if result:
                nested_enums.append(result[0])
                pending_comment = None
                i = result[1]
            else:
                i = advance_line()
                pending_comment = None
            continue

        # --- Skip C++ keywords that aren't declarations ---
        if ident in _SKIP_KEYWORDS:
            i = advance_line()
            pending_comment = None
            continue

        # --- Try to parse as a function or method ---
        # Look for '(' on the current line (safe: no cross-line backtracking)
        line_end = body.find('\n', i)
        if line_end == -1:
            line_end = n
        current_line = body[i:line_end]

        paren_in_line = current_line.find('(')
        if paren_in_line != -1:
            abs_open = i + paren_in_line
            # Make sure there's a word just before '('
            before_paren = body[i:abs_open].rstrip()
            if before_paren and re.search(r'\w$', before_paren):
                cl_pos = find_matching_close(body, abs_open, '(', ')')
                if cl_pos != -1:
                    sig_text = _API_RE.sub('', body[i:cl_pos + 1]).strip()
                    sig_text = _MUTABLE_RE.sub('', sig_text).strip()
                    ret, name, raw_params, mods = _parse_function_signature(sig_text)

                    # Skip obvious non-functions
                    skip = (not name or name in _SKIP_KEYWORDS or
                            name in _SKIP_MACROS or name in _HANDLED_MACROS)
                    # All-uppercase is probably a macro
                    if name and name == name.upper() and '_' in name:
                        skip = True

                    if not skip:
                        # Skip inline body or find end of declaration
                        end_fn = cl_pos + 1
                        k = end_fn
                        while k < n and body[k] in ' \t':
                            k += 1
                        if k < n and body[k] == '{':
                            cl2 = find_matching_close(body, k, '{', '}')
                            end_fn = (cl2 + 1) if cl2 != -1 else k + 1
                        else:
                            # Find ; on current or next line
                            semi = body.find(';', cl_pos + 1)
                            nl = body.find('\n', cl_pos + 1)
                            if semi != -1 and (nl == -1 or semi <= nl + 2):
                                end_fn = semi + 1
                            else:
                                end_fn = (nl + 1) if nl != -1 else n

                        comment = pending_comment
                        pending_comment = None

                        fn = FunctionInfo(
                            name=name, return_type=ret, raw_params=raw_params,
                            full_signature=sig_text.strip(), uf_specifiers="",
                            comment=comment, modifiers=mods, access=access,
                            is_deprecated=pending_deprecated,
                            deprecated_version=pending_dep_version,
                            deprecated_msg=pending_dep_msg,
                            editor_only=editor_tracker.editor_only,
                        )
                        functions.append(fn)
                        pending_deprecated = False
                        pending_dep_version = ""
                        pending_dep_msg = ""
                        i = end_fn
                        continue

        # --- Default: skip line ---
        i = advance_line()
        pending_comment = None

    return properties, functions, nested_enums
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Enum at position
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def _parse_enum_at(text: str, pos: int, ue_specs: str,
                   comment: Optional[DocComment], editor_only: bool):
    """
    Parse a named `enum` / `enum class` declaration that begins at pos.

    Returns (EnumInfo, end_pos), with end_pos just past the closing brace
    and an optional trailing ';'. Returns None when the text at pos is not
    a named enum with a body, or its braces never close.
    """
    header_re = re.compile(
        r'enum\s+(?:class\s+)?(\w+)\s*(?::\s*(\w+))?\s*\{', re.DOTALL)
    head = header_re.match(text, pos)
    if head is None:
        return None

    # The pattern ends on '{', so the brace is the last matched character.
    open_brace = head.end() - 1
    close_brace = find_matching_close(text, open_brace, '{', '}')
    if close_brace == -1:
        return None

    info = EnumInfo(
        name=head.group(1),
        underlying_type=head.group(2) or "",
        ue_specifiers=ue_specs,
        comment=comment,
        values=_parse_enum_body(text[open_brace + 1:close_brace]),
        editor_only=editor_only,
    )

    # Consume whitespace and one optional ';' after the closing brace.
    cursor = close_brace + 1
    total = len(text)
    while cursor < total and text[cursor] in ' \t\r\n':
        cursor += 1
    if cursor < total and text[cursor] == ';':
        cursor += 1
    return info, cursor
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Class/struct at position
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def _parse_class_or_struct_at(text: str, pos: int, ue_specs: str,
                              comment: Optional[DocComment], macro: str):
    """
    Parse class/struct at pos. Returns (ClassInfo, end_pos) or None.

    Accepts an optional *_API export macro, an optional `final` specifier
    and an optional base list. Returns None for forward declarations and
    for declarations whose braces never close. `macro` is accepted for the
    caller's convenience and is not used here.
    """
    header_re = re.compile(
        r'(class|struct)\s+'
        r'(?:([A-Z][A-Z0-9_]+_API)\s+)?'
        r'(\w+)'
        r'(?:\s+final)?'
        r'(?:\s*:\s*([^{;]+?))?'
        r'\s*\{',
        re.DOTALL
    )
    # Match at an offset instead of slicing text[pos:], which copies the
    # remainder of the file for every class.
    m = header_re.match(text, pos)
    if not m:
        return None
    kind = m.group(1)
    module_api = m.group(2) or ""
    name = m.group(3)
    bases_str = m.group(4) or ""

    bases = []
    # Split on top-level commas only so TBase<A, B> stays one base.
    for b in _split_params(bases_str):
        b = re.sub(r'^(?:(?:public|protected|private|virtual)\s+)+', '', b).strip()
        if b:
            bases.append(b)

    brace_pos = m.end() - 1
    cl = find_matching_close(text, brace_pos, '{', '}')
    if cl == -1:
        return None

    body = text[brace_pos + 1:cl]
    props, fns, nested_enums = _parse_class_body(body, kind)

    # Consume whitespace and one optional ';' after the closing brace.
    end_pos = cl + 1
    while end_pos < len(text) and text[end_pos] in ' \t\r\n':
        end_pos += 1
    if end_pos < len(text) and text[end_pos] == ';':
        end_pos += 1

    return ClassInfo(name=name, kind=kind, bases=bases, ue_specifiers=ue_specs,
                     module_api=module_api, comment=comment, properties=props,
                     functions=fns, nested_enums=nested_enums), end_pos
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Namespace free-function parsing
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def _parse_namespace_functions(body: str) -> list:
    """
    Parse free functions in a namespace body.

    Returns a list of FreeFunction. Preprocessor lines, typedef/using
    statements and struct/class/enum declarations are skipped; anything
    else that does not look like a function is skipped line by line.
    """
    functions = []
    i = 0
    n = len(body)
    pending_comment: Optional[DocComment] = None
    # Matching at an offset avoids copying body[i:] on every iteration.
    type_kw_re = re.compile(r'(struct|class|enum)\b')

    while i < n:
        while i < n and body[i] in ' \t\r\n':
            i += 1
        if i >= n:
            break

        ch = body[i]

        if ch == '#':
            end = body.find('\n', i)
            i = (end + 1) if end != -1 else n
            continue

        if body[i:i+3] == '/**':
            end = body.find('*/', i)
            if end == -1:
                i += 3
                continue
            pending_comment = parse_doc_comment(body[i:end + 2])
            i = end + 2
            continue

        if body[i:i+2] == '/*':
            end = body.find('*/', i)
            i = (end + 2) if end != -1 else n
            continue

        if body[i:i+2] == '//':
            end = body.find('\n', i)
            line_text = body[i:(end if end != -1 else n)].lstrip('/').strip()
            # Only the first line of a // run becomes the pending comment.
            if pending_comment is None and line_text:
                pending_comment = DocComment(description=line_text)
            i = (end + 1) if end != -1 else n
            continue

        # typedef/using
        if body.startswith(('typedef ', 'using '), i):
            end = body.find(';', i)
            i = (end + 1) if end != -1 else n
            pending_comment = None
            continue

        # struct/class/enum — skip
        if type_kw_re.match(body, i):
            bp = body.find('{', i)
            semi = body.find(';', i)
            if bp != -1 and (semi == -1 or bp < semi):
                cl = find_matching_close(body, bp, '{', '}')
                ep = cl + 1 if cl != -1 else bp + 1
                while ep < n and body[ep] in ' \t\r\n':
                    ep += 1
                if ep < n and body[ep] == ';':
                    ep += 1
                i = ep
            else:
                i = (semi + 1) if semi != -1 else n
            pending_comment = None
            continue

        # Look for function on current line
        line_end = body.find('\n', i)
        if line_end == -1:
            line_end = n
        current_line = body[i:line_end]

        paren_in_line = current_line.find('(')
        if paren_in_line != -1:
            abs_open = i + paren_in_line
            before_paren = body[i:abs_open].rstrip()
            if before_paren and re.search(r'\w$', before_paren):
                cl_pos = find_matching_close(body, abs_open, '(', ')')
                if cl_pos != -1:
                    sig_text = _API_RE.sub('', body[i:cl_pos + 1]).strip()
                    ret, name, raw_params, mods = _parse_function_signature(sig_text)

                    skip = (not name or name in _SKIP_KEYWORDS or
                            name in _SKIP_MACROS)
                    # All-uppercase is probably a macro
                    if name and name == name.upper() and '_' in name:
                        skip = True

                    if not skip:
                        end_fn = cl_pos + 1
                        k = end_fn
                        while k < n and body[k] in ' \t\r\n':
                            k += 1
                        if k < n and body[k] == '{':
                            cl2 = find_matching_close(body, k, '{', '}')
                            end_fn = (cl2 + 1) if cl2 != -1 else k + 1
                        elif k < n and body[k] == ';':
                            end_fn = k + 1
                        else:
                            # Resume after the line holding the closing ')'.
                            # Using the end of the *first* line would restart
                            # the scan inside a multi-line parameter list.
                            nl = body.find('\n', cl_pos)
                            end_fn = (nl + 1) if nl != -1 else n

                        fn = FreeFunction(name=name, full_signature=sig_text.strip(),
                                          return_type=ret, raw_params=raw_params,
                                          comment=pending_comment, modifiers=mods)
                        functions.append(fn)
                        pending_comment = None
                        i = end_fn
                        continue

        # Skip line
        i = line_end + 1
        pending_comment = None

    return functions
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Module name inference
|
|
# ---------------------------------------------------------------------------
|
|
|
|
# Upper-case words that _caps_to_camel peels off the end of an un-separated
# all-caps module name to recover its word boundaries.
_MODULE_SUFFIXES = [
    'MODULE', 'MANAGER', 'SYSTEM', 'EDITOR', 'UTILS', 'TYPES',
    'SETTINGS', 'TAGS', 'ENGINE', 'CORE', 'INTERFACE', 'COMPONENT',
    'SUBSYSTEM', 'PLUGIN', 'RUNTIME', 'TASK', 'TASKS', 'GAME',
]
|
|
|
|
|
|
def _caps_to_camel(s: str) -> str:
|
|
"""Convert ALL-CAPS identifier to CamelCase: AIMODULE->AIModule, GAMEPLAYTAGS->GameplayTags."""
|
|
if not s:
|
|
return s
|
|
if '_' in s:
|
|
return ''.join(w.capitalize() for w in s.split('_'))
|
|
|
|
words = []
|
|
remaining = s
|
|
while remaining:
|
|
found = False
|
|
for suffix in sorted(_MODULE_SUFFIXES, key=len, reverse=True):
|
|
if remaining.endswith(suffix) and len(remaining) > len(suffix):
|
|
words.insert(0, suffix.capitalize())
|
|
remaining = remaining[:-len(suffix)]
|
|
found = True
|
|
break
|
|
if not found:
|
|
# Remaining is either a short acronym (≤3 chars) or a word
|
|
words.insert(0, remaining if len(remaining) <= 3 else remaining.capitalize())
|
|
break
|
|
return ''.join(words)
|
|
|
|
|
|
def _infer_module(filename: str, text: str) -> str:
|
|
m = re.search(r'\b([A-Z][A-Z0-9]+)_API\b', text)
|
|
if m:
|
|
return _caps_to_camel(m.group(1))
|
|
return Path(filename).stem
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Top-level parse_header
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def parse_header(filepath: str) -> ParsedHeader:
    """
    Read a header file and parse its top-level declarations.

    Collects delegate macros, named namespaces (free functions only),
    UENUM / plain enums and UCLASS / USTRUCT / plain class and struct
    definitions. Everything else is skipped line by line. The file is
    decoded as UTF-8 with undecodable bytes replaced.
    """
    path = Path(filepath)
    text = path.read_text(encoding='utf-8', errors='replace')

    header = ParsedHeader(
        filepath=str(path),
        filename=path.name,
        module_name=_infer_module(path.name, text),
    )

    i = 0
    n = len(text)
    editor_tracker = EditorOnlyTracker()
    pending_comment: Optional[DocComment] = None
    # Matching at an offset avoids copying the rest of the file (text[i:])
    # for every token. \s* already spans newlines, so one namespace pattern
    # covers both brace placements.
    ident_re = re.compile(r'\w+')
    namespace_re = re.compile(r'namespace\s+([\w:]+)\s*\{')

    def macro_args(pos: int, name: str):
        """
        Return (inner, end) for the (...) list of the macro `name` at pos.

        extract_balanced reports failure by returning its start position;
        in that case step past the macro name so the scanner always moves
        forward instead of re-reading the same identifier forever.
        """
        inner, end = extract_balanced(text, pos, '(', ')')
        if end <= pos:
            return "", pos + len(name)
        return inner, end

    while i < n:
        while i < n and text[i] in ' \t\r\n':
            i += 1
        if i >= n:
            break

        ch = text[i]

        # Preprocessor
        if ch == '#':
            end = text.find('\n', i)
            pp_line = text[i:(end if end != -1 else n)]
            editor_tracker.handle_line(pp_line)
            i = (end + 1) if end != -1 else n
            pending_comment = None
            continue

        # Doc comment
        if text[i:i+3] == '/**':
            end = text.find('*/', i)
            if end == -1:
                i += 3
                continue
            pending_comment = parse_doc_comment(text[i:end + 2])
            i = end + 2
            continue

        # Block comment
        if text[i:i+2] == '/*':
            end = text.find('*/', i)
            i = (end + 2) if end != -1 else n
            continue

        # Line comment
        if text[i:i+2] == '//':
            end = text.find('\n', i)
            line_text = text[i:(end if end != -1 else n)].lstrip('/').strip()
            # Only the first line of a // run becomes the pending comment.
            if pending_comment is None and line_text:
                pending_comment = DocComment(description=line_text)
            i = (end + 1) if end != -1 else n
            continue

        # Read identifier
        m = ident_re.match(text, i)
        if not m:
            i += 1
            continue
        ident = m.group(0)

        # Delegate macros
        if ident.startswith('DECLARE_') and 'DELEGATE' in ident:
            inner, ep = extract_balanced(text, i, '(', ')')
            if ep <= i:
                # No balanced argument list: nothing to record, skip the name.
                i += len(ident)
                pending_comment = None
                continue
            delegate = _parse_delegate(ident, inner, pending_comment)
            header.delegates.append(delegate)
            pending_comment = None
            # skip to next statement
            while ep < n and text[ep] in ' \t\r\n;':
                ep += 1
            i = ep
            continue

        # namespace
        if ident == 'namespace':
            m2 = namespace_re.match(text, i)
            if m2:
                ns_name = m2.group(1)
                brace_pos = m2.end() - 1
                cl = find_matching_close(text, brace_pos, '{', '}')
                if cl != -1:
                    body = text[brace_pos + 1:cl]
                    fns = _parse_namespace_functions(body)
                    if fns:
                        ns = NamespaceInfo(name=ns_name, functions=fns)
                        header.namespaces.append(ns)
                    pending_comment = None
                    i = cl + 1
                else:
                    i = m2.end()
            else:
                end = text.find('\n', i)
                i = (end + 1) if end != -1 else n
            continue

        # UENUM
        if ident == 'UENUM':
            inner, ep = macro_args(i, ident)
            ue_specs = inner.strip()
            comment = pending_comment
            pending_comment = None
            j = ep
            while j < n and text[j] in ' \t\r\n':
                j += 1
            result = _parse_enum_at(text, j, ue_specs, comment, editor_tracker.editor_only)
            if result:
                header.enums.append(result[0])
                i = result[1]
            else:
                i = j
            continue

        # UCLASS / USTRUCT
        if ident in ('UCLASS', 'USTRUCT'):
            inner, ep = macro_args(i, ident)
            ue_specs = inner.strip()
            comment = pending_comment
            pending_comment = None
            j = ep
            while j < n and text[j] in ' \t\r\n':
                j += 1
            result = _parse_class_or_struct_at(text, j, ue_specs, comment, ident)
            if result:
                header.classes.append(result[0])
                i = result[1]
            else:
                i = j
            continue

        # Bare struct/class
        if ident in ('struct', 'class'):
            comment = pending_comment
            pending_comment = None
            result = _parse_class_or_struct_at(text, i, "", comment, "")
            if result:
                header.classes.append(result[0])
                i = result[1]
            else:
                end = text.find('\n', i)
                i = (end + 1) if end != -1 else n
            continue

        # Bare enum
        if ident == 'enum':
            result = _parse_enum_at(text, i, "", pending_comment, editor_tracker.editor_only)
            if result:
                header.enums.append(result[0])
                pending_comment = None
                i = result[1]
            else:
                end = text.find('\n', i)
                i = (end + 1) if end != -1 else n
            continue

        # Skip line
        end = text.find('\n', i)
        pending_comment = None
        i = (end + 1) if end != -1 else n

    return header
|