Files
mcp-ue/ue_parser.py
Pierre-Marie Charavel 93ca33c36a Add UnrealDocGenerator tool and UE API skill
- ue_parser.py: position-based UE C++ header parser
- ue_markdown.py: compact agent-optimised Markdown renderer
- generate.py: two-pass CLI (parse-all → type index → render-all)
- samples/: representative UE headers (GeomUtils, AIController, GameplayTagsManager)
- .claude/skills/ue-api/: Claude Code skill for querying UE docs + source headers
- CLAUDE.md: architecture notes, usage, critical gotchas

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-02-24 06:55:05 -05:00

1184 lines
39 KiB
Python

"""
ue_parser.py — Parse Unreal Engine C++ header files into Python dataclasses.
"""
from __future__ import annotations
import re
from dataclasses import dataclass, field
from pathlib import Path
from typing import Optional
# ---------------------------------------------------------------------------
# Data model
# ---------------------------------------------------------------------------
@dataclass
class DocComment:
    """Structured content of a parsed doc comment (/** ... */ block or a // line)."""
    description: str = ""  # free-text summary with @tags stripped out
    params: dict[str, str] = field(default_factory=dict) # name -> desc
    returns: str = ""      # text of the @return/@returns tag, if present
@dataclass
class PropertyInfo:
    """A UPROPERTY member declaration parsed from a class/struct body."""
    name: str = ""
    type: str = ""         # C++ type text (whitespace-normalized; bitfields as 'type : N')
    specifiers: str = "" # raw UPROPERTY(...) content
    access: str = "public" # 'public' / 'protected' / 'private' at the declaration site
    editor_only: bool = False  # declared inside a WITH_EDITOR(_ONLY_DATA) guard
    comment: Optional[DocComment] = None
@dataclass
class FunctionInfo:
    """A member function declaration (UFUNCTION or plain method)."""
    name: str = ""
    return_type: str = ""
    raw_params: str = ""       # unsplit text between the parameter parentheses
    full_signature: str = ""   # cleaned signature text (API macros removed)
    uf_specifiers: str = "" # raw UFUNCTION(...) content
    comment: Optional[DocComment] = None
    modifiers: list[str] = field(default_factory=list)  # e.g. ['virtual', 'static']
    access: str = "public"
    is_deprecated: bool = False            # preceded by UE_DEPRECATED(...)
    deprecated_version: str = ""           # first UE_DEPRECATED argument
    deprecated_msg: str = ""               # second UE_DEPRECATED argument
    editor_only: bool = False              # inside a WITH_EDITOR(_ONLY_DATA) guard
@dataclass
class EnumValue:
    """One enumerator inside an enum body."""
    name: str = ""
    value: str = ""   # raw initializer text; '' when the value is implicit
    comment: str = "" # trailing // comment on the enumerator's line
@dataclass
class EnumInfo:
    """A UENUM or plain enum/enum class declaration."""
    name: str = ""
    underlying_type: str = ""  # e.g. 'uint8' from 'enum class E : uint8'; '' if absent
    ue_specifiers: str = ""    # raw UENUM(...) content
    comment: Optional[DocComment] = None
    values: list[EnumValue] = field(default_factory=list)
    editor_only: bool = False  # declared inside a WITH_EDITOR(_ONLY_DATA) guard
@dataclass
class ClassInfo:
    """A UCLASS/USTRUCT or bare class/struct definition with its parsed members."""
    name: str = ""
    kind: str = "class"        # 'class' or 'struct'
    bases: list[str] = field(default_factory=list)  # base names, access specifiers stripped
    ue_specifiers: str = ""    # raw UCLASS(...)/USTRUCT(...) content
    module_api: str = ""       # export macro, e.g. 'ENGINE_API'; '' if none
    comment: Optional[DocComment] = None
    properties: list[PropertyInfo] = field(default_factory=list)
    functions: list[FunctionInfo] = field(default_factory=list)
    nested_enums: list[EnumInfo] = field(default_factory=list)
@dataclass
class DelegateInfo:
    """A DECLARE_*_DELEGATE* macro declaration."""
    name: str = ""   # delegate type name (first macro argument)
    macro: str = ""  # the full macro identifier that declared it
    params: list[tuple[str, str]] = field(default_factory=list) # list[(type, name)]
    comment: Optional[DocComment] = None
    is_multicast: bool = False  # macro name contains 'MULTICAST'
    is_dynamic: bool = False    # macro name contains 'DYNAMIC'
@dataclass
class FreeFunction:
    """A free function parsed from a namespace body."""
    name: str = ""
    full_signature: str = ""   # cleaned signature text (API macros removed)
    return_type: str = ""
    raw_params: str = ""       # unsplit text between the parameter parentheses
    comment: Optional[DocComment] = None
    modifiers: list[str] = field(default_factory=list)  # e.g. ['static', 'inline']
@dataclass
class NamespaceInfo:
    """A namespace and the free functions found directly inside it."""
    name: str = ""  # possibly qualified, e.g. 'UE::Geometry'
    functions: list[FreeFunction] = field(default_factory=list)
@dataclass
class ParsedHeader:
    """Top-level parse result for one .h file — everything parse_header() found."""
    filepath: str = ""
    filename: str = ""
    module_name: str = ""  # inferred from the *_API macro or the filename stem
    classes: list[ClassInfo] = field(default_factory=list)
    enums: list[EnumInfo] = field(default_factory=list)
    delegates: list[DelegateInfo] = field(default_factory=list)
    namespaces: list[NamespaceInfo] = field(default_factory=list)
    free_functions: list[FreeFunction] = field(default_factory=list)  # NOTE(review): never populated in this file — verify against callers
# ---------------------------------------------------------------------------
# Core utilities
# ---------------------------------------------------------------------------
def find_matching_close(text: str, open_pos: int, open_ch: str, close_ch: str) -> int:
    """
    Given text[open_pos] == open_ch, return index of matching close_ch.
    Skips // comments, /* */ comments, and string literals.
    Returns -1 if not found.
    """
    n = len(text)
    depth = 0
    pos = open_pos
    while pos < n:
        c = text[pos]
        nxt = text[pos + 1] if pos + 1 < n else ''
        # // line comment: jump to the newline (the newline itself is harmless)
        if c == '/' and nxt == '/':
            nl = text.find('\n', pos)
            pos = n if nl == -1 else nl
            continue
        # /* block comment */: jump past the terminator
        if c == '/' and nxt == '*':
            term = text.find('*/', pos + 2)
            pos = n if term == -1 else term + 2
            continue
        # "string literal": scan to the unescaped closing quote
        if c == '"':
            j = pos + 1
            while j < n:
                if text[j] == '\\':
                    j += 2
                    continue
                if text[j] == '"':
                    break
                j += 1
            pos = j + 1
            continue
        if c == open_ch:
            depth += 1
        elif c == close_ch:
            depth -= 1
            if depth == 0:
                return pos
        pos += 1
    return -1
def extract_balanced(text: str, start: int, open_ch: str, close_ch: str):
    """
    Find open_ch at or after start, return (inner_content, end_pos).
    end_pos is one past the closing char. Returns ("", start) if not found.
    """
    opening = text.find(open_ch, start)
    if opening < 0:
        return ("", start)
    closing = find_matching_close(text, opening, open_ch, close_ch)
    if closing < 0:
        return ("", start)
    return (text[opening + 1:closing], closing + 1)
def _split_params(raw_params: str) -> list:
"""Split param string respecting <> and () nesting."""
params = []
depth = 0
current = []
for ch in raw_params:
if ch in '<({':
depth += 1
current.append(ch)
elif ch in '>)}':
depth -= 1
current.append(ch)
elif ch == ',' and depth == 0:
p = ''.join(current).strip()
if p:
params.append(p)
current = []
else:
current.append(ch)
p = ''.join(current).strip()
if p:
params.append(p)
return params
# ---------------------------------------------------------------------------
# Doc comment parsing
# ---------------------------------------------------------------------------
def parse_doc_comment(raw: str) -> DocComment:
    """Parse a /** ... */ or // ... doc comment block into a DocComment."""
    body = raw.strip()
    if body.startswith('/**'):
        body = body[3:]
    if body.endswith('*/'):
        body = body[:-2]
    # Normalize each line: drop surrounding whitespace and leading '*' gutters.
    cleaned = [ln.strip().lstrip('*').strip() for ln in body.split('\n')]
    total = len(cleaned)

    param_re = re.compile(r'@param(?:\[[\w,]+\])?\s+(\w+)\s*(.*)', re.IGNORECASE)
    return_re = re.compile(r'@returns?\s*(.*)', re.IGNORECASE)
    note_re = re.compile(r'@(?:note|see|todo)\s*(.*)', re.IGNORECASE)

    def gather(start: int, first: str):
        """Fold continuation lines (non-blank, not a new @tag) into one string."""
        text = first
        j = start
        while j < total and cleaned[j] and not cleaned[j].startswith('@'):
            text += ' ' + cleaned[j]
            j += 1
        return text, j

    result = DocComment()
    description = []
    idx = 0
    while idx < total:
        ln = cleaned[idx]
        pm = param_re.match(ln)
        if pm:
            text, idx = gather(idx + 1, pm.group(2).strip())
            result.params[pm.group(1)] = text.strip(' -')
            continue
        rm = return_re.match(ln)
        if rm:
            text, idx = gather(idx + 1, rm.group(1).strip())
            result.returns = text.strip(' -')
            continue
        nm = note_re.match(ln)
        if nm:
            # @note/@see/@todo content is folded into the description.
            if nm.group(1).strip():
                description.append(nm.group(1).strip())
            idx += 1
            continue
        if re.match(r'@\w+', ln):
            # Any other @tag is dropped.
            idx += 1
            continue
        description.append(ln)
        idx += 1
    result.description = ' '.join(p for p in description if p).strip()
    return result
# ---------------------------------------------------------------------------
# Preprocessor tracking
# ---------------------------------------------------------------------------
class EditorOnlyTracker:
    """Tracks whether the scan position is inside a WITH_EDITOR /
    WITH_EDITORONLY_DATA preprocessor region by mirroring #if nesting.

    One bool per open conditional level: True when that level's condition
    names an editor guard. editor_only is True if ANY enclosing level does.
    """
    EDITOR_GUARDS = {'WITH_EDITOR', 'WITH_EDITORONLY_DATA'}

    def __init__(self):
        self._stack: list[bool] = []

    def _guarded(self, cond: str) -> bool:
        """True if cond positively references an editor guard macro."""
        c = cond.strip()
        if c.startswith('!'):
            # '#if !WITH_EDITOR' selects the NON-editor branch.
            return False
        return any(g in c for g in self.EDITOR_GUARDS)

    def handle_line(self, line: str):
        """Update nesting state from a single preprocessor line."""
        s = line.strip()
        if s.startswith(('#if ', '#ifdef ')):
            cond = s.split(None, 1)[1].strip() if ' ' in s else ''
            self._stack.append(self._guarded(cond))
        elif s.startswith('#ifndef'):
            # BUGFIX: #ifndef also opens a conditional level. Previously it was
            # not pushed, so its matching #endif popped an unrelated outer
            # guard and corrupted the editor_only state. An '#ifndef GUARD'
            # body is the non-editor branch, so push False.
            self._stack.append(False)
        elif s.startswith('#elif'):
            # The elif branch is editor-only iff its own condition is guarded
            # (previously always treated as non-editor).
            if self._stack:
                cond = s.split(None, 1)[1].strip() if ' ' in s else ''
                self._stack[-1] = self._guarded(cond)
        elif s.startswith('#else'):
            if self._stack:
                self._stack[-1] = False
        elif s.startswith('#endif'):
            if self._stack:
                self._stack.pop()

    @property
    def editor_only(self) -> bool:
        return any(self._stack)
# ---------------------------------------------------------------------------
# Function signature parsing
# ---------------------------------------------------------------------------
_API_RE = re.compile(r'\b[A-Z][A-Z0-9_]+_API\b\s*')
_MUTABLE_RE = re.compile(r'\bmutable\b\s*')
_PREFIX_MODS = {'virtual', 'static', 'inline', 'explicit', 'friend',
'forceinline', 'forcenoinline', 'ue_nodiscard', 'constexpr',
'extern'}
def _clean_type(t: str) -> str:
return ' '.join(t.split())
def _parse_function_signature(sig: str):
"""
Parse: [modifiers] [return_type] name(params)
Returns (return_type, name, raw_params, modifiers).
"""
sig = _API_RE.sub('', sig).strip()
# Find the params: rightmost balanced () pair
paren_close = len(sig) - 1
while paren_close >= 0 and sig[paren_close] != ')':
paren_close -= 1
if paren_close < 0:
return "", sig.strip(), "", []
# Walk backward from paren_close to find matching (
depth = 0
paren_open = paren_close
while paren_open >= 0:
if sig[paren_open] == ')':
depth += 1
elif sig[paren_open] == '(':
depth -= 1
if depth == 0:
break
paren_open -= 1
if paren_open < 0:
return "", sig.strip(), "", []
raw_params = sig[paren_open + 1:paren_close].strip()
before = sig[:paren_open].strip()
# Extract leading modifier keywords
modifiers = []
words = before.split()
i = 0
while i < len(words):
w = words[i].lower()
if w in _PREFIX_MODS:
modifiers.append(words[i].lower())
i += 1
else:
break
before = ' '.join(words[i:])
# Last token is function name
tokens = before.rsplit(None, 1)
if len(tokens) == 2:
return_type = _clean_type(tokens[0])
name = tokens[1]
elif len(tokens) == 1:
return_type = ""
name = tokens[0]
else:
return_type = ""
name = ""
# Handle pointer/ref in name vs return_type
while name.startswith(('*', '&')):
return_type = return_type + name[0]
name = name[1:]
return _clean_type(return_type), name.strip(), raw_params, modifiers
# ---------------------------------------------------------------------------
# Enum body parsing
# ---------------------------------------------------------------------------
def _parse_enum_body(body: str) -> list:
    """Extract EnumValue entries from the text between an enum's braces."""
    out = []
    for raw_line in body.split('\n'):
        trailing = ""
        # Peel off a trailing // comment before any other processing.
        slash = raw_line.find('//')
        if slash != -1:
            trailing = raw_line[slash + 2:].strip()
            raw_line = raw_line[:slash]
        entry = raw_line.strip().rstrip(',').strip()
        if not entry:
            continue
        # UMETA(...) metadata is irrelevant to the value itself.
        entry = re.sub(r'UMETA\s*\([^)]*\)', '', entry).strip()
        if not entry:
            continue
        assigned = re.match(r'(\w+)\s*=\s*(.*)', entry)
        if assigned:
            out.append(EnumValue(name=assigned.group(1).strip(),
                                 value=assigned.group(2).strip(),
                                 comment=trailing))
        elif re.match(r'\w+\s*$', entry):
            out.append(EnumValue(name=entry.strip(), value="", comment=trailing))
    return out
# ---------------------------------------------------------------------------
# Delegate parsing
# ---------------------------------------------------------------------------
def _parse_delegate(macro: str, args_str: str,
                    comment: Optional[DocComment]) -> DelegateInfo:
    """Parse the arguments of a DECLARE_*_DELEGATE* macro invocation.

    macro    -- the full macro identifier (e.g. 'DECLARE_DYNAMIC_MULTICAST_DELEGATE_OneParam')
    args_str -- raw text between the macro's parentheses
    Returns a DelegateInfo; params is a list of (type, name) tuples
    (name is '' for non-dynamic delegates, which declare types only).
    """
    is_dynamic = 'DYNAMIC' in macro
    is_multicast = 'MULTICAST' in macro
    # BUGFIX: split on top-level commas only, so template arguments such as
    # TMap<FName, int32> are not broken apart (plain str.split(',') was used).
    args = _split_params(args_str)
    name = args[0] if args else ""
    rest = args[1:]
    params = []
    if is_dynamic:
        # Dynamic delegate macros alternate (ParamType, ParamName) pairs.
        for i in range(0, len(rest) - 1, 2):
            ptype = rest[i].strip()
            pname = rest[i + 1].strip()
            if ptype:
                params.append((ptype, pname))
    else:
        # BUGFIX: non-dynamic delegate macros list bare parameter types with no
        # names; the old pairing loop treated every second type as a name and
        # dropped odd trailing types entirely.
        params = [(a.strip(), "") for a in rest if a.strip()]
    return DelegateInfo(name=name, macro=macro, params=params, comment=comment,
                        is_multicast=is_multicast, is_dynamic=is_dynamic)
# ---------------------------------------------------------------------------
# Class body parsing — line-based approach to avoid regex backtracking
# ---------------------------------------------------------------------------
# Macros to skip entirely (skip line when encountered)
_SKIP_MACROS = frozenset({
'GENERATED_BODY', 'GENERATED_UCLASS_BODY', 'GENERATED_USTRUCT_BODY',
'GENERATED_USTRUCT_BODY', 'check', 'checkSlow', 'ensure', 'checkf',
'UE_LOG', 'DECLARE_LOG_CATEGORY_EXTERN', 'DECLARE_STATS_GROUP',
})
# Macros we handle explicitly (don't treat as regular lines)
_HANDLED_MACROS = frozenset({
'UPROPERTY', 'UFUNCTION', 'UENUM', 'UE_DEPRECATED', 'UE_DEPRECATED_FORGAME',
'DECLARE_DELEGATE', 'DECLARE_MULTICAST_DELEGATE', 'DECLARE_DYNAMIC_DELEGATE',
'DECLARE_DYNAMIC_MULTICAST_DELEGATE', 'DECLARE_TS_MULTICAST_DELEGATE',
'DECLARE_MULTICAST_DELEGATE_OneParam', 'DECLARE_MULTICAST_DELEGATE_TwoParams',
'DECLARE_MULTICAST_DELEGATE_ThreeParams',
})
# C++ keywords that start non-function statements
_SKIP_KEYWORDS = frozenset({
'if', 'else', 'while', 'for', 'do', 'switch', 'return', 'break',
'continue', 'typedef', 'using', 'namespace', 'template',
'throw', 'try', 'catch', 'static_assert', 'static_cast',
'reinterpret_cast', 'dynamic_cast', 'const_cast',
})
def _parse_class_body(body: str, class_kind: str = 'class') -> tuple:
    """
    Parse class/struct body. Returns (properties, functions, nested_enums).
    Uses a position-based scanner; falls back to line-skip for unrecognized patterns.

    body       -- text strictly between the class's outer braces
    class_kind -- 'class' or 'struct'; determines the default access level
    """
    properties: list = []
    functions: list = []
    nested_enums: list = []
    # C++ default member access: private for class, public for struct.
    access = 'private' if class_kind == 'class' else 'public'
    editor_tracker = EditorOnlyTracker()
    # Doc comment seen immediately before the next declaration; consumed on use.
    pending_comment: Optional[DocComment] = None
    # Deprecation info carried from a preceding UE_DEPRECATED(...) macro.
    pending_deprecated = False
    pending_dep_version = ""
    pending_dep_msg = ""
    i = 0
    n = len(body)
    def advance_line() -> int:
        """Return position after the next newline from i."""
        end = body.find('\n', i)
        return (end + 1) if end != -1 else n
    def skip_block(pos: int) -> int:
        """Skip a {…} block starting at or after pos, plus any trailing ';'."""
        bp = body.find('{', pos)
        if bp == -1:
            return pos
        cl = find_matching_close(body, bp, '{', '}')
        if cl == -1:
            return pos
        ep = cl + 1
        while ep < n and body[ep] in ' \t\r\n':
            ep += 1
        if ep < n and body[ep] == ';':
            ep += 1
        return ep
    while i < n:
        # Skip pure whitespace
        while i < n and body[i] in ' \t\r\n':
            i += 1
        if i >= n:
            break
        ch = body[i]
        # --- Preprocessor ---
        if ch == '#':
            end = body.find('\n', i)
            pp_line = body[i:(end if end != -1 else n)]
            editor_tracker.handle_line(pp_line)
            i = (end + 1) if end != -1 else n
            continue
        # --- Doc comment /** */ ---
        if body[i:i+3] == '/**':
            end = body.find('*/', i)
            if end == -1:
                # Unterminated doc comment: step past the opener and carry on.
                i += 3
                continue
            pending_comment = parse_doc_comment(body[i:end + 2])
            i = end + 2
            continue
        # --- Block comment /* */ --- (non-doc; discarded)
        if body[i:i+2] == '/*':
            end = body.find('*/', i)
            i = (end + 2) if end != -1 else n
            continue
        # --- Line comment // --- (promoted to a doc comment if none pending)
        if body[i:i+2] == '//':
            end = body.find('\n', i)
            line_text = body[i:(end if end != -1 else n)].lstrip('/').strip()
            if pending_comment is None and line_text:
                pending_comment = DocComment(description=line_text)
            i = (end + 1) if end != -1 else n
            continue
        # --- Read identifier at current position ---
        m = re.match(r'\w+', body[i:])
        if not m:
            # Not an identifier; skip character
            i += 1
            continue
        ident = m.group(0)
        # --- Access specifier: public: protected: private: ---
        if ident in ('public', 'protected', 'private'):
            colon_pos = i + len(ident)
            while colon_pos < n and body[colon_pos] in ' \t':
                colon_pos += 1
            if colon_pos < n and body[colon_pos] == ':':
                access = ident
                i = colon_pos + 1
                pending_comment = None
                continue
            # No ':' follows — fall through and treat as an ordinary token.
        # --- GENERATED_BODY etc ---
        if ident in _SKIP_MACROS or (ident.startswith('GENERATED_') and ident.endswith('BODY')):
            # Skip to end of line or semicolon
            end = body.find('\n', i)
            i = (end + 1) if end != -1 else n
            pending_comment = None
            continue
        # --- UE_DEPRECATED / UE_DEPRECATED_FORGAME ---
        # Record version/message; applied to the NEXT function parsed.
        if ident in ('UE_DEPRECATED', 'UE_DEPRECATED_FORGAME'):
            inner, ep = extract_balanced(body, i, '(', ')')
            parts = inner.split(',', 1)
            pending_dep_version = parts[0].strip().strip('"')
            pending_dep_msg = parts[1].strip().strip('"') if len(parts) > 1 else ""
            pending_deprecated = True
            i = ep
            continue
        # --- UPROPERTY ---
        if ident == 'UPROPERTY':
            inner, ep = extract_balanced(body, i, '(', ')')
            specifiers = inner.strip()
            comment = pending_comment
            pending_comment = None
            # Advance to property declaration
            j = ep
            while j < n and body[j] in ' \t\r\n':
                j += 1
            semi = body.find(';', j)
            if semi == -1:
                i = ep
                continue
            decl = _API_RE.sub('', body[j:semi]).strip()
            decl = _MUTABLE_RE.sub('', decl).strip()
            # Parse: TYPE NAME [: bits]
            m2 = re.match(r'(.*?)\s+(\w+)\s*(?::\s*\d+)?\s*$', decl, re.DOTALL)
            if m2:
                ptype = _clean_type(m2.group(1))
                pname = m2.group(2)
                # Re-attach a bitfield width to the type text, e.g. 'uint8 : 1'.
                bf = re.search(r':\s*(\d+)', decl)
                if bf:
                    ptype += ' : ' + bf.group(1)
            else:
                # Could not split type/name; keep the whole declaration as the type.
                ptype = decl
                pname = ""
            prop = PropertyInfo(name=pname, type=ptype, specifiers=specifiers,
                                access=access, editor_only=editor_tracker.editor_only,
                                comment=comment)
            properties.append(prop)
            i = semi + 1
            continue
        # --- UFUNCTION ---
        if ident == 'UFUNCTION':
            inner, ep = extract_balanced(body, i, '(', ')')
            uf_specs = inner.strip()
            comment = pending_comment
            pending_comment = None
            j = ep
            while j < n and body[j] in ' \t\r\n':
                j += 1
            # Read function signature until ; or {
            k = j
            depth = 0
            while k < n:
                if body[k] == '(':
                    depth += 1
                elif body[k] == ')':
                    depth -= 1
                elif body[k] == '{' and depth == 0:
                    break
                elif body[k] == ';' and depth == 0:
                    break
                k += 1
            sig_text = _API_RE.sub('', body[j:k]).strip()
            # Skip body if inline
            end_fn = k
            if k < n and body[k] == '{':
                cl = find_matching_close(body, k, '{', '}')
                end_fn = (cl + 1) if cl != -1 else k + 1
            elif k < n and body[k] == ';':
                end_fn = k + 1
            ret, name, raw_params, mods = _parse_function_signature(sig_text)
            fn = FunctionInfo(
                name=name, return_type=ret, raw_params=raw_params,
                full_signature=sig_text.strip(), uf_specifiers=uf_specs,
                comment=comment, modifiers=mods, access=access,
                is_deprecated=pending_deprecated, deprecated_version=pending_dep_version,
                deprecated_msg=pending_dep_msg, editor_only=editor_tracker.editor_only,
            )
            functions.append(fn)
            # Deprecation only applies to the declaration just consumed.
            pending_deprecated = False
            pending_dep_version = ""
            pending_dep_msg = ""
            i = end_fn
            continue
        # --- UENUM ---
        if ident == 'UENUM':
            inner, ep = extract_balanced(body, i, '(', ')')
            ue_specs = inner.strip()
            comment = pending_comment
            pending_comment = None
            j = ep
            while j < n and body[j] in ' \t\r\n':
                j += 1
            result = _parse_enum_at(body, j, ue_specs, comment, editor_tracker.editor_only)
            if result:
                nested_enums.append(result[0])
                i = result[1]
            else:
                i = j
            continue
        # --- DECLARE_*_DELEGATE inside class (inline delegate typedef) ---
        if ident.startswith('DECLARE_') and 'DELEGATE' in ident:
            # Skip to end of line
            end = body.find('\n', i)
            i = (end + 1) if end != -1 else n
            pending_comment = None
            continue
        # --- Nested struct/class ---
        if ident in ('struct', 'class'):
            # Find the next { - check if this is a definition (not forward decl)
            bp = body.find('{', i)
            semi = body.find(';', i)
            if bp != -1 and (semi == -1 or bp < semi):
                # Has a body - skip the whole nested class
                i = skip_block(bp)
                pending_comment = None
            else:
                # Forward declaration - skip to ;
                i = (semi + 1) if semi != -1 else advance_line()
                pending_comment = None
            continue
        # --- Nested enum ---
        if ident == 'enum':
            result = _parse_enum_at(body, i, "", pending_comment, editor_tracker.editor_only)
            if result:
                nested_enums.append(result[0])
                pending_comment = None
                i = result[1]
            else:
                i = advance_line()
                pending_comment = None
            continue
        # --- Skip C++ keywords that aren't declarations ---
        if ident in _SKIP_KEYWORDS:
            i = advance_line()
            pending_comment = None
            continue
        # --- Try to parse as a function or method ---
        # Look for '(' on the current line (safe: no cross-line backtracking)
        line_end = body.find('\n', i)
        if line_end == -1:
            line_end = n
        current_line = body[i:line_end]
        paren_in_line = current_line.find('(')
        if paren_in_line != -1:
            abs_open = i + paren_in_line
            # Make sure there's a word just before '('
            before_paren = body[i:abs_open].rstrip()
            if before_paren and re.search(r'\w$', before_paren):
                cl_pos = find_matching_close(body, abs_open, '(', ')')
                if cl_pos != -1:
                    sig_text = _API_RE.sub('', body[i:cl_pos + 1]).strip()
                    sig_text = _MUTABLE_RE.sub('', sig_text).strip()
                    ret, name, raw_params, mods = _parse_function_signature(sig_text)
                    # Skip obvious non-functions
                    skip = (not name or name in _SKIP_KEYWORDS or
                            name in _SKIP_MACROS or name in _HANDLED_MACROS)
                    # All-uppercase is probably a macro
                    if name and name == name.upper() and '_' in name:
                        skip = True
                    if not skip:
                        # Skip inline body or find end of declaration
                        end_fn = cl_pos + 1
                        k = end_fn
                        while k < n and body[k] in ' \t':
                            k += 1
                        if k < n and body[k] == '{':
                            cl2 = find_matching_close(body, k, '{', '}')
                            end_fn = (cl2 + 1) if cl2 != -1 else k + 1
                        else:
                            # Find ; on current or next line
                            semi = body.find(';', cl_pos + 1)
                            nl = body.find('\n', cl_pos + 1)
                            if semi != -1 and (nl == -1 or semi <= nl + 2):
                                end_fn = semi + 1
                            else:
                                end_fn = (nl + 1) if nl != -1 else n
                        comment = pending_comment
                        pending_comment = None
                        fn = FunctionInfo(
                            name=name, return_type=ret, raw_params=raw_params,
                            full_signature=sig_text.strip(), uf_specifiers="",
                            comment=comment, modifiers=mods, access=access,
                            is_deprecated=pending_deprecated,
                            deprecated_version=pending_dep_version,
                            deprecated_msg=pending_dep_msg,
                            editor_only=editor_tracker.editor_only,
                        )
                        functions.append(fn)
                        pending_deprecated = False
                        pending_dep_version = ""
                        pending_dep_msg = ""
                        i = end_fn
                        continue
        # --- Default: skip line ---
        i = advance_line()
        pending_comment = None
    return properties, functions, nested_enums
# ---------------------------------------------------------------------------
# Enum at position
# ---------------------------------------------------------------------------
def _parse_enum_at(text: str, pos: int, ue_specs: str,
                   comment: Optional[DocComment], editor_only: bool):
    """Parse enum starting at pos. Returns (EnumInfo, end_pos) or None."""
    header = re.match(r'enum\s+(?:class\s+)?(\w+)\s*(?::\s*(\w+))?\s*\{',
                      text[pos:], re.DOTALL)
    if header is None:
        return None
    # The match ends one past '{'; back up to the brace itself.
    open_brace = pos + header.end() - 1
    close_brace = find_matching_close(text, open_brace, '{', '}')
    if close_brace == -1:
        return None
    values = _parse_enum_body(text[open_brace + 1:close_brace])
    # Consume trailing whitespace and the optional ';' after the closing brace.
    end_pos = close_brace + 1
    limit = len(text)
    while end_pos < limit and text[end_pos] in ' \t\r\n':
        end_pos += 1
    if end_pos < limit and text[end_pos] == ';':
        end_pos += 1
    info = EnumInfo(name=header.group(1),
                    underlying_type=header.group(2) or "",
                    ue_specifiers=ue_specs, comment=comment, values=values,
                    editor_only=editor_only)
    return info, end_pos
# ---------------------------------------------------------------------------
# Class/struct at position
# ---------------------------------------------------------------------------
def _parse_class_or_struct_at(text: str, pos: int, ue_specs: str,
                              comment: Optional[DocComment], macro: str):
    """Parse class/struct at pos. Returns (ClassInfo, end_pos) or None.

    macro is the UCLASS/USTRUCT macro name or '' for a bare declaration.
    NOTE(review): `macro` is currently unused in this function body.
    """
    m = re.match(
        r'(class|struct)\s+'              # kind keyword
        r'(?:([A-Z][A-Z0-9_]+_API)\s+)?'  # optional module export macro
        r'(\w+)'                          # class/struct name
        r'(?:\s*:\s*([^{;]+?))?'          # optional base-clause (up to '{' or ';')
        r'\s*\{',
        text[pos:], re.DOTALL
    )
    if not m:
        return None
    kind = m.group(1)
    module_api = m.group(2) or ""
    name = m.group(3)
    bases_str = m.group(4) or ""
    bases = []
    for b in bases_str.split(','):
        b = b.strip()
        # Drop the inheritance access specifier, keep only the base type name.
        b = re.sub(r'^(?:public|protected|private)\s+', '', b).strip()
        if b:
            bases.append(b)
    # m.end() is one past '{'; back up to the brace itself.
    brace_pos = pos + m.end() - 1
    cl = find_matching_close(text, brace_pos, '{', '}')
    if cl == -1:
        return None
    body = text[brace_pos + 1:cl]
    props, fns, nested_enums = _parse_class_body(body, kind)
    # Consume trailing whitespace and the ';' after the closing brace.
    end_pos = cl + 1
    while end_pos < len(text) and text[end_pos] in ' \t\r\n':
        end_pos += 1
    if end_pos < len(text) and text[end_pos] == ';':
        end_pos += 1
    return ClassInfo(name=name, kind=kind, bases=bases, ue_specifiers=ue_specs,
                     module_api=module_api, comment=comment, properties=props,
                     functions=fns, nested_enums=nested_enums), end_pos
# ---------------------------------------------------------------------------
# Namespace free-function parsing
# ---------------------------------------------------------------------------
def _parse_namespace_functions(body: str) -> list:
    """Parse free functions in a namespace body.

    A trimmed-down version of the _parse_class_body scanner: no access
    specifiers, no UPROPERTY/UFUNCTION handling — only comments, typedefs,
    nested type skips, and free-function signatures.
    Returns a list of FreeFunction.
    """
    functions = []
    i = 0
    n = len(body)
    # Doc comment seen immediately before the next declaration; consumed on use.
    pending_comment: Optional[DocComment] = None
    while i < n:
        # Skip whitespace.
        while i < n and body[i] in ' \t\r\n':
            i += 1
        if i >= n:
            break
        ch = body[i]
        # Preprocessor line: skip (no editor-only tracking at namespace level).
        if ch == '#':
            end = body.find('\n', i)
            i = (end + 1) if end != -1 else n
            continue
        # /** doc comment */
        if body[i:i+3] == '/**':
            end = body.find('*/', i)
            if end == -1:
                i += 3
                continue
            pending_comment = parse_doc_comment(body[i:end + 2])
            i = end + 2
            continue
        # /* plain block comment */ — discarded
        if body[i:i+2] == '/*':
            end = body.find('*/', i)
            i = (end + 2) if end != -1 else n
            continue
        # // line comment — promoted to a doc comment if none pending
        if body[i:i+2] == '//':
            end = body.find('\n', i)
            line_text = body[i:(end if end != -1 else n)].lstrip('/').strip()
            if pending_comment is None and line_text:
                pending_comment = DocComment(description=line_text)
            i = (end + 1) if end != -1 else n
            continue
        # typedef/using
        if body[i:].startswith(('typedef ', 'using ')):
            end = body.find(';', i)
            i = (end + 1) if end != -1 else n
            pending_comment = None
            continue
        # struct/class/enum — skip
        m = re.match(r'(struct|class|enum)\b', body[i:])
        if m and m.start() == 0:
            bp = body.find('{', i)
            semi = body.find(';', i)
            if bp != -1 and (semi == -1 or bp < semi):
                # Definition with a body: skip the whole block plus trailing ';'.
                cl = find_matching_close(body, bp, '{', '}')
                ep = cl + 1 if cl != -1 else bp + 1
                while ep < n and body[ep] in ' \t\r\n':
                    ep += 1
                if ep < n and body[ep] == ';':
                    ep += 1
                i = ep
            else:
                # Forward declaration: skip to ';'.
                i = (semi + 1) if semi != -1 else n
            pending_comment = None
            continue
        # Look for function on current line
        line_end = body.find('\n', i)
        if line_end == -1:
            line_end = n
        current_line = body[i:line_end]
        paren_in_line = current_line.find('(')
        if paren_in_line != -1:
            abs_open = i + paren_in_line
            # Require an identifier character directly before '('.
            before_paren = body[i:abs_open].rstrip()
            if before_paren and re.search(r'\w$', before_paren):
                cl_pos = find_matching_close(body, abs_open, '(', ')')
                if cl_pos != -1:
                    sig_text = _API_RE.sub('', body[i:cl_pos + 1]).strip()
                    ret, name, raw_params, mods = _parse_function_signature(sig_text)
                    # Reject keywords/macros posing as function names.
                    skip = (not name or name in _SKIP_KEYWORDS or
                            name in _SKIP_MACROS)
                    # All-uppercase with '_' is probably a macro invocation.
                    if name and name == name.upper() and '_' in name:
                        skip = True
                    if not skip:
                        # Skip an inline body, a trailing ';', or fall to next line.
                        end_fn = cl_pos + 1
                        k = end_fn
                        while k < n and body[k] in ' \t\r\n':
                            k += 1
                        if k < n and body[k] == '{':
                            cl2 = find_matching_close(body, k, '{', '}')
                            end_fn = (cl2 + 1) if cl2 != -1 else k + 1
                        elif k < n and body[k] == ';':
                            end_fn = k + 1
                        else:
                            end_fn = line_end + 1
                        fn = FreeFunction(name=name, full_signature=sig_text.strip(),
                                          return_type=ret, raw_params=raw_params,
                                          comment=pending_comment, modifiers=mods)
                        functions.append(fn)
                        pending_comment = None
                        i = end_fn
                        continue
        # Skip line
        i = line_end + 1
        pending_comment = None
    return functions
# ---------------------------------------------------------------------------
# Module name inference
# ---------------------------------------------------------------------------
_MODULE_SUFFIXES = [
'MODULE', 'MANAGER', 'SYSTEM', 'EDITOR', 'UTILS', 'TYPES',
'SETTINGS', 'TAGS', 'ENGINE', 'CORE', 'INTERFACE', 'COMPONENT',
'SUBSYSTEM', 'PLUGIN', 'RUNTIME', 'TASK', 'TASKS', 'GAME',
]
def _caps_to_camel(s: str) -> str:
"""Convert ALL-CAPS identifier to CamelCase: AIMODULE->AIModule, GAMEPLAYTAGS->GameplayTags."""
if not s:
return s
if '_' in s:
return ''.join(w.capitalize() for w in s.split('_'))
words = []
remaining = s
while remaining:
found = False
for suffix in sorted(_MODULE_SUFFIXES, key=len, reverse=True):
if remaining.endswith(suffix) and len(remaining) > len(suffix):
words.insert(0, suffix.capitalize())
remaining = remaining[:-len(suffix)]
found = True
break
if not found:
# Remaining is either a short acronym (≤3 chars) or a word
words.insert(0, remaining if len(remaining) <= 3 else remaining.capitalize())
break
return ''.join(words)
def _infer_module(filename: str, text: str) -> str:
m = re.search(r'\b([A-Z][A-Z0-9]+)_API\b', text)
if m:
return _caps_to_camel(m.group(1))
return Path(filename).stem
# ---------------------------------------------------------------------------
# Top-level parse_header
# ---------------------------------------------------------------------------
def parse_header(filepath: str) -> ParsedHeader:
    """Parse one UE C++ header file into a ParsedHeader.

    Top-level scanner: walks the file text and dispatches on delegate macros,
    namespaces, UENUM/UCLASS/USTRUCT macros, and bare class/struct/enum
    declarations. Anything unrecognized is skipped a line at a time.
    """
    path = Path(filepath)
    # errors='replace' keeps the parse alive on odd encodings.
    text = path.read_text(encoding='utf-8', errors='replace')
    header = ParsedHeader(
        filepath=str(path),
        filename=path.name,
        module_name=_infer_module(path.name, text),
    )
    i = 0
    n = len(text)
    editor_tracker = EditorOnlyTracker()
    # Doc comment seen immediately before the next declaration; consumed on use.
    pending_comment: Optional[DocComment] = None
    while i < n:
        # Skip whitespace.
        while i < n and text[i] in ' \t\r\n':
            i += 1
        if i >= n:
            break
        ch = text[i]
        # Preprocessor
        if ch == '#':
            end = text.find('\n', i)
            pp_line = text[i:(end if end != -1 else n)]
            editor_tracker.handle_line(pp_line)
            i = (end + 1) if end != -1 else n
            pending_comment = None
            continue
        # Doc comment
        if text[i:i+3] == '/**':
            end = text.find('*/', i)
            if end == -1:
                # Unterminated doc comment: step past the opener and carry on.
                i += 3
                continue
            pending_comment = parse_doc_comment(text[i:end + 2])
            i = end + 2
            continue
        # Block comment
        if text[i:i+2] == '/*':
            end = text.find('*/', i)
            i = (end + 2) if end != -1 else n
            continue
        # Line comment — promoted to a doc comment if none pending
        if text[i:i+2] == '//':
            end = text.find('\n', i)
            line_text = text[i:(end if end != -1 else n)].lstrip('/').strip()
            if pending_comment is None and line_text:
                pending_comment = DocComment(description=line_text)
            i = (end + 1) if end != -1 else n
            continue
        # Read identifier
        m = re.match(r'\w+', text[i:])
        if not m:
            i += 1
            continue
        ident = m.group(0)
        # Delegate macros
        if ident.startswith('DECLARE_') and 'DELEGATE' in ident:
            inner, ep = extract_balanced(text, i, '(', ')')
            delegate = _parse_delegate(ident, inner, pending_comment)
            header.delegates.append(delegate)
            pending_comment = None
            # skip to next statement
            while ep < n and text[ep] in ' \t\r\n;':
                ep += 1
            i = ep
            continue
        # namespace
        if ident == 'namespace':
            m2 = re.match(r'namespace\s+([\w:]+)\s*\{', text[i:])
            if not m2:
                # NOTE(review): this fallback is subsumed by the pattern above
                # (\s* already matches newlines), so it can never fire.
                m2 = re.match(r'namespace\s+([\w:]+)\s*\n\s*\{', text[i:])
            if m2:
                ns_name = m2.group(1)
                # m2.end() is one past '{'; back up to the brace itself.
                brace_pos = i + m2.end() - 1
                cl = find_matching_close(text, brace_pos, '{', '}')
                if cl != -1:
                    body = text[brace_pos + 1:cl]
                    fns = _parse_namespace_functions(body)
                    # Only record namespaces that actually contain functions.
                    if fns:
                        ns = NamespaceInfo(name=ns_name, functions=fns)
                        header.namespaces.append(ns)
                    pending_comment = None
                    i = cl + 1
                else:
                    i += m2.end()
            else:
                end = text.find('\n', i)
                i = (end + 1) if end != -1 else n
            continue
        # UENUM
        if ident == 'UENUM':
            inner, ep = extract_balanced(text, i, '(', ')')
            ue_specs = inner.strip()
            comment = pending_comment
            pending_comment = None
            j = ep
            while j < n and text[j] in ' \t\r\n':
                j += 1
            result = _parse_enum_at(text, j, ue_specs, comment, editor_tracker.editor_only)
            if result:
                header.enums.append(result[0])
                i = result[1]
            else:
                i = j
            continue
        # UCLASS / USTRUCT
        if ident in ('UCLASS', 'USTRUCT'):
            inner, ep = extract_balanced(text, i, '(', ')')
            ue_specs = inner.strip()
            comment = pending_comment
            pending_comment = None
            j = ep
            while j < n and text[j] in ' \t\r\n':
                j += 1
            result = _parse_class_or_struct_at(text, j, ue_specs, comment, ident)
            if result:
                header.classes.append(result[0])
                i = result[1]
            else:
                i = j
            continue
        # Bare struct/class
        if ident in ('struct', 'class'):
            comment = pending_comment
            pending_comment = None
            result = _parse_class_or_struct_at(text, i, "", comment, "")
            if result:
                header.classes.append(result[0])
                i = result[1]
            else:
                # Forward declaration or unmatched pattern — skip the line.
                end = text.find('\n', i)
                i = (end + 1) if end != -1 else n
            continue
        # Bare enum
        if ident == 'enum':
            result = _parse_enum_at(text, i, "", pending_comment, editor_tracker.editor_only)
            if result:
                header.enums.append(result[0])
                pending_comment = None
                i = result[1]
            else:
                end = text.find('\n', i)
                i = (end + 1) if end != -1 else n
            continue
        # Skip line
        end = text.find('\n', i)
        pending_comment = None
        i = (end + 1) if end != -1 else n
    return header