Files
mcp-ue/ue_parser.py
Pierre-Marie Charavel 93ca33c36a Add UnrealDocGenerator tool and UE API skill
- ue_parser.py: position-based UE C++ header parser
- ue_markdown.py: compact agent-optimised Markdown renderer
- generate.py: two-pass CLI (parse-all → type index → render-all)
- samples/: representative UE headers (GeomUtils, AIController, GameplayTagsManager)
- .claude/skills/ue-api/: Claude Code skill for querying UE docs + source headers
- CLAUDE.md: architecture notes, usage, critical gotchas

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-02-24 06:55:05 -05:00

1184 lines
39 KiB
Python

"""
ue_parser.py — Parse Unreal Engine C++ header files into Python dataclasses.
"""
from __future__ import annotations
import re
from dataclasses import dataclass, field
from pathlib import Path
from typing import Optional
# ---------------------------------------------------------------------------
# Data model
# ---------------------------------------------------------------------------
@dataclass
class DocComment:
    """Structured content of a parsed doc comment (/** ... */ block or a // line)."""
    description: str = ""  # free-text summary with @tags stripped out
    params: dict[str, str] = field(default_factory=dict) # name -> desc
    returns: str = ""      # text of the @return/@returns tag, if present
@dataclass
class PropertyInfo:
    """A UPROPERTY member declaration parsed from a class/struct body."""
    name: str = ""
    type: str = ""         # C++ type text (whitespace-normalized; bitfields as 'type : N')
    specifiers: str = "" # raw UPROPERTY(...) content
    access: str = "public" # 'public' / 'protected' / 'private' at the declaration site
    editor_only: bool = False  # declared inside a WITH_EDITOR(_ONLY_DATA) guard
    comment: Optional[DocComment] = None
@dataclass
class FunctionInfo:
    """A member function declaration (UFUNCTION or plain method)."""
    name: str = ""
    return_type: str = ""
    raw_params: str = ""       # unsplit text between the parameter parentheses
    full_signature: str = ""   # cleaned signature text (API macros removed)
    uf_specifiers: str = "" # raw UFUNCTION(...) content
    comment: Optional[DocComment] = None
    modifiers: list[str] = field(default_factory=list)  # e.g. ['virtual', 'static']
    access: str = "public"
    is_deprecated: bool = False            # preceded by UE_DEPRECATED(...)
    deprecated_version: str = ""           # first UE_DEPRECATED argument
    deprecated_msg: str = ""               # second UE_DEPRECATED argument
    editor_only: bool = False              # inside a WITH_EDITOR(_ONLY_DATA) guard
@dataclass
class EnumValue:
    """One enumerator inside an enum body."""
    name: str = ""
    value: str = ""   # raw initializer text; '' when the value is implicit
    comment: str = "" # trailing // comment on the enumerator's line
@dataclass
class EnumInfo:
    """A UENUM or plain enum/enum class declaration."""
    name: str = ""
    underlying_type: str = ""  # e.g. 'uint8' from 'enum class E : uint8'; '' if absent
    ue_specifiers: str = ""    # raw UENUM(...) content
    comment: Optional[DocComment] = None
    values: list[EnumValue] = field(default_factory=list)
    editor_only: bool = False  # declared inside a WITH_EDITOR(_ONLY_DATA) guard
@dataclass
class ClassInfo:
    """A UCLASS/USTRUCT or bare class/struct definition with its parsed members."""
    name: str = ""
    kind: str = "class"        # 'class' or 'struct'
    bases: list[str] = field(default_factory=list)  # base names, access specifiers stripped
    ue_specifiers: str = ""    # raw UCLASS(...)/USTRUCT(...) content
    module_api: str = ""       # export macro, e.g. 'ENGINE_API'; '' if none
    comment: Optional[DocComment] = None
    properties: list[PropertyInfo] = field(default_factory=list)
    functions: list[FunctionInfo] = field(default_factory=list)
    nested_enums: list[EnumInfo] = field(default_factory=list)
@dataclass
class DelegateInfo:
    """A DECLARE_*_DELEGATE* macro declaration."""
    name: str = ""   # delegate type name (first macro argument)
    macro: str = ""  # the full macro identifier that declared it
    params: list[tuple[str, str]] = field(default_factory=list) # list[(type, name)]
    comment: Optional[DocComment] = None
    is_multicast: bool = False  # macro name contains 'MULTICAST'
    is_dynamic: bool = False    # macro name contains 'DYNAMIC'
@dataclass
class FreeFunction:
    """A free function parsed from a namespace body."""
    name: str = ""
    full_signature: str = ""   # cleaned signature text (API macros removed)
    return_type: str = ""
    raw_params: str = ""       # unsplit text between the parameter parentheses
    comment: Optional[DocComment] = None
    modifiers: list[str] = field(default_factory=list)  # e.g. ['static', 'inline']
@dataclass
class NamespaceInfo:
    """A namespace and the free functions found directly inside it."""
    name: str = ""  # possibly qualified, e.g. 'UE::Geometry'
    functions: list[FreeFunction] = field(default_factory=list)
@dataclass
class ParsedHeader:
    """Top-level parse result for one .h file — everything parse_header() found."""
    filepath: str = ""
    filename: str = ""
    module_name: str = ""  # inferred from the *_API macro or the filename stem
    classes: list[ClassInfo] = field(default_factory=list)
    enums: list[EnumInfo] = field(default_factory=list)
    delegates: list[DelegateInfo] = field(default_factory=list)
    namespaces: list[NamespaceInfo] = field(default_factory=list)
    free_functions: list[FreeFunction] = field(default_factory=list)  # NOTE(review): never populated in this file — verify against callers
# ---------------------------------------------------------------------------
# Core utilities
# ---------------------------------------------------------------------------
def find_matching_close(text: str, open_pos: int, open_ch: str, close_ch: str) -> int:
    """
    Given text[open_pos] == open_ch, return index of matching close_ch.
    Skips // comments, /* */ comments, and string literals.
    Returns -1 if not found.
    """
    n = len(text)
    depth = 0
    pos = open_pos
    while pos < n:
        c = text[pos]
        nxt = text[pos + 1] if pos + 1 < n else ''
        # // line comment: jump to the newline (the newline itself is harmless)
        if c == '/' and nxt == '/':
            nl = text.find('\n', pos)
            pos = n if nl == -1 else nl
            continue
        # /* block comment */: jump past the terminator
        if c == '/' and nxt == '*':
            term = text.find('*/', pos + 2)
            pos = n if term == -1 else term + 2
            continue
        # "string literal": scan to the unescaped closing quote
        if c == '"':
            j = pos + 1
            while j < n:
                if text[j] == '\\':
                    j += 2
                    continue
                if text[j] == '"':
                    break
                j += 1
            pos = j + 1
            continue
        if c == open_ch:
            depth += 1
        elif c == close_ch:
            depth -= 1
            if depth == 0:
                return pos
        pos += 1
    return -1
def extract_balanced(text: str, start: int, open_ch: str, close_ch: str):
    """
    Find open_ch at or after start, return (inner_content, end_pos).
    end_pos is one past the closing char. Returns ("", start) if not found.
    """
    opening = text.find(open_ch, start)
    if opening < 0:
        return ("", start)
    closing = find_matching_close(text, opening, open_ch, close_ch)
    if closing < 0:
        return ("", start)
    return (text[opening + 1:closing], closing + 1)
def _split_params(raw_params: str) -> list:
"""Split param string respecting <> and () nesting."""
params = []
depth = 0
current = []
for ch in raw_params:
if ch in '<({':
depth += 1
current.append(ch)
elif ch in '>)}':
depth -= 1
current.append(ch)
elif ch == ',' and depth == 0:
p = ''.join(current).strip()
if p:
params.append(p)
current = []
else:
current.append(ch)
p = ''.join(current).strip()
if p:
params.append(p)
return params
# ---------------------------------------------------------------------------
# Doc comment parsing
# ---------------------------------------------------------------------------
def parse_doc_comment(raw: str) -> DocComment:
    """Parse a /** ... */ or // ... doc comment block into a DocComment."""
    body = raw.strip()
    if body.startswith('/**'):
        body = body[3:]
    if body.endswith('*/'):
        body = body[:-2]
    # Normalize each line: drop surrounding whitespace and leading '*' gutters.
    cleaned = [ln.strip().lstrip('*').strip() for ln in body.split('\n')]
    total = len(cleaned)

    param_re = re.compile(r'@param(?:\[[\w,]+\])?\s+(\w+)\s*(.*)', re.IGNORECASE)
    return_re = re.compile(r'@returns?\s*(.*)', re.IGNORECASE)
    note_re = re.compile(r'@(?:note|see|todo)\s*(.*)', re.IGNORECASE)

    def gather(start: int, first: str):
        """Fold continuation lines (non-blank, not a new @tag) into one string."""
        text = first
        j = start
        while j < total and cleaned[j] and not cleaned[j].startswith('@'):
            text += ' ' + cleaned[j]
            j += 1
        return text, j

    result = DocComment()
    description = []
    idx = 0
    while idx < total:
        ln = cleaned[idx]
        pm = param_re.match(ln)
        if pm:
            text, idx = gather(idx + 1, pm.group(2).strip())
            result.params[pm.group(1)] = text.strip(' -')
            continue
        rm = return_re.match(ln)
        if rm:
            text, idx = gather(idx + 1, rm.group(1).strip())
            result.returns = text.strip(' -')
            continue
        nm = note_re.match(ln)
        if nm:
            # @note/@see/@todo content is folded into the description.
            if nm.group(1).strip():
                description.append(nm.group(1).strip())
            idx += 1
            continue
        if re.match(r'@\w+', ln):
            # Any other @tag is dropped.
            idx += 1
            continue
        description.append(ln)
        idx += 1
    result.description = ' '.join(p for p in description if p).strip()
    return result
# ---------------------------------------------------------------------------
# Preprocessor tracking
# ---------------------------------------------------------------------------
class EditorOnlyTracker:
    """Tracks whether the scan position is inside a WITH_EDITOR /
    WITH_EDITORONLY_DATA preprocessor region by mirroring #if nesting.

    One bool per open conditional level: True when that level's condition
    names an editor guard. editor_only is True if ANY enclosing level does.
    """
    EDITOR_GUARDS = {'WITH_EDITOR', 'WITH_EDITORONLY_DATA'}

    def __init__(self):
        self._stack: list[bool] = []

    def _guarded(self, cond: str) -> bool:
        """True if cond positively references an editor guard macro."""
        c = cond.strip()
        if c.startswith('!'):
            # '#if !WITH_EDITOR' selects the NON-editor branch.
            return False
        return any(g in c for g in self.EDITOR_GUARDS)

    def handle_line(self, line: str):
        """Update nesting state from a single preprocessor line."""
        s = line.strip()
        if s.startswith(('#if ', '#ifdef ')):
            cond = s.split(None, 1)[1].strip() if ' ' in s else ''
            self._stack.append(self._guarded(cond))
        elif s.startswith('#ifndef'):
            # BUGFIX: #ifndef also opens a conditional level. Previously it was
            # not pushed, so its matching #endif popped an unrelated outer
            # guard and corrupted the editor_only state. An '#ifndef GUARD'
            # body is the non-editor branch, so push False.
            self._stack.append(False)
        elif s.startswith('#elif'):
            # The elif branch is editor-only iff its own condition is guarded
            # (previously always treated as non-editor).
            if self._stack:
                cond = s.split(None, 1)[1].strip() if ' ' in s else ''
                self._stack[-1] = self._guarded(cond)
        elif s.startswith('#else'):
            if self._stack:
                self._stack[-1] = False
        elif s.startswith('#endif'):
            if self._stack:
                self._stack.pop()

    @property
    def editor_only(self) -> bool:
        return any(self._stack)
# ---------------------------------------------------------------------------
# Function signature parsing
# ---------------------------------------------------------------------------
_API_RE = re.compile(r'\b[A-Z][A-Z0-9_]+_API\b\s*')
_MUTABLE_RE = re.compile(r'\bmutable\b\s*')
_PREFIX_MODS = {'virtual', 'static', 'inline', 'explicit', 'friend',
'forceinline', 'forcenoinline', 'ue_nodiscard', 'constexpr',
'extern'}
def _clean_type(t: str) -> str:
return ' '.join(t.split())
def _parse_function_signature(sig: str):
"""
Parse: [modifiers] [return_type] name(params)
Returns (return_type, name, raw_params, modifiers).
"""
sig = _API_RE.sub('', sig).strip()
# Find the params: rightmost balanced () pair
paren_close = len(sig) - 1
while paren_close >= 0 and sig[paren_close] != ')':
paren_close -= 1
if paren_close < 0:
return "", sig.strip(), "", []
# Walk backward from paren_close to find matching (
depth = 0
paren_open = paren_close
while paren_open >= 0:
if sig[paren_open] == ')':
depth += 1
elif sig[paren_open] == '(':
depth -= 1
if depth == 0:
break
paren_open -= 1
if paren_open < 0:
return "", sig.strip(), "", []
raw_params = sig[paren_open + 1:paren_close].strip()
before = sig[:paren_open].strip()
# Extract leading modifier keywords
modifiers = []
words = before.split()
i = 0
while i < len(words):
w = words[i].lower()
if w in _PREFIX_MODS:
modifiers.append(words[i].lower())
i += 1
else:
break
before = ' '.join(words[i:])
# Last token is function name
tokens = before.rsplit(None, 1)
if len(tokens) == 2:
return_type = _clean_type(tokens[0])
name = tokens[1]
elif len(tokens) == 1:
return_type = ""
name = tokens[0]
else:
return_type = ""
name = ""
# Handle pointer/ref in name vs return_type
while name.startswith(('*', '&')):
return_type = return_type + name[0]
name = name[1:]
return _clean_type(return_type), name.strip(), raw_params, modifiers
# ---------------------------------------------------------------------------
# Enum body parsing
# ---------------------------------------------------------------------------
def _parse_enum_body(body: str) -> list:
    """Extract EnumValue entries from the text between an enum's braces."""
    out = []
    for raw_line in body.split('\n'):
        trailing = ""
        # Peel off a trailing // comment before any other processing.
        slash = raw_line.find('//')
        if slash != -1:
            trailing = raw_line[slash + 2:].strip()
            raw_line = raw_line[:slash]
        entry = raw_line.strip().rstrip(',').strip()
        if not entry:
            continue
        # UMETA(...) metadata is irrelevant to the value itself.
        entry = re.sub(r'UMETA\s*\([^)]*\)', '', entry).strip()
        if not entry:
            continue
        assigned = re.match(r'(\w+)\s*=\s*(.*)', entry)
        if assigned:
            out.append(EnumValue(name=assigned.group(1).strip(),
                                 value=assigned.group(2).strip(),
                                 comment=trailing))
        elif re.match(r'\w+\s*$', entry):
            out.append(EnumValue(name=entry.strip(), value="", comment=trailing))
    return out
# ---------------------------------------------------------------------------
# Delegate parsing
# ---------------------------------------------------------------------------
def _parse_delegate(macro: str, args_str: str,
                    comment: Optional[DocComment]) -> DelegateInfo:
    """Parse the arguments of a DECLARE_*_DELEGATE* macro invocation.

    macro    -- the full macro identifier (e.g. 'DECLARE_DYNAMIC_MULTICAST_DELEGATE_OneParam')
    args_str -- raw text between the macro's parentheses
    Returns a DelegateInfo; params is a list of (type, name) tuples
    (name is '' for non-dynamic delegates, which declare types only).
    """
    is_dynamic = 'DYNAMIC' in macro
    is_multicast = 'MULTICAST' in macro
    # BUGFIX: split on top-level commas only, so template arguments such as
    # TMap<FName, int32> are not broken apart (plain str.split(',') was used).
    args = _split_params(args_str)
    name = args[0] if args else ""
    rest = args[1:]
    params = []
    if is_dynamic:
        # Dynamic delegate macros alternate (ParamType, ParamName) pairs.
        for i in range(0, len(rest) - 1, 2):
            ptype = rest[i].strip()
            pname = rest[i + 1].strip()
            if ptype:
                params.append((ptype, pname))
    else:
        # BUGFIX: non-dynamic delegate macros list bare parameter types with no
        # names; the old pairing loop treated every second type as a name and
        # dropped odd trailing types entirely.
        params = [(a.strip(), "") for a in rest if a.strip()]
    return DelegateInfo(name=name, macro=macro, params=params, comment=comment,
                        is_multicast=is_multicast, is_dynamic=is_dynamic)
# ---------------------------------------------------------------------------
# Class body parsing — line-based approach to avoid regex backtracking
# ---------------------------------------------------------------------------
# Macros to skip entirely (skip line when encountered)
_SKIP_MACROS = frozenset({
'GENERATED_BODY', 'GENERATED_UCLASS_BODY', 'GENERATED_USTRUCT_BODY',
'GENERATED_USTRUCT_BODY', 'check', 'checkSlow', 'ensure', 'checkf',
'UE_LOG', 'DECLARE_LOG_CATEGORY_EXTERN', 'DECLARE_STATS_GROUP',
})
# Macros we handle explicitly (don't treat as regular lines)
_HANDLED_MACROS = frozenset({
'UPROPERTY', 'UFUNCTION', 'UENUM', 'UE_DEPRECATED', 'UE_DEPRECATED_FORGAME',
'DECLARE_DELEGATE', 'DECLARE_MULTICAST_DELEGATE', 'DECLARE_DYNAMIC_DELEGATE',
'DECLARE_DYNAMIC_MULTICAST_DELEGATE', 'DECLARE_TS_MULTICAST_DELEGATE',
'DECLARE_MULTICAST_DELEGATE_OneParam', 'DECLARE_MULTICAST_DELEGATE_TwoParams',
'DECLARE_MULTICAST_DELEGATE_ThreeParams',
})
# C++ keywords that start non-function statements
_SKIP_KEYWORDS = frozenset({
'if', 'else', 'while', 'for', 'do', 'switch', 'return', 'break',
'continue', 'typedef', 'using', 'namespace', 'template',
'throw', 'try', 'catch', 'static_assert', 'static_cast',
'reinterpret_cast', 'dynamic_cast', 'const_cast',
})
def _parse_class_body(body: str, class_kind: str = 'class') -> tuple:
    """
    Parse class/struct body. Returns (properties, functions, nested_enums).
    Uses a position-based scanner; falls back to line-skip for unrecognized patterns.

    body       -- text strictly between the class's outer braces
    class_kind -- 'class' or 'struct'; determines the default access level
    """
    properties: list = []
    functions: list = []
    nested_enums: list = []
    # C++ default member access: private for class, public for struct.
    access = 'private' if class_kind == 'class' else 'public'
    editor_tracker = EditorOnlyTracker()
    # Doc comment seen immediately before the next declaration; consumed on use.
    pending_comment: Optional[DocComment] = None
    # Deprecation info carried from a preceding UE_DEPRECATED(...) macro.
    pending_deprecated = False
    pending_dep_version = ""
    pending_dep_msg = ""
    i = 0
    n = len(body)
    def advance_line() -> int:
        """Return position after the next newline from i."""
        end = body.find('\n', i)
        return (end + 1) if end != -1 else n
    def skip_block(pos: int) -> int:
        """Skip a {…} block starting at or after pos, plus any trailing ';'."""
        bp = body.find('{', pos)
        if bp == -1:
            return pos
        cl = find_matching_close(body, bp, '{', '}')
        if cl == -1:
            return pos
        ep = cl + 1
        while ep < n and body[ep] in ' \t\r\n':
            ep += 1
        if ep < n and body[ep] == ';':
            ep += 1
        return ep
    while i < n:
        # Skip pure whitespace
        while i < n and body[i] in ' \t\r\n':
            i += 1
        if i >= n:
            break
        ch = body[i]
        # --- Preprocessor ---
        if ch == '#':
            end = body.find('\n', i)
            pp_line = body[i:(end if end != -1 else n)]
            editor_tracker.handle_line(pp_line)
            i = (end + 1) if end != -1 else n
            continue
        # --- Doc comment /** */ ---
        if body[i:i+3] == '/**':
            end = body.find('*/', i)
            if end == -1:
                # Unterminated doc comment: step past the opener and carry on.
                i += 3
                continue
            pending_comment = parse_doc_comment(body[i:end + 2])
            i = end + 2
            continue
        # --- Block comment /* */ --- (non-doc; discarded)
        if body[i:i+2] == '/*':
            end = body.find('*/', i)
            i = (end + 2) if end != -1 else n
            continue
        # --- Line comment // --- (promoted to a doc comment if none pending)
        if body[i:i+2] == '//':
            end = body.find('\n', i)
            line_text = body[i:(end if end != -1 else n)].lstrip('/').strip()
            if pending_comment is None and line_text:
                pending_comment = DocComment(description=line_text)
            i = (end + 1) if end != -1 else n
            continue
        # --- Read identifier at current position ---
        m = re.match(r'\w+', body[i:])
        if not m:
            # Not an identifier; skip character
            i += 1
            continue
        ident = m.group(0)
        # --- Access specifier: public: protected: private: ---
        if ident in ('public', 'protected', 'private'):
            colon_pos = i + len(ident)
            while colon_pos < n and body[colon_pos] in ' \t':
                colon_pos += 1
            if colon_pos < n and body[colon_pos] == ':':
                access = ident
                i = colon_pos + 1
                pending_comment = None
                continue
            # No ':' follows — fall through and treat as an ordinary token.
        # --- GENERATED_BODY etc ---
        if ident in _SKIP_MACROS or (ident.startswith('GENERATED_') and ident.endswith('BODY')):
            # Skip to end of line or semicolon
            end = body.find('\n', i)
            i = (end + 1) if end != -1 else n
            pending_comment = None
            continue
        # --- UE_DEPRECATED / UE_DEPRECATED_FORGAME ---
        # Record version/message; applied to the NEXT function parsed.
        if ident in ('UE_DEPRECATED', 'UE_DEPRECATED_FORGAME'):
            inner, ep = extract_balanced(body, i, '(', ')')
            parts = inner.split(',', 1)
            pending_dep_version = parts[0].strip().strip('"')
            pending_dep_msg = parts[1].strip().strip('"') if len(parts) > 1 else ""
            pending_deprecated = True
            i = ep
            continue
        # --- UPROPERTY ---
        if ident == 'UPROPERTY':
            inner, ep = extract_balanced(body, i, '(', ')')
            specifiers = inner.strip()
            comment = pending_comment
            pending_comment = None
            # Advance to property declaration
            j = ep
            while j < n and body[j] in ' \t\r\n':
                j += 1
            semi = body.find(';', j)
            if semi == -1:
                i = ep
                continue
            decl = _API_RE.sub('', body[j:semi]).strip()
            decl = _MUTABLE_RE.sub('', decl).strip()
            # Parse: TYPE NAME [: bits]
            m2 = re.match(r'(.*?)\s+(\w+)\s*(?::\s*\d+)?\s*$', decl, re.DOTALL)
            if m2:
                ptype = _clean_type(m2.group(1))
                pname = m2.group(2)
                # Re-attach a bitfield width to the type text, e.g. 'uint8 : 1'.
                bf = re.search(r':\s*(\d+)', decl)
                if bf:
                    ptype += ' : ' + bf.group(1)
            else:
                # Could not split type/name; keep the whole declaration as the type.
                ptype = decl
                pname = ""
            prop = PropertyInfo(name=pname, type=ptype, specifiers=specifiers,
                                access=access, editor_only=editor_tracker.editor_only,
                                comment=comment)
            properties.append(prop)
            i = semi + 1
            continue
        # --- UFUNCTION ---
        if ident == 'UFUNCTION':
            inner, ep = extract_balanced(body, i, '(', ')')
            uf_specs = inner.strip()
            comment = pending_comment
            pending_comment = None
            j = ep
            while j < n and body[j] in ' \t\r\n':
                j += 1
            # Read function signature until ; or {
            k = j
            depth = 0
            while k < n:
                if body[k] == '(':
                    depth += 1
                elif body[k] == ')':
                    depth -= 1
                elif body[k] == '{' and depth == 0:
                    break
                elif body[k] == ';' and depth == 0:
                    break
                k += 1
            sig_text = _API_RE.sub('', body[j:k]).strip()
            # Skip body if inline
            end_fn = k
            if k < n and body[k] == '{':
                cl = find_matching_close(body, k, '{', '}')
                end_fn = (cl + 1) if cl != -1 else k + 1
            elif k < n and body[k] == ';':
                end_fn = k + 1
            ret, name, raw_params, mods = _parse_function_signature(sig_text)
            fn = FunctionInfo(
                name=name, return_type=ret, raw_params=raw_params,
                full_signature=sig_text.strip(), uf_specifiers=uf_specs,
                comment=comment, modifiers=mods, access=access,
                is_deprecated=pending_deprecated, deprecated_version=pending_dep_version,
                deprecated_msg=pending_dep_msg, editor_only=editor_tracker.editor_only,
            )
            functions.append(fn)
            # Deprecation only applies to the declaration just consumed.
            pending_deprecated = False
            pending_dep_version = ""
            pending_dep_msg = ""
            i = end_fn
            continue
        # --- UENUM ---
        if ident == 'UENUM':
            inner, ep = extract_balanced(body, i, '(', ')')
            ue_specs = inner.strip()
            comment = pending_comment
            pending_comment = None
            j = ep
            while j < n and body[j] in ' \t\r\n':
                j += 1
            result = _parse_enum_at(body, j, ue_specs, comment, editor_tracker.editor_only)
            if result:
                nested_enums.append(result[0])
                i = result[1]
            else:
                i = j
            continue
        # --- DECLARE_*_DELEGATE inside class (inline delegate typedef) ---
        if ident.startswith('DECLARE_') and 'DELEGATE' in ident:
            # Skip to end of line
            end = body.find('\n', i)
            i = (end + 1) if end != -1 else n
            pending_comment = None
            continue
        # --- Nested struct/class ---
        if ident in ('struct', 'class'):
            # Find the next { - check if this is a definition (not forward decl)
            bp = body.find('{', i)
            semi = body.find(';', i)
            if bp != -1 and (semi == -1 or bp < semi):
                # Has a body - skip the whole nested class
                i = skip_block(bp)
                pending_comment = None
            else:
                # Forward declaration - skip to ;
                i = (semi + 1) if semi != -1 else advance_line()
                pending_comment = None
            continue
        # --- Nested enum ---
        if ident == 'enum':
            result = _parse_enum_at(body, i, "", pending_comment, editor_tracker.editor_only)
            if result:
                nested_enums.append(result[0])
                pending_comment = None
                i = result[1]
            else:
                i = advance_line()
                pending_comment = None
            continue
        # --- Skip C++ keywords that aren't declarations ---
        if ident in _SKIP_KEYWORDS:
            i = advance_line()
            pending_comment = None
            continue
        # --- Try to parse as a function or method ---
        # Look for '(' on the current line (safe: no cross-line backtracking)
        line_end = body.find('\n', i)
        if line_end == -1:
            line_end = n
        current_line = body[i:line_end]
        paren_in_line = current_line.find('(')
        if paren_in_line != -1:
            abs_open = i + paren_in_line
            # Make sure there's a word just before '('
            before_paren = body[i:abs_open].rstrip()
            if before_paren and re.search(r'\w$', before_paren):
                cl_pos = find_matching_close(body, abs_open, '(', ')')
                if cl_pos != -1:
                    sig_text = _API_RE.sub('', body[i:cl_pos + 1]).strip()
                    sig_text = _MUTABLE_RE.sub('', sig_text).strip()
                    ret, name, raw_params, mods = _parse_function_signature(sig_text)
                    # Skip obvious non-functions
                    skip = (not name or name in _SKIP_KEYWORDS or
                            name in _SKIP_MACROS or name in _HANDLED_MACROS)
                    # All-uppercase is probably a macro
                    if name and name == name.upper() and '_' in name:
                        skip = True
                    if not skip:
                        # Skip inline body or find end of declaration
                        end_fn = cl_pos + 1
                        k = end_fn
                        while k < n and body[k] in ' \t':
                            k += 1
                        if k < n and body[k] == '{':
                            cl2 = find_matching_close(body, k, '{', '}')
                            end_fn = (cl2 + 1) if cl2 != -1 else k + 1
                        else:
                            # Find ; on current or next line
                            semi = body.find(';', cl_pos + 1)
                            nl = body.find('\n', cl_pos + 1)
                            if semi != -1 and (nl == -1 or semi <= nl + 2):
                                end_fn = semi + 1
                            else:
                                end_fn = (nl + 1) if nl != -1 else n
                        comment = pending_comment
                        pending_comment = None
                        fn = FunctionInfo(
                            name=name, return_type=ret, raw_params=raw_params,
                            full_signature=sig_text.strip(), uf_specifiers="",
                            comment=comment, modifiers=mods, access=access,
                            is_deprecated=pending_deprecated,
                            deprecated_version=pending_dep_version,
                            deprecated_msg=pending_dep_msg,
                            editor_only=editor_tracker.editor_only,
                        )
                        functions.append(fn)
                        pending_deprecated = False
                        pending_dep_version = ""
                        pending_dep_msg = ""
                        i = end_fn
                        continue
        # --- Default: skip line ---
        i = advance_line()
        pending_comment = None
    return properties, functions, nested_enums
# ---------------------------------------------------------------------------
# Enum at position
# ---------------------------------------------------------------------------
def _parse_enum_at(text: str, pos: int, ue_specs: str,
                   comment: Optional[DocComment], editor_only: bool):
    """Parse enum starting at pos. Returns (EnumInfo, end_pos) or None."""
    header = re.match(r'enum\s+(?:class\s+)?(\w+)\s*(?::\s*(\w+))?\s*\{',
                      text[pos:], re.DOTALL)
    if header is None:
        return None
    # The match ends one past '{'; back up to the brace itself.
    open_brace = pos + header.end() - 1
    close_brace = find_matching_close(text, open_brace, '{', '}')
    if close_brace == -1:
        return None
    values = _parse_enum_body(text[open_brace + 1:close_brace])
    # Consume trailing whitespace and the optional ';' after the closing brace.
    end_pos = close_brace + 1
    limit = len(text)
    while end_pos < limit and text[end_pos] in ' \t\r\n':
        end_pos += 1
    if end_pos < limit and text[end_pos] == ';':
        end_pos += 1
    info = EnumInfo(name=header.group(1),
                    underlying_type=header.group(2) or "",
                    ue_specifiers=ue_specs, comment=comment, values=values,
                    editor_only=editor_only)
    return info, end_pos
# ---------------------------------------------------------------------------
# Class/struct at position
# ---------------------------------------------------------------------------
def _parse_class_or_struct_at(text: str, pos: int, ue_specs: str,
                              comment: Optional[DocComment], macro: str):
    """Parse class/struct at pos. Returns (ClassInfo, end_pos) or None.

    macro is the UCLASS/USTRUCT macro name or '' for a bare declaration.
    NOTE(review): `macro` is currently unused in this function body.
    """
    m = re.match(
        r'(class|struct)\s+'              # kind keyword
        r'(?:([A-Z][A-Z0-9_]+_API)\s+)?'  # optional module export macro
        r'(\w+)'                          # class/struct name
        r'(?:\s*:\s*([^{;]+?))?'          # optional base-clause (up to '{' or ';')
        r'\s*\{',
        text[pos:], re.DOTALL
    )
    if not m:
        return None
    kind = m.group(1)
    module_api = m.group(2) or ""
    name = m.group(3)
    bases_str = m.group(4) or ""
    bases = []
    for b in bases_str.split(','):
        b = b.strip()
        # Drop the inheritance access specifier, keep only the base type name.
        b = re.sub(r'^(?:public|protected|private)\s+', '', b).strip()
        if b:
            bases.append(b)
    # m.end() is one past '{'; back up to the brace itself.
    brace_pos = pos + m.end() - 1
    cl = find_matching_close(text, brace_pos, '{', '}')
    if cl == -1:
        return None
    body = text[brace_pos + 1:cl]
    props, fns, nested_enums = _parse_class_body(body, kind)
    # Consume trailing whitespace and the ';' after the closing brace.
    end_pos = cl + 1
    while end_pos < len(text) and text[end_pos] in ' \t\r\n':
        end_pos += 1
    if end_pos < len(text) and text[end_pos] == ';':
        end_pos += 1
    return ClassInfo(name=name, kind=kind, bases=bases, ue_specifiers=ue_specs,
                     module_api=module_api, comment=comment, properties=props,
                     functions=fns, nested_enums=nested_enums), end_pos
# ---------------------------------------------------------------------------
# Namespace free-function parsing
# ---------------------------------------------------------------------------
def _parse_namespace_functions(body: str) -> list:
    """Parse free functions in a namespace body.

    A trimmed-down version of the _parse_class_body scanner: no access
    specifiers, no UPROPERTY/UFUNCTION handling — only comments, typedefs,
    nested type skips, and free-function signatures.
    Returns a list of FreeFunction.
    """
    functions = []
    i = 0
    n = len(body)
    # Doc comment seen immediately before the next declaration; consumed on use.
    pending_comment: Optional[DocComment] = None
    while i < n:
        # Skip whitespace.
        while i < n and body[i] in ' \t\r\n':
            i += 1
        if i >= n:
            break
        ch = body[i]
        # Preprocessor line: skip (no editor-only tracking at namespace level).
        if ch == '#':
            end = body.find('\n', i)
            i = (end + 1) if end != -1 else n
            continue
        # /** doc comment */
        if body[i:i+3] == '/**':
            end = body.find('*/', i)
            if end == -1:
                i += 3
                continue
            pending_comment = parse_doc_comment(body[i:end + 2])
            i = end + 2
            continue
        # /* plain block comment */ — discarded
        if body[i:i+2] == '/*':
            end = body.find('*/', i)
            i = (end + 2) if end != -1 else n
            continue
        # // line comment — promoted to a doc comment if none pending
        if body[i:i+2] == '//':
            end = body.find('\n', i)
            line_text = body[i:(end if end != -1 else n)].lstrip('/').strip()
            if pending_comment is None and line_text:
                pending_comment = DocComment(description=line_text)
            i = (end + 1) if end != -1 else n
            continue
        # typedef/using
        if body[i:].startswith(('typedef ', 'using ')):
            end = body.find(';', i)
            i = (end + 1) if end != -1 else n
            pending_comment = None
            continue
        # struct/class/enum — skip
        m = re.match(r'(struct|class|enum)\b', body[i:])
        if m and m.start() == 0:
            bp = body.find('{', i)
            semi = body.find(';', i)
            if bp != -1 and (semi == -1 or bp < semi):
                # Definition with a body: skip the whole block plus trailing ';'.
                cl = find_matching_close(body, bp, '{', '}')
                ep = cl + 1 if cl != -1 else bp + 1
                while ep < n and body[ep] in ' \t\r\n':
                    ep += 1
                if ep < n and body[ep] == ';':
                    ep += 1
                i = ep
            else:
                # Forward declaration: skip to ';'.
                i = (semi + 1) if semi != -1 else n
            pending_comment = None
            continue
        # Look for function on current line
        line_end = body.find('\n', i)
        if line_end == -1:
            line_end = n
        current_line = body[i:line_end]
        paren_in_line = current_line.find('(')
        if paren_in_line != -1:
            abs_open = i + paren_in_line
            # Require an identifier character directly before '('.
            before_paren = body[i:abs_open].rstrip()
            if before_paren and re.search(r'\w$', before_paren):
                cl_pos = find_matching_close(body, abs_open, '(', ')')
                if cl_pos != -1:
                    sig_text = _API_RE.sub('', body[i:cl_pos + 1]).strip()
                    ret, name, raw_params, mods = _parse_function_signature(sig_text)
                    # Reject keywords/macros posing as function names.
                    skip = (not name or name in _SKIP_KEYWORDS or
                            name in _SKIP_MACROS)
                    # All-uppercase with '_' is probably a macro invocation.
                    if name and name == name.upper() and '_' in name:
                        skip = True
                    if not skip:
                        # Skip an inline body, a trailing ';', or fall to next line.
                        end_fn = cl_pos + 1
                        k = end_fn
                        while k < n and body[k] in ' \t\r\n':
                            k += 1
                        if k < n and body[k] == '{':
                            cl2 = find_matching_close(body, k, '{', '}')
                            end_fn = (cl2 + 1) if cl2 != -1 else k + 1
                        elif k < n and body[k] == ';':
                            end_fn = k + 1
                        else:
                            end_fn = line_end + 1
                        fn = FreeFunction(name=name, full_signature=sig_text.strip(),
                                          return_type=ret, raw_params=raw_params,
                                          comment=pending_comment, modifiers=mods)
                        functions.append(fn)
                        pending_comment = None
                        i = end_fn
                        continue
        # Skip line
        i = line_end + 1
        pending_comment = None
    return functions
# ---------------------------------------------------------------------------
# Module name inference
# ---------------------------------------------------------------------------
_MODULE_SUFFIXES = [
'MODULE', 'MANAGER', 'SYSTEM', 'EDITOR', 'UTILS', 'TYPES',
'SETTINGS', 'TAGS', 'ENGINE', 'CORE', 'INTERFACE', 'COMPONENT',
'SUBSYSTEM', 'PLUGIN', 'RUNTIME', 'TASK', 'TASKS', 'GAME',
]
def _caps_to_camel(s: str) -> str:
"""Convert ALL-CAPS identifier to CamelCase: AIMODULE->AIModule, GAMEPLAYTAGS->GameplayTags."""
if not s:
return s
if '_' in s:
return ''.join(w.capitalize() for w in s.split('_'))
words = []
remaining = s
while remaining:
found = False
for suffix in sorted(_MODULE_SUFFIXES, key=len, reverse=True):
if remaining.endswith(suffix) and len(remaining) > len(suffix):
words.insert(0, suffix.capitalize())
remaining = remaining[:-len(suffix)]
found = True
break
if not found:
# Remaining is either a short acronym (≤3 chars) or a word
words.insert(0, remaining if len(remaining) <= 3 else remaining.capitalize())
break
return ''.join(words)
def _infer_module(filename: str, text: str) -> str:
m = re.search(r'\b([A-Z][A-Z0-9]+)_API\b', text)
if m:
return _caps_to_camel(m.group(1))
return Path(filename).stem
# ---------------------------------------------------------------------------
# Top-level parse_header
# ---------------------------------------------------------------------------
def parse_header(filepath: str) -> ParsedHeader:
    """Parse one UE C++ header file into a ParsedHeader.

    Top-level scanner: walks the file text and dispatches on delegate macros,
    namespaces, UENUM/UCLASS/USTRUCT macros, and bare class/struct/enum
    declarations. Anything unrecognized is skipped a line at a time.
    """
    path = Path(filepath)
    # errors='replace' keeps the parse alive on odd encodings.
    text = path.read_text(encoding='utf-8', errors='replace')
    header = ParsedHeader(
        filepath=str(path),
        filename=path.name,
        module_name=_infer_module(path.name, text),
    )
    i = 0
    n = len(text)
    editor_tracker = EditorOnlyTracker()
    # Doc comment seen immediately before the next declaration; consumed on use.
    pending_comment: Optional[DocComment] = None
    while i < n:
        # Skip whitespace.
        while i < n and text[i] in ' \t\r\n':
            i += 1
        if i >= n:
            break
        ch = text[i]
        # Preprocessor
        if ch == '#':
            end = text.find('\n', i)
            pp_line = text[i:(end if end != -1 else n)]
            editor_tracker.handle_line(pp_line)
            i = (end + 1) if end != -1 else n
            pending_comment = None
            continue
        # Doc comment
        if text[i:i+3] == '/**':
            end = text.find('*/', i)
            if end == -1:
                # Unterminated doc comment: step past the opener and carry on.
                i += 3
                continue
            pending_comment = parse_doc_comment(text[i:end + 2])
            i = end + 2
            continue
        # Block comment
        if text[i:i+2] == '/*':
            end = text.find('*/', i)
            i = (end + 2) if end != -1 else n
            continue
        # Line comment — promoted to a doc comment if none pending
        if text[i:i+2] == '//':
            end = text.find('\n', i)
            line_text = text[i:(end if end != -1 else n)].lstrip('/').strip()
            if pending_comment is None and line_text:
                pending_comment = DocComment(description=line_text)
            i = (end + 1) if end != -1 else n
            continue
        # Read identifier
        m = re.match(r'\w+', text[i:])
        if not m:
            i += 1
            continue
        ident = m.group(0)
        # Delegate macros
        if ident.startswith('DECLARE_') and 'DELEGATE' in ident:
            inner, ep = extract_balanced(text, i, '(', ')')
            delegate = _parse_delegate(ident, inner, pending_comment)
            header.delegates.append(delegate)
            pending_comment = None
            # skip to next statement
            while ep < n and text[ep] in ' \t\r\n;':
                ep += 1
            i = ep
            continue
        # namespace
        if ident == 'namespace':
            m2 = re.match(r'namespace\s+([\w:]+)\s*\{', text[i:])
            if not m2:
                # NOTE(review): this fallback is subsumed by the pattern above
                # (\s* already matches newlines), so it can never fire.
                m2 = re.match(r'namespace\s+([\w:]+)\s*\n\s*\{', text[i:])
            if m2:
                ns_name = m2.group(1)
                # m2.end() is one past '{'; back up to the brace itself.
                brace_pos = i + m2.end() - 1
                cl = find_matching_close(text, brace_pos, '{', '}')
                if cl != -1:
                    body = text[brace_pos + 1:cl]
                    fns = _parse_namespace_functions(body)
                    # Only record namespaces that actually contain functions.
                    if fns:
                        ns = NamespaceInfo(name=ns_name, functions=fns)
                        header.namespaces.append(ns)
                    pending_comment = None
                    i = cl + 1
                else:
                    i += m2.end()
            else:
                end = text.find('\n', i)
                i = (end + 1) if end != -1 else n
            continue
        # UENUM
        if ident == 'UENUM':
            inner, ep = extract_balanced(text, i, '(', ')')
            ue_specs = inner.strip()
            comment = pending_comment
            pending_comment = None
            j = ep
            while j < n and text[j] in ' \t\r\n':
                j += 1
            result = _parse_enum_at(text, j, ue_specs, comment, editor_tracker.editor_only)
            if result:
                header.enums.append(result[0])
                i = result[1]
            else:
                i = j
            continue
        # UCLASS / USTRUCT
        if ident in ('UCLASS', 'USTRUCT'):
            inner, ep = extract_balanced(text, i, '(', ')')
            ue_specs = inner.strip()
            comment = pending_comment
            pending_comment = None
            j = ep
            while j < n and text[j] in ' \t\r\n':
                j += 1
            result = _parse_class_or_struct_at(text, j, ue_specs, comment, ident)
            if result:
                header.classes.append(result[0])
                i = result[1]
            else:
                i = j
            continue
        # Bare struct/class
        if ident in ('struct', 'class'):
            comment = pending_comment
            pending_comment = None
            result = _parse_class_or_struct_at(text, i, "", comment, "")
            if result:
                header.classes.append(result[0])
                i = result[1]
            else:
                # Forward declaration or unmatched pattern — skip the line.
                end = text.find('\n', i)
                i = (end + 1) if end != -1 else n
            continue
        # Bare enum
        if ident == 'enum':
            result = _parse_enum_at(text, i, "", pending_comment, editor_tracker.editor_only)
            if result:
                header.enums.append(result[0])
                pending_comment = None
                i = result[1]
            else:
                end = text.find('\n', i)
                i = (end + 1) if end != -1 else n
            continue
        # Skip line
        end = text.find('\n', i)
        pending_comment = None
        i = (end + 1) if end != -1 else n
    return header