mp-units/scripts/systems_reference.py

#!/usr/bin/env python3
"""
Generate reference documentation for mp-units systems from C++ header files.

This script parses system header files and generates markdown documentation including:
- Dimensions index (alphabetical list)
- Quantities index (alphabetical list)
- Units index (alphabetical list)
- Per-system reference pages with base/derived units separated
"""

import hashlib
import json
import re
import sys
from collections import defaultdict
from dataclasses import dataclass, field
from pathlib import Path
from typing import Dict, List, Optional, Set


@dataclass
class Dimension:
    """A base dimension read from a system header."""

    # Identifier of the dimension object (the parser matches names of the form dim_*)
    name: str
    # Symbol text taken from the base_dimension<...> template argument
    symbol: str
    # Qualified namespace the dimension is reported under
    namespace: str
    # Path of the header the definition was found in
    file: str


@dataclass
class Quantity:
    """A quantity specification (one QUANTITY_SPEC entry or a hardcoded core quantity)."""

    name: str
    # Name of the parent quantity, when the spec derives from one
    parent: Optional[str] = None
    # Name of the dimension, when the spec is defined directly on one
    dimension: Optional[str] = None
    # Defining equation text, when present
    equation: Optional[str] = None
    namespace: str = ""
    file: str = ""
    is_kind: bool = False
    # If this is an alias, the name of the original quantity
    alias_target: Optional[str] = None

    # --- Fields from C++ extraction ---
    # Calculated dimensional formula (e.g., "L²MT⁻³")
    dimensional_formula: Optional[str] = None
    # Quantity character: Real, Complex, Vector, Tensor
    character: str = "Real"
    # e.g., "isq::length" - from C++ get_kind()
    kind_of: str = ""
    # e.g., "isq::length" or "<root>" - from C++ qs._parent_
    parent_from_cpp: str = ""


@dataclass
class Unit:
    """Represents a unit definition"""

    name: str
    symbol: str
    # List of unit_symbol names (e.g., ['Ω', 'ohm']); every instance gets its own list
    unit_symbols: List[str] = field(default_factory=list)
    definition: str = ""
    # Full namespace path (e.g., "mp_units::usc::survey1893")
    namespace: str = ""
    file: str = ""
    is_alias: bool = False
    # True if kind_of<> with no equation
    is_base: bool = False
    # Relative subnamespace (e.g., "survey1893")
    subnamespace: Optional[str] = None
    # Original namespace where defined (e.g., "non_si")
    origin_namespace: Optional[str] = None
    # If this is an alias, the name of the original entity
    alias_target: Optional[str] = None

    def __post_init__(self):
        # The field used to default to None, so callers may still pass None
        # explicitly; normalise it to keep ``unit_symbols`` always a list.
        if self.unit_symbols is None:
            self.unit_symbols = []


@dataclass
class PointOrigin:
    """A point origin definition found in a system header."""

    name: str
    # 'absolute' or 'relative'
    origin_type: str
    # Template parameter (e.g., "isq::thermodynamic_temperature" or "point<...>")
    definition: str = ""
    namespace: str = ""
    file: str = ""
    # If this is an alias, the name of the original entity
    alias_target: Optional[str] = None


@dataclass
class Prefix:
    """A unit prefix definition (like 'kilo' or 'mega')."""

    name: str
    symbol: str
    # Magnitude/scale (e.g., "mag_power<10, 3>" or "mag_power<2, 10>")
    definition: str = ""
    namespace: str = ""
    file: str = ""


@dataclass
class SystemInfo:
    """Everything collected for one system, keyed by its namespace."""

    # Namespace name of the system (the synthetic core system uses "")
    namespace: str
    # Headers that contributed to this system
    files: List[Path] = field(default_factory=list)
    # Parsed entities, each list in the order the parser appended them
    dimensions: List[Dimension] = field(default_factory=list)
    quantities: List[Quantity] = field(default_factory=list)
    units: List[Unit] = field(default_factory=list)
    point_origins: List[PointOrigin] = field(default_factory=list)
    prefixes: List[Prefix] = field(default_factory=list)


class SystemsParser:
    """Parser for mp-units system header files"""

    def __init__(self, systems_dir: Path):
        self.systems_dir = systems_dir
        self.systems: Dict[str, SystemInfo] = {}
        self.parsed_files: Set[Path] = set()  # Track parsed files to avoid duplicates
        # Determine the source root directory for parsing core framework files
        # systems_dir is src/systems/include/mp-units/systems, so we need to go up 5 levels to get to repo root
        self.source_root = systems_dir.parent.parent.parent.parent.parent

    def parse_all_systems(self):
        """Parse the core framework and every system header, following include order.

        The work happens in three steps:

        1. the core framework entities are registered,
        2. each main header is parsed together with the headers it includes,
        3. every file recorded for a system is re-read for the unit-symbol
           pass, which runs only after all units are known.
        """
        # First, parse core framework entities
        self._parse_core_framework()

        # Top-level headers; names starting with "_" are skipped here
        main_headers = [
            header_file
            for header_file in self.systems_dir.glob("*.h")
            if not header_file.name.startswith("_")
        ]

        # A subdirectory's main header sits NEXT TO the directory and carries
        # its name (si.h beside si/). Add it if the glob above did not.
        for subdir in self.systems_dir.iterdir():
            if subdir.is_dir():
                main_header = subdir.parent / f"{subdir.name}.h"
                if main_header.exists() and main_header not in main_headers:
                    main_headers.append(main_header)

        # Parse each main header and its includes
        for main_header in main_headers:
            self.parse_system_with_includes(main_header)

        # After all files are parsed, assign unit_symbols as a second pass
        for system in self.systems.values():
            for file in system.files:
                if file.exists():
                    # Headers contain non-ASCII unit symbols, so decode as
                    # UTF-8 explicitly instead of using the locale default.
                    content = file.read_text(encoding="utf-8")
                    self._parse_unit_symbols(content, system)

    def _parse_core_framework(self):
        """Register the core framework entities (built-in units and quantities).

        Creates the synthetic ``"core"`` system, adds the hardcoded
        ``dimensionless`` quantity, ``dimension_one`` dimension and ``one``
        unit, then parses the remaining built-in units from
        ``framework/unit.h`` when that file exists under ``source_root``.
        """
        # Create a synthetic "core" system (empty namespace = mp_units::)
        core_system = SystemInfo(namespace="")
        self.systems["core"] = core_system

        # Hardcode dimensionless quantity - it's a fundamental identity
        core_system.quantities.append(
            Quantity(
                name="dimensionless",
                parent="",
                dimension="dimensionless",
                equation="",  # No equation for dimensionless
                namespace="mp_units",
                file="quantity_spec.h",
                is_kind=True,
                alias_target="",
                dimensional_formula="1",
                character="Real",
                kind_of="dimensionless",
                parent_from_cpp="<root>",
            )
        )

        # Hardcode dimension_one - the identity dimension for dimensionless quantities
        core_system.dimensions.append(
            Dimension(
                name="dimension_one",
                symbol="1",
                namespace="mp_units",
                file="dimension.h",
            )
        )

        # Hardcode 'one' unit - it's a fundamental identity
        core_system.units.append(
            Unit(
                name="one",
                symbol="1",
                definition="",  # No definition for one
                namespace="mp_units",
                file="unit.h",
                is_base=True,
            )
        )

        # Parse other units from unit.h using existing parsers
        unit_path = self.source_root / "src/core/include/mp-units/framework/unit.h"
        if not unit_path.exists():
            # Without unit.h nothing else is registered, including core.h
            return

        try:
            # The header contains non-ASCII symbols (e.g. the per-mille sign),
            # so decode as UTF-8 explicitly instead of the locale default.
            content = unit_path.read_text(encoding="utf-8")
            # Only parse content after "common dimensionless units" comment
            # and before "Common unit" comment to avoid parsing examples
            start_marker = content.find("// common dimensionless units")
            end_marker = content.find("// Common unit")
            if start_marker != -1 and end_marker != -1:
                content = content[start_marker:end_marker]
                # Parse units at mp_units namespace level (no sub-namespace)
                self._parse_units(
                    content, core_system, str(unit_path), namespace_to_search=None
                )
                self._parse_unit_symbols(content, core_system)
        except Exception as e:
            # Deliberately best-effort: a problem in the core header is
            # reported but must not stop the system headers being processed.
            print(f"Warning: Could not parse {unit_path}: {e}")

        # Add core.h as the public header (not unit.h which is internal)
        core_header = self.source_root / "src/core/include/mp-units/core.h"
        core_system.files.append(core_header)

    def parse_system_with_includes(self, main_header: Path):
        """Parse a system header and all its includes in order"""
        content = main_header.read_text()

        # Extract includes from this header
        include_pattern = r"#include\s+<mp-units/systems/([^>]+)>"
        includes = []

        for match in re.finditer(include_pattern, content):
            include_path = match.group(1)
            # Resolve relative to systems directory (already at mp-units/systems/)
            included_file = self.systems_dir / include_path
            if included_file.exists():
                includes.append(included_file)

        # Parse all included files first (in order)
        for included in includes:
            self.parse_system_with_includes(included)

        # Parse the main file
        self.parse_system_header(main_header)

        # If this is an umbrella header (has includes but didn't establish its own namespace),
        # add it to the system determined by its included files
        if includes:
            # Check if main_header established its own namespace
            namespace_match = re.search(r"namespace\s+mp_units::(\w+)", content)
            if not namespace_match:
                namespace_match = re.search(
                    r"namespace\s+mp_units\s*\{[^}]*namespace\s+(\w+)", content
                )

            # If no namespace found, this is an umbrella header
            if not namespace_match:
                # Find which system the included files belong to
                for included in includes:
                    for system in self.systems.values():
                        if included in system.files and main_header not in system.files:
                            system.files.insert(
                                0, main_header
                            )  # Add as first entry (primary header)
                            break
                    break  # Only need to check first included file

    def parse_system_header(self, header_file: Path):
        """Parse a single system header file"""
        # Skip if already parsed
        if header_file in self.parsed_files:
            return

        self.parsed_files.add(header_file)
        content = header_file.read_text()

        # Extract namespace
        namespace_match = re.search(r"namespace\s+mp_units::(\w+)", content)
        if not namespace_match:
            namespace_match = re.search(
                r"namespace\s+mp_units\s*\{[^}]*namespace\s+(\w+)", content
            )

        if not namespace_match:
            return

        namespace = namespace_match.group(1)

        # Get or create system info
        if namespace not in self.systems:
            self.systems[namespace] = SystemInfo(namespace=namespace)

        system = self.systems[namespace]
        if header_file not in system.files:
            system.files.append(header_file)

        # Parse content
        self._parse_dimensions(content, system, str(header_file))
        self._parse_quantities(content, system, str(header_file))
        self._parse_units(content, system, str(header_file))
        self._parse_point_origins(content, system, str(header_file))
        self._parse_prefixes(content, system, str(header_file))
        self._parse_aliases(content, system, str(header_file))

    def _is_in_namespace(self, content: str, pos: int, namespace: str) -> bool:
        """Check if position is inside the specified namespace (not in a nested namespace)"""
        if not namespace:
            return True  # No namespace filtering

        before_pos = content[:pos]

        # Find the last namespace declaration before this position
        # Handle both "namespace astronomy {" and "namespace mp_units::astronomy {"
        namespace_pattern = rf"namespace\s+(?:mp_units::)?{re.escape(namespace)}\s*{{"
        last_match = None
        for match in re.finditer(namespace_pattern, before_pos):
            last_match = match

        if not last_match:
            return False  # Not in the namespace at all

        # Check if we're still inside that namespace (not closed yet)
        section_after = before_pos[last_match.end() :]
        open_braces = (
            section_after.count("{") + 1
        )  # +1 for the opening brace of namespace
        close_braces = section_after.count("}")

        return open_braces > close_braces

    def _detect_origin_namespace(
        self, content: str, match_pos: int, system_namespace: str
    ) -> Optional[str]:
        """Detect the origin namespace including non_si and si2019"""
        before_match = content[:match_pos]

        # Track ALL namespaces including ones we exclude from display
        namespace_stack = []
        brace_depth = 0

        i = 0
        while i < len(before_match):
            char = before_match[i]

            if char == "{":
                brace_depth += 1
            elif char == "}":
                brace_depth -= 1
                while namespace_stack and brace_depth <= namespace_stack[-1][1]:
                    namespace_stack.pop()

            if before_match[i : i + 9] == "namespace":
                rest = before_match[i + 9 :]
                ns_match = re.match(r"\s+(\w+)\s*\{", rest)
                if ns_match:
                    ns_name = ns_match.group(1)
                    # Track all except main system and mp_units
                    if (
                        ns_name not in ["mp_units", "unit_symbols", system_namespace]
                        and "::" not in ns_name
                    ):
                        namespace_stack.append((ns_name, brace_depth))

            i += 1

        # Return origin namespace if it's non_si or si2019
        if namespace_stack:
            innermost = namespace_stack[-1][0]
            if innermost in ["non_si", "si2019"]:
                return innermost
        return None

    def _get_nested_namespace(
        self, content: str, match_pos: int, system_namespace: str
    ) -> Optional[str]:
        """Detect if a match is inside a nested namespace and return the nested namespace name"""
        before_match = content[:match_pos]

        # Namespaces that should NOT be treated as subnamespaces for display purposes
        excluded_namespaces = {
            "mp_units",
            "unit_symbols",
            "non_si",
            "si2019",
            system_namespace,
        }

        # Track all namespace openings with their brace depth
        namespace_stack = []
        brace_depth = 0

        # Process character by character to accurately track braces
        i = 0
        while i < len(before_match):
            char = before_match[i]

            if char == "{":
                brace_depth += 1
            elif char == "}":
                brace_depth -= 1
                # Pop namespace when scope closes
                while namespace_stack and brace_depth <= namespace_stack[-1][1]:
                    namespace_stack.pop()

            # Check for namespace declaration (but skip inline namespaces)
            if before_match[i : i + 9] == "namespace":
                # Check if this is an inline namespace (look backwards for "inline" keyword)
                before_namespace = before_match[max(0, i - 20) : i]
                is_inline = bool(re.search(r"\binline\s+$", before_namespace))

                if not is_inline:
                    # Extract namespace name
                    rest = before_match[i + 9 :]
                    ns_match = re.match(r"\s+(\w+)\s*\{", rest)
                    if ns_match:
                        ns_name = ns_match.group(1)
                        # Skip excluded namespaces and namespace with ::
                        if ns_name not in excluded_namespaces and "::" not in ns_name:
                            namespace_stack.append((ns_name, brace_depth))

            i += 1

        # Return the innermost nested namespace if any
        if namespace_stack:
            return namespace_stack[-1][0]
        return None

    def _parse_dimensions(self, content: str, system: SystemInfo, file: str):
        """Collect the ``dim_*`` base-dimension declarations found in ``content``.

        Recognises ``inline constexpr struct dim_X final : base_dimension<...> {} dim_X;``
        where the template argument is either a quoted symbol or a
        ``symbol_text{...}`` expression. One ``Dimension`` per match is
        appended to ``system.dimensions``.

        Args:
            content: Header text to scan.
            system: System receiving the parsed dimensions.
            file: Path string recorded on each dimension.
        """
        declaration = re.compile(
            r"inline\s+constexpr\s+struct\s+(dim_\w+)\s+final\s*:\s*"
            r'base_dimension<(?:"([^"]+)"|symbol_text\{[^}]+\})>\s*\{\}\s+\1\s*;'
        )
        qualified_namespace = f"mp_units::{system.namespace}"

        for found in declaration.finditer(content):
            quoted_symbol = found.group(2)
            if quoted_symbol:
                symbol = quoted_symbol
            else:
                # symbol_text{...} form: pull the symbol out of the matched text
                symbol = self._extract_symbol_text(found.group(0))

            system.dimensions.append(
                Dimension(
                    name=found.group(1),
                    symbol=symbol,
                    namespace=qualified_namespace,
                    file=file,
                )
            )

    def _extract_symbol_text(self, text: str) -> str:
        """Extract symbol from symbol_text{u8"...", "..."}"""
        match = re.search(r'symbol_text\{u8"([^"]+)"', text)
        if match:
            return match.group(1)
        match = re.search(r'symbol_text\{[^,]+,\s*"([^"]+)"', text)
        if match:
            return match.group(1)
        return "?"

    def _parse_quantities(self, content: str, system: SystemInfo, file: str):
        """Collect ``QUANTITY_SPEC(...)`` definitions from ``content``.

        Each macro call not on a ``//``-commented line is split into arguments
        and classified by its second argument:

        - starts with ``dim_``: a dimension, so the quantity is a kind/root;
        - contains an operator, a known function name or a call: an equation
          with no parent, so the quantity is implicitly a kind;
        - anything else (including ``dimensionless``): the parent quantity,
          and the quantity is a kind only if ``is_kind`` appears in the call.

        In the first and last cases the third argument is taken as the
        equation unless it is ``is_kind`` or a ``quantity_character::`` value.

        Args:
            content: Header text to scan.
            system: System receiving the parsed quantities.
            file: Path string recorded on each quantity.
        """
        equation_markers = (
            "*",
            "/",
            "+",
            "-",
            "pow<",
            "sqrt",
            "cbrt",
            "square",
            "cubic",
        )

        def as_equation(param):
            """Return ``param`` if it can be an equation argument, else None."""
            if not param or param == "is_kind":
                return None
            if param.startswith("quantity_character::"):
                return None
            return param

        for macro in re.finditer(r"QUANTITY_SPEC\s*\(", content):
            # Ignore macro calls on a line that starts with a // comment
            line_start = content.rfind("\n", 0, macro.start()) + 1
            if content[line_start : macro.start()].strip().startswith("//"):
                continue

            # Walk forward from the opening '(' to its matching ')'
            open_paren = macro.end() - 1
            cursor = open_paren + 1
            nesting = 1
            while cursor < len(content) and nesting > 0:
                ch = content[cursor]
                if ch == "(":
                    nesting += 1
                elif ch == ")":
                    nesting -= 1
                cursor += 1
            if nesting != 0:
                continue  # unbalanced parentheses: not a complete macro call

            args_str = content[open_paren + 1 : cursor - 1]
            args = self._split_macro_args(args_str)
            if len(args) < 2:
                continue

            second = args[1].strip()
            third = args[2].strip() if len(args) > 2 else None

            parent = None
            dimension = None
            equation = None

            if second.startswith("dim_"):
                # Defined directly on a dimension: always a kind/root
                dimension = second
                equation = as_equation(third)
                is_kind = True
            elif any(marker in second for marker in equation_markers) or re.search(
                r"\w+\s*\(", second
            ):
                # Equation with no parent: implicitly a kind
                equation = second
                is_kind = True
            else:
                # Simple name: parent quantity (note that 'dimensionless' is a
                # quantity, not a dimension)
                parent = second
                equation = as_equation(third)
                is_kind = "is_kind" in args_str

            system.quantities.append(
                Quantity(
                    name=args[0].strip(),
                    parent=parent,
                    dimension=dimension,
                    equation=equation,
                    namespace=f"mp_units::{system.namespace}",
                    file=file,
                    is_kind=is_kind,
                )
            )

    def _split_macro_args(self, args_str: str) -> List[str]:
        """Split macro arguments respecting nested brackets"""
        args = []
        current = []
        depth = 0
        angle_depth = 0

        for char in args_str:
            if char == "(" or char == "{":
                depth += 1
                current.append(char)
            elif char == ")" or char == "}":
                depth -= 1
                current.append(char)
            elif char == "<":
                angle_depth += 1
                current.append(char)
            elif char == ">":
                angle_depth -= 1
                current.append(char)
            elif char == "," and depth == 0 and angle_depth == 0:
                args.append("".join(current))
                current = []
            else:
                current.append(char)

        if current:
            args.append("".join(current))

        return args

    def _parse_units(
        self,
        content: str,
        system: SystemInfo,
        file: str,
        namespace_to_search: Optional[str] = "auto",
    ):
        """Parse unit definitions from content

        Units are appended to ``system.units`` in three passes, in this order:

        1. ``named_unit<symbol_text{u8"...", "..."}, ...>`` declarations,
        2. ``named_unit<"...", ...>`` declarations,
        3. ``inline constexpr auto NAME = <expression>;`` definitions whose
           expression contains ``<`` (plain aliases are left for later).

        Named units declared inside a ``unit_symbols`` namespace are skipped.

        Args:
            content: The content to parse
            system: The system to add units to
            file: The file path
            namespace_to_search: Namespace to look for units in.
                                 "auto" (default) = use system.namespace
                                 None = parse at top level (no namespace filtering)
                                 string = specific namespace to search
        """
        # Determine which namespace to search for
        if namespace_to_search == "auto":
            namespace_to_search = system.namespace
        filter_by_namespace = namespace_to_search is not None

        # Tokens that mark a definition as an equation rather than a bare kind_of<>
        equation_tokens = ("*", "/", "+", "-", "pow", "square", "cubic")

        def inside_unit_symbols(position: int) -> bool:
            """True when the braces opened after the last 'namespace unit_symbols' outnumber the closed ones."""
            before = content[:position]
            marker = before.rfind("namespace unit_symbols")
            if marker == -1:
                return False
            tail = before[marker:]
            return tail.count("{") > tail.count("}")

        def qualified_namespace(nested: Optional[str]) -> str:
            """Build the namespace string recorded on a unit."""
            if not system.namespace:
                return "mp_units"
            if nested:
                return f"mp_units::{system.namespace}::{nested}"
            return f"mp_units::{system.namespace}"

        def add_named_unit(
            position: int, name: str, symbol: str, definition_raw: str
        ) -> None:
            """Filter one named_unit match and append it to system.units."""
            definition = self._extract_template_arg(definition_raw)

            if inside_unit_symbols(position):
                return

            if filter_by_namespace:
                # Skip units that sit in a nested namespace or outside the
                # namespace being searched
                if self._get_nested_namespace(content, position, namespace_to_search):
                    return
                if not self._is_in_namespace(content, position, namespace_to_search):
                    return
            # Without a namespace to search, every unit is accepted

            # Base unit: kind_of<> with no equation before it
            is_base = "kind_of<" in definition and not any(
                op in definition.split("kind_of<")[0] for op in equation_tokens
            )

            nested_ns = self._get_nested_namespace(
                content,
                position,
                system.namespace if filter_by_namespace else "",
            )
            # Origin namespace (e.g., non_si)
            origin_ns = self._detect_origin_namespace(
                content, position, system.namespace
            )

            system.units.append(
                Unit(
                    name=name,
                    symbol=symbol,
                    definition=definition,
                    namespace=qualified_namespace(nested_ns),
                    file=file,
                    is_base=is_base,
                    subnamespace=nested_ns,
                    origin_namespace=origin_ns,
                )
            )

        # Pattern 1a: inline constexpr struct NAME final : named_unit<"symbol", ...> {} NAME;
        unit_pattern_simple = (
            r"inline\s+constexpr\s+struct\s+(\w+)\s+final\s*:\s*"
            r'named_unit<"([^"]+)",\s*(.+?)>\s*\{\}\s*(\w+)\s*;'
        )

        # Pattern 1b: inline constexpr struct NAME final :
        # named_unit<symbol_text{u8"unicode", "ascii"}, ...> {} NAME;
        # Handle optional comments inside symbol_text like /* U+2030 PER MILLE SIGN */
        unit_pattern_text = (
            r"inline\s+constexpr\s+struct\s+(\w+)\s+final\s*:\s*"
            r'named_unit<symbol_text\{u8"([^"]+)"(?:\s*/\*[^*]*\*/)?\s*,\s*'
            r'"([^"]+)"\},\s*(.+?)>\s*\{\}\s*(\w+)\s*;'
        )

        # Parse units with symbol_text first
        for match in re.finditer(unit_pattern_text, content, re.DOTALL):
            # Combine both symbols for display, escaping backticks for markdown
            ascii_escaped = match.group(3).replace("`", "\\`")
            add_named_unit(
                match.start(),
                match.group(5),
                f"{match.group(2)} ({ascii_escaped})",
                match.group(4),
            )

        # Parse units with simple string symbols
        for match in re.finditer(unit_pattern_simple, content, re.DOTALL):
            add_named_unit(
                match.start(), match.group(4), match.group(2), match.group(3)
            )

        # Pattern 2: inline constexpr auto NAME = expression; (not in unit_symbols)
        auto_pattern = r"inline\s+constexpr\s+auto\s+(\w+)\s*=\s*([^;]+);"

        # Skip unit_symbols namespace for auto patterns - split on namespace
        # declaration, not include
        unit_symbols_ns_match = re.search(r"namespace\s+.*unit_symbols", content)
        if unit_symbols_ns_match:
            main_content = content[: unit_symbols_ns_match.start()]
        else:
            main_content = content

        for match in re.finditer(auto_pattern, main_content):
            var_name = match.group(1)
            definition = match.group(2).strip()

            # Skip if it looks like a constant (handled separately)
            if any(
                keyword in var_name
                for keyword in ("constant", "speed", "mass", "charge")
            ):
                continue

            # Skip simple aliases for now - they'll be processed later
            if "<" not in definition:
                continue

            match_pos = match.start()
            nested_ns = self._get_nested_namespace(
                main_content, match_pos, system.namespace
            )
            # Origin namespace (e.g., non_si)
            origin_ns = self._detect_origin_namespace(
                main_content, match_pos, system.namespace
            )

            system.units.append(
                Unit(
                    name=var_name,
                    symbol="",  # Not determined here
                    definition=definition,
                    # Same rule as the named units, so an empty system
                    # namespace gives "mp_units" rather than "mp_units::"
                    namespace=qualified_namespace(nested_ns),
                    file=file,
                    is_base=False,
                    subnamespace=nested_ns,
                    origin_namespace=origin_ns,
                )
            )

    def _extract_template_arg(self, text: str) -> str:
        """Return the part of ``text`` that precedes the first unbalanced ``>``.

        Angle brackets are tracked with a nesting counter, so nested template
        arguments such as ``a<b<c>>`` are kept intact. Scanning stops at the
        first ``>`` that has no matching ``<``; when there is none, the whole
        text is used. The result is stripped of surrounding whitespace.
        """
        nesting = 0
        cut = len(text)
        for index, ch in enumerate(text):
            if ch == "<":
                nesting += 1
            elif ch == ">":
                if nesting == 0:
                    # This bracket closes the enclosing template: stop here
                    cut = index
                    break
                nesting -= 1
        return text[:cut].strip()

    def _parse_point_origins(self, content: str, system: SystemInfo, file: str):
        """Collect point origins declared in ``content`` into ``system.point_origins``.

        Recognised declarations have the form::

            inline constexpr struct NAME final : absolute_point_origin<...> {} NAME;
            inline constexpr struct NAME final : relative_point_origin<...> {} NAME;

        A declaration is ignored when, counting braces from the last
        ``namespace unit_symbols`` text before it, that namespace still looks
        open. Every recorded origin is placed in ``mp_units::<system.namespace>``.
        """
        declaration_re = re.compile(
            r"inline\s+constexpr\s+struct\s+(\w+)\s+final\s*:\s*"
            r"(absolute|relative)_point_origin<(.+?)>\s*\{\}\s*(\w+)\s*;",
            re.DOTALL,
        )

        for declaration in declaration_re.finditer(content):
            # Groups: struct name, 'absolute'/'relative', raw template text, variable
            _, kind, raw_argument, variable = declaration.groups()

            # More "{" than "}" since the last unit_symbols namespace header
            # means the declaration sits inside that namespace.
            preceding = content[: declaration.start()]
            symbols_at = preceding.rfind("namespace unit_symbols")
            if symbols_at != -1:
                since_header = preceding[symbols_at:]
                if since_header.count("{") > since_header.count("}"):
                    continue

            system.point_origins.append(
                PointOrigin(
                    name=variable,
                    origin_type=kind,
                    # The lazy regex group may stop early or late on nested
                    # brackets; re-balance it.
                    definition=self._extract_template_arg(raw_argument),
                    namespace=f"mp_units::{system.namespace}",
                    file=file,
                )
            )

    def _parse_prefixes(self, content: str, system: SystemInfo, file: str):
        """Parse prefix templates (and the chrono point origin template).

        A prefix is declared in two steps that are matched up by name::

            template<PrefixableUnit U> struct NAME_ final : prefixed_unit<SYMBOL, MAG, U{}> {};
            template<PrefixableUnit auto U> constexpr NAME_<...> NAME;

        ``SYMBOL`` is either a plain string literal or
        ``symbol_text{u8"...", "..."}``; in the latter case both spellings are
        combined as ``unicode (ascii)``. Each variable template whose class
        template was found is appended to ``system.prefixes``.

        The same two-step shape is used by ``chrono_point_origin``; when both
        of its parts are present it is appended to ``system.point_origins`` as
        an absolute origin.
        """
        class_pattern = (
            r"template<PrefixableUnit U>\s+struct\s+(\w+)_\s+final\s*:\s*"
            r'prefixed_unit<(?:symbol_text\{u8"([^"]+)"\s*,\s*"([^"]+)"\}'
            r'|"([^"]+)"),\s*(.+?),\s*U\{\}>\s*\{\}\s*;'
        )

        # class template name (without trailing "_") -> (symbol, definition)
        templates_by_name = {}
        for found in re.finditer(class_pattern, content, re.DOTALL):
            name, unicode_symbol, ascii_symbol, simple_symbol, raw_definition = (
                found.groups()
            )

            if unicode_symbol and ascii_symbol:
                # Backticks are escaped in the ASCII spelling
                escaped = ascii_symbol.replace("`", "\\`")
                symbol = f"{unicode_symbol} ({escaped})"
            else:
                symbol = simple_symbol

            templates_by_name[name] = (
                symbol,
                self._extract_template_arg(raw_definition.strip()),
            )

        var_pattern = (
            r"template<PrefixableUnit auto U>\s+constexpr\s+(\w+)_<[^>]+>\s+(\w+)\s*;"
        )
        for found in re.finditer(var_pattern, content):
            class_ref, var_name = found.groups()
            entry = templates_by_name.get(class_ref)
            if entry is None:
                # Variable template without a matching class template
                continue

            symbol, definition = entry
            system.prefixes.append(
                Prefix(
                    name=var_name,
                    symbol=symbol,
                    definition=definition,
                    namespace=f"mp_units::{system.namespace}",
                    file=file,
                )
            )

        # chrono_point_origin:
        # template<typename C> struct chrono_point_origin_ final :
        #   absolute_point_origin<isq::time> { using clock = C; };
        # template<typename C> constexpr chrono_point_origin_<C> chrono_point_origin;
        chrono_class_pattern = (
            r"template<typename C>\s+struct\s+chrono_point_origin_\s+final\s*:\s*"
            r"absolute_point_origin<(.+?)>\s*\{[^}]*\}\s*;"
        )
        chrono_class = re.search(chrono_class_pattern, content, re.DOTALL)
        if not chrono_class:
            return

        chrono_var_pattern = (
            r"template<typename C>\s+constexpr\s+"
            r"chrono_point_origin_<C>\s+(\w+)\s*;"
        )
        chrono_var = re.search(chrono_var_pattern, content)
        if not chrono_var:
            return

        # Recorded as a point origin, not as a prefix
        system.point_origins.append(
            PointOrigin(
                name=chrono_var.group(1),
                origin_type="absolute",
                definition=self._extract_template_arg(chrono_class.group(1)),
                namespace=f"mp_units::{system.namespace}",
                file=file,
            )
        )

    def _parse_aliases(self, content: str, system: SystemInfo, file: str):
        """Record ``inline constexpr auto NAME = target;`` aliases.

        Only the text before the first ``namespace ... unit_symbols`` match is
        scanned, and right-hand sides containing ``<`` are skipped (those are
        unit definitions). The target is resolved, in this order, as a point
        origin, a quantity or a unit:

        * a qualified target (``sys::name``) is looked up only in the system
          keyed by its first component, using its last component as the name;
        * an unqualified target is looked up in ``system`` first and then in
          every other known system.

        The first hit produces a copy of the target under the alias name, in
        the current system's namespace, with ``alias_target`` set to a display
        name. Aliases whose target cannot be resolved are dropped.
        """
        alias_pattern = r"inline\s+constexpr\s+auto\s+(\w+)\s*=\s*([^;]+);"

        # Skip unit_symbols namespace
        symbols_ns = re.search(r"namespace\s+.*unit_symbols", content)
        main_content = content[: symbols_ns.start()] if symbols_ns else content

        def find_target(candidates, collection, wanted):
            """Return the first entity called ``wanted`` in the candidates' ``collection``."""
            for candidate in candidates:
                if candidate is None:
                    continue
                for entity in getattr(candidate, collection):
                    if entity.name == wanted:
                        return entity
            return None

        def display_name(written, target):
            """Return how the alias target should be shown."""
            if "::" in written:
                # Qualified name in source - use it as-is
                return written
            # Unqualified name - strip namespace if same system
            target_system_name = target.namespace.replace("mp_units::", "")
            if target_system_name == system.namespace:
                return target.name
            return f"{target_system_name}::{target.name}"

        for found in re.finditer(alias_pattern, main_content):
            alias_name = found.group(1)
            target_name = found.group(2).strip()

            # Angle brackets mean a unit definition, not an alias
            if "<" in target_name:
                continue

            if "::" in target_name:
                # Qualified name - look only in the specified system
                qualifiers = target_name.split("::")
                target_sys_name, target_lookup = qualifiers[0], qualifiers[-1]
                if target_sys_name in self.systems:
                    search_systems = [self.systems.get(target_sys_name)]
                else:
                    search_systems = []
            else:
                # Unqualified name - search current system first, then others
                target_lookup = target_name
                search_systems = [system]
                search_systems.extend(
                    s for s in self.systems.values() if s != system
                )

            origin = find_target(search_systems, "point_origins", target_lookup)
            if origin:
                # Alias lives in the current system's namespace, not the target's
                system.point_origins.append(
                    PointOrigin(
                        name=alias_name,
                        origin_type=origin.origin_type,
                        definition=origin.definition,
                        namespace=f"mp_units::{system.namespace}",
                        file=file,
                        alias_target=display_name(target_name, origin),
                    )
                )
                continue

            quantity = find_target(search_systems, "quantities", target_lookup)
            if quantity:
                system.quantities.append(
                    Quantity(
                        name=alias_name,
                        parent=quantity.parent,
                        dimension=quantity.dimension,
                        equation=quantity.equation,
                        namespace=f"mp_units::{system.namespace}",
                        file=file,
                        is_kind=quantity.is_kind,
                        alias_target=display_name(target_name, quantity),
                    )
                )
                continue

            unit = find_target(search_systems, "units", target_lookup)
            if unit:
                # Alias lives in the current system's namespace, not the target's
                system.units.append(
                    Unit(
                        name=alias_name,
                        symbol=unit.symbol,
                        definition=unit.definition,
                        namespace=f"mp_units::{system.namespace}",
                        file=file,
                        is_base=unit.is_base,
                        subnamespace=unit.subnamespace,
                        origin_namespace=unit.origin_namespace,
                        alias_target=display_name(target_name, unit),
                    )
                )

    def _parse_unit_symbols(self, content: str, system: SystemInfo):
        """Attach short symbol names from ``unit_symbols`` namespaces to units.

        Every ``namespace [qualifier::]unit_symbols { ... }`` block in
        ``content`` is examined (its body is taken up to the first ``}``):

        * ``inline constexpr auto SYM = [ns::]unit;`` adds ``SYM`` to the unit
          named ``unit``. Compound right-hand sides such as
          ``mile / si::hour`` do not match and are ignored.
        * ``using [ns::]unit;`` adds the unit's own name as a symbol.

        Only the first unit of ``system.units`` with the matching name is
        updated, and a symbol is never added twice.
        """
        assignment_re = re.compile(
            r"inline\s+constexpr\s+auto\s+(\w+)\s*=\s*([\w:]+)\s*;"
        )
        using_re = re.compile(r"using\s+(?:[\w:]+::)?(\w+)\s*;")
        namespace_re = re.compile(
            r"namespace\s+(?:[\w:]+::)?unit_symbols\s*\{(.*?)\}", re.DOTALL
        )

        def register(unit_name, symbol):
            """Add ``symbol`` to the first unit called ``unit_name``, if any."""
            owner = next((u for u in system.units if u.name == unit_name), None)
            if owner is not None and symbol not in owner.unit_symbols:
                owner.unit_symbols.append(symbol)

        for namespace_block in namespace_re.finditer(content):
            body = namespace_block.group(1)

            for assignment in assignment_re.finditer(body):
                symbol_name, unit_ref = assignment.groups()
                # Drop any namespace qualification from the referenced unit
                register(unit_ref.split("::")[-1], symbol_name)

            # e.g. "using si::ohm;" exposes the unit under its own name
            for declaration in using_re.finditer(body):
                imported = declaration.group(1)
                register(imported, imported)

    def _parse_using_declarations(self, content: str, system: SystemInfo, file: str):
        """Record ``using ns::name;`` declarations as imported (alias) units.

        Declarations whose namespace contains ``std`` are skipped, since those
        are likely functions rather than units. Every other declaration is
        appended to ``system.units`` with ``is_alias=True`` and a symbol and
        definition that describe where it was imported from.
        """
        for source_ns, imported in re.findall(
            r"using\s+([\w:]+)::([\w]+)\s*;", content
        ):
            # Skip if importing from std namespace (likely functions)
            if "std" in source_ns:
                continue

            system.units.append(
                Unit(
                    name=imported,
                    symbol=f"(imported from {source_ns})",
                    definition=f"using {source_ns}::{imported}",
                    namespace=f"mp_units::{system.namespace}",
                    file=file,
                    is_alias=True,
                )
            )


class DocumentationGenerator:
    """Generates markdown documentation from parsed systems"""

    def __init__(self, parser: SystemsParser, output_dir: Path):
        self.parser = parser
        self.output_dir = output_dir
        self.output_dir.mkdir(parents=True, exist_ok=True)

    @staticmethod
    def _write_auto_generated_header(f):
        """Write auto-generation warning header to a file"""
        f.write("<!-- This file is auto-generated. Do not edit manually. -->\n")
        f.write("<!-- Run: python3 scripts/systems_reference.py --force -->\n\n")

    @staticmethod
    def _get_prefix_magnitude(prefix: Prefix) -> float:
        """Extract magnitude from prefix definition for sorting.

        Parses definitions like 'mag_power<10, 3>' to get 10^3 = 1000.
        Returns 1.0 if parsing fails.
        """
        import re

        match = re.search(r"mag_power<\s*(\d+)\s*,\s*(-?\d+)\s*>", prefix.definition)
        if match:
            base = int(match.group(1))
            exponent = int(match.group(2))
            return base**exponent
        return 1.0

    @staticmethod
    def _get_system_display_name(namespace: str) -> str:
        """Get display name for a system namespace."""
        if namespace == "isq_angle":
            return "ISQ Angle"
        elif namespace in ["cgs", "hep", "iau", "iec", "iec80000", "isq", "si", "usc"]:
            return namespace.upper()
        else:
            return namespace.replace("_", " ").title()

    def update_mkdocs_config(self, mkdocs_path: Path):
        """Update mkdocs.yml with generated pages"""
        if not mkdocs_path.exists():
            print(f"Warning: mkdocs.yml not found at {mkdocs_path}")
            return

        try:
            content = mkdocs_path.read_text()
            ref_start = content.find("  - Reference:")
            if ref_start == -1:
                print("Warning: Could not find Reference section in mkdocs.yml")
                return

            ref_end = content.find("\n  - ", ref_start + 1)
            if ref_end == -1:
                ref_end = len(content)

            systems_ref_lines = self._build_systems_reference_yaml()
            systems_ref_start = content.find(
                "      - Systems Reference:", ref_start, ref_end
            )

            if systems_ref_start != -1:
                next_item = ref_end
                for match in re.finditer(
                    r"\n      - [A-Z]", content[systems_ref_start + 1 : ref_end]
                ):
                    next_item = systems_ref_start + 1 + match.start()
                    break

                new_content = (
                    content[:systems_ref_start]
                    + systems_ref_lines.rstrip("\n")
                    + content[next_item:]
                )
            else:
                insert_pos = content.find(
                    "      - Supported Systems Overview:", ref_start, ref_end
                )
                if insert_pos == -1:
                    insert_pos = content.find(
                        "      - Cheat Sheet:", ref_start, ref_end
                    )

                if insert_pos != -1:
                    line_end = content.find("\n", insert_pos)
                    new_content = (
                        content[: line_end + 1]
                        + systems_ref_lines
                        + content[line_end + 1 :]
                    )
                else:
                    print("Warning: Could not find insertion point in mkdocs.yml")
                    return

            mkdocs_path.write_text(new_content)
            print("  ✓ Updated mkdocs.yml with generated pages")

        except Exception as e:
            print(f"Warning: Could not update mkdocs.yml: {e}")

    def _build_systems_reference_yaml(self):
        """Build the YAML text for Systems Reference section"""
        lines = ["      - Systems Reference:\n"]
        lines.append("          - Overview: reference/systems_reference/index.md\n")
        lines.append(
            "          - Dimensions: reference/systems_reference/dimensions_index.md\n"
        )
        lines.append(
            "          - Quantities: reference/systems_reference/quantities_index.md\n"
        )
        lines.append("          - Units: reference/systems_reference/units_index.md\n")
        lines.append(
            "          - Prefixes: reference/systems_reference/prefixes_index.md\n"
        )
        lines.append(
            "          - Point Origins: reference/systems_reference/point_origins_index.md\n"
        )
        lines.append("          - Systems:\n")

        for namespace in sorted(self.parser.systems.keys()):
            system = self.parser.systems[namespace]
            if not (system.units or system.dimensions or system.quantities):
                continue

            display_name = self._get_system_display_name(namespace)
            lines.append(
                f"              - {display_name}: reference/systems_reference/systems/{namespace}.md\n"
            )

        # Add Quantity Hierarchies section
        lines.append("          - Quantity Hierarchies:\n")
        lines.append(
            "              - Overview: reference/systems_reference/hierarchies/index.md\n"
        )

        # Collect all root quantities with their systems
        root_to_systems = defaultdict(list)  # root_name -> [system_namespaces]
        for namespace in sorted(self.parser.systems.keys()):
            system = self.parser.systems[namespace]
            if system.quantities:
                for qty in system.quantities:
                    # Check if root using C++ extracted parent
                    is_root = (
                        hasattr(qty, "parent_from_cpp")
                        and qty.parent_from_cpp == "<root>"
                    )
                    if is_root:
                        root_to_systems[qty.name].append(namespace)

        # Add hierarchy pages with system names if multiple systems have same root
        for root_name in sorted(root_to_systems.keys()):
            systems = root_to_systems[root_name]
            # dimensionless is always cross-system
            if root_name == "dimensionless":
                lines.append(
                    f"              - {root_name}: reference/systems_reference/hierarchies/{root_name}.md\n"
                )
            elif len(systems) == 1:
                lines.append(
                    f"              - {root_name}: reference/systems_reference/hierarchies/{root_name}.md\n"
                )
            else:
                # Multiple systems: add with system name suffix in TOC
                for ns in sorted(systems):
                    lines.append(
                        f"              - {root_name} ({ns}): "
                        f"reference/systems_reference/hierarchies/{root_name}_{ns}.md\n"
                    )

        return "".join(lines)

    def generate_systems_index(self):
        """Generate the main systems index page"""
        output_file = self.output_dir / "index.md"

        with open(output_file, "w") as f:
            self._write_auto_generated_header(f)
            f.write("# Systems Reference\n\n")
            f.write(
                "Automatically generated reference documentation "
                "for all **mp-units** systems.\n\n"
            )
            f.write("## Indexes\n\n")
            f.write("- [Dimensions](dimensions_index.md) - All base dimensions\n")
            f.write("- [Quantities](quantities_index.md) - All quantities\n")
            f.write("- [Units](units_index.md) - All units\n")
            f.write("- [Prefixes](prefixes_index.md) - All prefixes\n")
            f.write("- [Point Origins](point_origins_index.md) - All point origins\n\n")
            f.write("## Systems\n\n")

            # Write table header
            f.write(
                "| System | Dimensions | Quantities | Units | Prefixes | Point Origins |\n"
            )
            f.write(
                "|--------|:----------:|:----------:|:-----:|:--------:|:-------------:|\n"
            )

            for namespace in sorted(self.parser.systems.keys()):
                system = self.parser.systems[namespace]
                display = self._get_system_display_name(namespace)
                dims = len(system.dimensions)
                qtys = len(system.quantities)
                units = len(system.units)
                prefixes = len(system.prefixes)
                origins = len(system.point_origins)

                # Format counts with em-dash for zero
                dims_str = str(dims) if dims else "—"
                qtys_str = str(qtys) if qtys else "—"
                units_str = str(units) if units else "—"
                prefixes_str = str(prefixes) if prefixes else "—"
                origins_str = str(origins) if origins else "—"

                f.write(
                    f"| [{display}](systems/{namespace}.md) | {dims_str} | {qtys_str} | {units_str} | {prefixes_str} | {origins_str} |\n"
                )

    def generate_dimensions_index(self):
        """Generate alphabetical dimensions index"""
        output_file = self.output_dir / "dimensions_index.md"

        all_dimensions = []
        for sys_key, system in self.parser.systems.items():
            for dim in system.dimensions:
                display_ns = system.namespace if system.namespace else "mp_units"
                all_dimensions.append((dim.name, dim.symbol, display_ns, sys_key))

        with open(output_file, "w") as f:
            self._write_auto_generated_header(f)
            f.write("# Dimensions Index\n\n")
            f.write("Alphabetical list of all base dimensions.\n\n")

            # Sort by name first, then namespace
            for name, symbol, display_ns, sys_key in sorted(
                all_dimensions, key=lambda x: (x[0], x[2])
            ):
                f.write(f"- [{name} ({display_ns})](systems/{sys_key}.md#{name})\n")

            f.write(f"\n**Total dimensions:** {len(all_dimensions)}\n")

    def generate_quantities_index(self):
        """Generate alphabetical quantities index"""
        output_file = self.output_dir / "quantities_index.md"

        all_quantities = []
        for sys_key, system in self.parser.systems.items():
            for qty in system.quantities:
                # Use "mp_units" for empty namespace (core system)
                display_ns = system.namespace if system.namespace else "mp_units"
                all_quantities.append((qty.name, sys_key, display_ns))

        with open(output_file, "w") as f:
            self._write_auto_generated_header(f)
            f.write("# Quantities Index\n\n")
            f.write("Alphabetical list of all quantities.\n\n")

            # Sort by name first, then system key
            for name, sys_key, display_ns in sorted(
                all_quantities, key=lambda x: (x[0], x[1])
            ):
                f.write(f"- [`{name}` ({display_ns})](systems/{sys_key}.md#{name})\n")

            f.write(f"\n**Total quantities:** {len(all_quantities)}\n")

    def generate_units_index(self):
        """Generate alphabetical units index"""
        output_file = self.output_dir / "units_index.md"

        all_units = []
        for sys_key, system in self.parser.systems.items():
            for unit in system.units:
                # Extract the namespace path after mp_units::
                full_ns = unit.namespace.replace("mp_units::", "")
                # If empty (core system), show "mp_units"
                if not full_ns:
                    full_ns = "mp_units"
                all_units.append((unit.name, sys_key, full_ns, unit))

        with open(output_file, "w") as f:
            self._write_auto_generated_header(f)
            f.write("# Units Index\n\n")
            f.write("Alphabetical list of all units.\n\n")

            # Sort by unit name first, then full namespace
            for name, sys_key, full_ns, unit in sorted(
                all_units, key=lambda x: (x[0], x[2])
            ):
                f.write(f"- [`{name}` ({full_ns})](systems/{sys_key}.md#{name})\n")

            f.write(f"\n**Total units:** {len(all_units)}\n")

    def generate_cross_system_hierarchies(self):
        """Generate quantity hierarchies (separate files when same root exists in multiple systems)
        Returns the number of hierarchy files generated.
        """
        hierarchies_dir = self.output_dir / "hierarchies"
        hierarchies_dir.mkdir(exist_ok=True)

        # Clean up old hierarchy files
        for old_file in hierarchies_dir.glob("*.md"):
            if old_file.name != "index.md":
                old_file.unlink()

        # Collect all root quantities by (name, namespace)
        root_to_systems = defaultdict(list)  # root_name -> [(system_namespace, system)]
        hierarchy_count = 0

        for namespace, system in self.parser.systems.items():
            if not system.quantities:
                continue

            # Check if this system has dimensionless children
            has_dimensionless_children = any(
                hasattr(q, "parent_from_cpp") and q.parent_from_cpp == "dimensionless"
                for q in system.quantities
            )

            if has_dimensionless_children:
                # Check if dimensionless root already exists in this system
                has_dimensionless_root = any(
                    q.name == "dimensionless"
                    and hasattr(q, "parent_from_cpp")
                    and q.parent_from_cpp == "<root>"
                    for q in system.quantities
                )

                if not has_dimensionless_root:
                    # Add synthetic dimensionless root
                    dimensionless_root = Quantity(
                        name="dimensionless",
                        parent="",
                        dimension="dimensionless",
                        equation="",
                        namespace=f"mp_units::{namespace}",
                        file="",
                        is_kind=True,
                        alias_target="",
                        dimensional_formula="1",
                        character="Real",
                        kind_of="dimensionless",
                        parent_from_cpp="<root>",
                    )
                    system.quantities.append(dimensionless_root)

            # Find roots - use C++ extracted parent only, skip aliases
            for qty in system.quantities:
                # Skip aliases - they should not be counted as roots
                if qty.alias_target:
                    continue

                is_root = False
                if hasattr(qty, "parent_from_cpp"):
                    is_root = (
                        qty.parent_from_cpp == "<root>" or qty.parent_from_cpp == ""
                    )

                if is_root:
                    root_to_systems[qty.name].append((namespace, system))

        # Decide how to generate files for each root name
        for root_name in sorted(root_to_systems.keys()):
            systems_info = root_to_systems[root_name]

            # Special case: dimensionless is always cross-system
            if root_name == "dimensionless":
                self._generate_hierarchy_file(
                    hierarchies_dir / "dimensionless.md",
                    root_name,
                    systems_info,
                    cross_system=True,
                )
                hierarchy_count += 1
            # If only one system has this root, create single file
            elif len(systems_info) == 1:
                namespace, system = systems_info[0]
                hierarchy_file = hierarchies_dir / f"{root_name}.md"
                self._generate_hierarchy_file(
                    hierarchy_file, root_name, [(namespace, system)], cross_system=False
                )
                hierarchy_count += 1
            # Multiple systems have roots with same name: create separate files for each
            else:
                for namespace, system in systems_info:
                    hierarchy_file = hierarchies_dir / f"{root_name}_{namespace}.md"
                    self._generate_hierarchy_file(
                        hierarchy_file,
                        root_name,
                        [(namespace, system)],
                        cross_system=False,
                    )
                    hierarchy_count += 1

        return hierarchy_count

    def _generate_hierarchy_file(
        self,
        hierarchy_file: Path,
        root_name: str,
        systems_info: list,
        cross_system: bool,
    ):
        """Generate a single hierarchy file"""
        # Collect all quantities from the specified systems
        all_quantities = []
        for namespace, system in systems_info:
            all_quantities.extend(system.quantities)

        # Get the root quantity from the first system (they should all be equivalent)
        first_namespace, first_system = systems_info[0]
        root_qty = next(
            (
                q
                for q in first_system.quantities
                if q.name == root_name
                and hasattr(q, "parent_from_cpp")
                and q.parent_from_cpp == "<root>"
            ),
            None,
        )

        if not root_qty:
            return

        with open(hierarchy_file, "w") as hf:
            self._write_auto_generated_header(hf)
            hf.write(f"# {root_name} Hierarchy\n\n")

            # List all systems contributing to this hierarchy
            system_names = []
            for ns, _ in systems_info:
                if ns == "isq_angle":
                    system_names.append("ISQ Angle")
                elif ns in ["cgs", "hep", "iau", "iec", "iec80000", "isq", "si", "usc"]:
                    system_names.append(ns.upper())
                else:
                    system_names.append(ns.replace("_", " ").title())

            if len(system_names) == 1:
                hf.write(f"**System:** {system_names[0]}\n\n")
            else:
                hf.write(f"**Systems:** {', '.join(system_names)}\n\n")

            if root_qty.dimension:
                hf.write(f"**Dimension:** {root_qty.dimension}\n\n")

            # Create qualified names for all quantities (namespace::name, but no prefix for dimensionless)
            qualified_quantities = []
            for qty in all_quantities:
                sys_ns = qty.namespace.replace("mp_units::", "")
                if qty.name == "dimensionless":
                    qualified_name = "dimensionless"
                else:
                    qualified_name = f"{sys_ns}::{qty.name}"
                qualified_quantities.append((qualified_name, qty))

            # Build children map using qualified names - use C++ extracted parent only
            qty_children = defaultdict(list)
            for qualified_name, qty in qualified_quantities:
                parent_name = None
                if (
                    hasattr(qty, "parent_from_cpp")
                    and qty.parent_from_cpp
                    and qty.parent_from_cpp != "<root>"
                ):
                    parent_name = qty.parent_from_cpp

                if parent_name:
                    # Get the qualified parent name
                    if parent_name == "dimensionless":
                        qualified_parent = "dimensionless"
                    else:
                        # C++ parent already includes namespace (e.g., 'isq::length')
                        qualified_parent = parent_name
                    qty_children[qualified_parent].append((qualified_name, qty))

            # Generate Mermaid diagram
            hf.write(
                self._build_mermaid_hierarchy(
                    root_name, qty_children, qualified_quantities
                )
            )
            hf.write("\n")

    def generate_hierarchies_overview(self):
        """Generate ``hierarchies/index.md``, the overview of all hierarchies.

        Root quantities (``parent_from_cpp == "<root>"``, aliases excluded) are
        grouped by dimensional formula. Each group becomes a section listing
        links to the per-root hierarchy pages. A root name found in more than
        one system links to ``<root>_<namespace>.md``, otherwise to
        ``<root>.md``; ``dimensionless`` always links to ``dimensionless.md``.
        """
        hierarchies_dir = self.output_dir / "hierarchies"
        hierarchies_dir.mkdir(parents=True, exist_ok=True)
        output_file = hierarchies_dir / "index.md"

        # dimensional formula -> {qualified name: (namespace, root name)}.
        # A dict de-duplicates "dimensionless", which several systems share.
        dimension_to_roots = defaultdict(dict)
        # root name -> namespaces defining it; decides whether a link needs the
        # namespace suffix.
        global_root_counts = defaultdict(set)

        for namespace, system in self.parser.systems.items():
            if not system.quantities:
                continue

            for qty in system.quantities:
                if getattr(qty, "parent_from_cpp", None) != "<root>":
                    continue
                # Aliases are never roots: no hierarchy page is generated for
                # them, so listing them would produce dead links and could
                # wrongly trigger the namespace suffix for the real root.
                if qty.alias_target:
                    continue

                dim_formula = getattr(qty, "dimensional_formula", None) or "?"
                if qty.name == "dimensionless":
                    qualified_name = "dimensionless"
                else:
                    qualified_name = f"{namespace}::{qty.name}"
                dimension_to_roots[dim_formula][qualified_name] = (
                    namespace,
                    qty.name,
                )
                global_root_counts[qty.name].add(namespace)

        def dim_sort_key(dim_formula):
            """Order: "1" first, then shortest formula first, "?" last."""
            if dim_formula == "1":
                return (0, 0, dim_formula)
            if dim_formula == "?":
                return (2, 0, dim_formula)
            # Base dimensions (L, M, T) sort before longer derived formulas;
            # ties are broken alphabetically.
            return (1, len(dim_formula), dim_formula)

        sorted_dims = sorted(dimension_to_roots, key=dim_sort_key)

        # Explicit UTF-8: formulas such as "LT⁻¹" are not encodable in every
        # locale's default encoding.
        with open(output_file, "w", encoding="utf-8") as f:
            self._write_auto_generated_header(f)
            f.write("# Quantity Hierarchies\n\n")
            f.write(
                "This section contains all quantity hierarchy trees across all systems, "
                "grouped by their dimensional formula to help identify dimensionally "
                "equivalent quantities.\n\n"
            )

            for idx, dim_formula in enumerate(sorted_dims):
                roots = dimension_to_roots[dim_formula]
                f.write(f"## Dimension: {dim_formula}\n\n")

                for qualified_name in sorted(roots):
                    root_namespace, root_name = roots[qualified_name]
                    if (
                        root_name != "dimensionless"
                        and len(global_root_counts[root_name]) > 1
                    ):
                        # Several systems define this root: one page per system.
                        target = f"{root_name}_{root_namespace}.md"
                    else:
                        target = f"{root_name}.md"
                    f.write(f"- [`{qualified_name}`]({target})\n")

                # Blank line between sections, but not after the last one.
                if idx < len(sorted_dims) - 1:
                    f.write("\n")

    def generate_point_origins_index(self):
        """Generate ``point_origins_index.md``, an alphabetical origin list.

        Every point origin of every parsed system becomes one bullet, sorted by
        origin name and then by system namespace. The bullet shows the system's
        namespace and links to the origin's anchor on that system's page.
        """
        output_file = self.output_dir / "point_origins_index.md"

        # (origin name, namespace shown to the reader, page key). Per-system
        # pages are named after the key in ``self.parser.systems``, so the link
        # target uses the key even when the namespace differs (e.g. is empty).
        all_origins = [
            (origin.name, system.namespace, system_key)
            for system_key, system in self.parser.systems.items()
            for origin in system.point_origins
        ]

        # Explicit UTF-8 so the result does not depend on the locale encoding.
        with open(output_file, "w", encoding="utf-8") as f:
            self._write_auto_generated_header(f)
            f.write("# Point Origins Index\n\n")
            f.write("Alphabetical list of all point origins.\n\n")

            # Sort by origin name first, then system namespace.
            for name, sys_ns, page in sorted(
                all_origins, key=lambda entry: (entry[0], entry[1])
            ):
                f.write(f"- [`{name}` ({sys_ns})](systems/{page}.md#{name})\n")

            f.write(f"\n**Total point origins:** {len(all_origins)}\n")

    def generate_prefixes_index(self):
        """Generate ``prefixes_index.md``, an alphabetical prefix list.

        Every prefix of every parsed system becomes one bullet, sorted by prefix
        name and then by system namespace. The bullet shows the system's
        namespace and links to the prefix's anchor on that system's page.
        """
        output_file = self.output_dir / "prefixes_index.md"

        # (prefix name, namespace shown to the reader, page key). Per-system
        # pages are named after the key in ``self.parser.systems``, so the link
        # target uses the key even when the namespace differs (e.g. is empty).
        all_prefixes = [
            (prefix.name, system.namespace, system_key)
            for system_key, system in self.parser.systems.items()
            for prefix in system.prefixes
        ]

        # Explicit UTF-8 so the result does not depend on the locale encoding.
        with open(output_file, "w", encoding="utf-8") as f:
            self._write_auto_generated_header(f)
            f.write("# Prefixes Index\n\n")
            f.write("Alphabetical list of all prefixes.\n\n")

            # Sort by prefix name first, then system namespace.
            for name, sys_ns, page in sorted(
                all_prefixes, key=lambda entry: (entry[0], entry[1])
            ):
                f.write(f"- [`{name}` ({sys_ns})](systems/{page}.md#{name})\n")

            f.write(f"\n**Total prefixes:** {len(all_prefixes)}\n")

    def _compute_global_root_counts(self):
        """Compute which root names exist in multiple systems"""
        global_root_counts = defaultdict(set)  # root_name -> set of namespaces

        for namespace, system in self.parser.systems.items():
            for qty in system.quantities:
                is_root = (
                    hasattr(qty, "parent_from_cpp") and qty.parent_from_cpp == "<root>"
                )
                if is_root and not qty.alias_target:
                    global_root_counts[qty.name].add(namespace)

        return global_root_counts

    def _get_hierarchy_filename(self, root_name, system_namespace, global_root_counts):
        """Get the correct hierarchy filename for a root"""
        if root_name == "dimensionless":
            return f"{root_name}.md"
        elif len(global_root_counts[root_name]) > 1:
            # Multiple systems have this root - use namespace-specific file
            return f"{root_name}_{system_namespace}.md"
        else:
            # Only one system has this root - no namespace suffix
            return f"{root_name}.md"

    def generate_per_system_pages(self):
        """Generate one Markdown reference page per system.

        For every key of ``self.parser.systems`` (in sorted order) a page
        ``<output_dir>/systems/<key>.md`` is written containing, in this order:
        the title, the namespace and module lines, the header list and then the
        Dimensions, Quantities, Units, Non-SI units (``si`` only), Prefixes and
        Point Origins tables. Empty sections are omitted and sections are
        separated by a single blank line.

        Pages are written as UTF-8 regardless of the locale: the tables use
        "—" as the empty-cell marker and may carry other non-ASCII text.
        """
        upper_case_systems = {
            "cgs",
            "hep",
            "iau",
            "iec",
            "iec80000",
            "isq",
            "si",
            "usc",
        }

        def display_name_for(key: str) -> str:
            """Return the page title for a system key (e.g. 'si' -> 'SI')."""
            if key == "isq_angle":
                return "ISQ Angle"
            if key in upper_case_systems:
                return key.upper()
            return key.replace("_", " ").title()

        def header_rel_path(header) -> str:
            """Return the include-relative spelling of a header path."""
            text = str(header)
            _, marker, tail = text.partition("/include/")
            if marker:
                return tail
            # No "/include/" component: fall back to the path relative to the
            # fourth ancestor directory.
            return str(header.relative_to(header.parent.parent.parent.parent))

        def add_word_breaks(name: str) -> str:
            """Allow long identifiers to wrap after each underscore."""
            return name.replace("_", "_<wbr>")

        def code_link(text, system) -> str:
            """Linkify ``text`` and wrap the result in ``<code>`` tags."""
            return f"<code>{self._linkify_definition(text, system)}</code>"

        def open_section(out, needs_blank_line, heading, *table_header) -> bool:
            """Write a section heading plus table header; return True."""
            if needs_blank_line:
                out.write("\n")
            out.write(f"## {heading}\n\n")
            for line in table_header:
                out.write(line)
            return True

        def quantity_row(qty, system, first_by_name) -> str:
            """Return the table row for one quantity of ``system``."""
            is_alias = bool(qty.alias_target)
            # An alias shows the C++-derived columns of its target.
            source = first_by_name.get(qty.alias_target) if is_alias else qty
            # An alias whose target is not in this system keeps its own
            # character/dimension and gets "—" in the remaining columns.
            described = source if source is not None else qty
            character = getattr(described, "character", "Real")
            dim_formula = getattr(described, "dimensional_formula", None) or "—"

            kind_of = parent_display = hierarchy_link = "—"
            if source is not None:
                kind = getattr(source, "kind_of", None)
                # Root name is the last component of kind_of
                # (e.g. 'isq::length' -> 'length').
                root_name = kind.split("::")[-1] if kind else "dimensionless"
                # Only link to a hierarchy that actually exists.
                # NOTE(review): the filename is resolved with this page's
                # namespace even when kind_of is qualified with another
                # system's namespace - confirm this is intended for root names
                # shared between systems.
                if root_name in global_root_counts:
                    hierarchy_file = self._get_hierarchy_filename(
                        root_name, system.namespace, global_root_counts
                    )
                    hierarchy_link = f"[view](../hierarchies/{hierarchy_file})"
                if kind:
                    kind_of = code_link(kind, system)
                parent_cpp = getattr(source, "parent_from_cpp", None)
                if parent_cpp and parent_cpp != "<root>":
                    parent_display = code_link(parent_cpp, system)

            if is_alias:
                target_link = self._linkify_definition(qty.alias_target, system)
                equation = f"alias to {target_link}"
            elif qty.equation:
                equation = code_link(self._normalize_equation(qty.equation), system)
            else:
                equation = "—"

            return (
                f'| <span id="{qty.name}"></span><code>{add_word_breaks(qty.name)}</code> | '
                f"{character} | {dim_formula} | {kind_of} | "
                f"{parent_display} | {equation} | {hierarchy_link} |\n"
            )

        def write_units_table(out, needs_blank_line, heading, units, system) -> bool:
            """Write one units section, rows sorted by unit name."""
            open_section(
                out,
                needs_blank_line,
                heading,
                "| Unit Name | Symbol | unit_symbol | Definition |\n",
                "|-----------|:------:|:-----------:|------------|\n",
            )
            for unit in sorted(units, key=lambda u: u.name):
                self._write_unit_row(out, unit, system)
            return True

        # Needed to decide each quantity's hierarchy filename.
        global_root_counts = self._compute_global_root_counts()

        systems_dir = self.output_dir / "systems"
        systems_dir.mkdir(parents=True, exist_ok=True)

        for namespace in sorted(self.parser.systems.keys()):
            system = self.parser.systems[namespace]
            output_file = systems_dir / f"{namespace}.md"

            with open(output_file, "w", encoding="utf-8") as f:
                self._write_auto_generated_header(f)
                f.write(f"# {display_name_for(namespace)} System\n\n")

                # Core system has empty namespace, meaning mp_units::
                if system.namespace:
                    f.write(f"**Namespace:** `mp_units::{system.namespace}`\n\n")
                    f.write("**Module:** `mp_units.systems`\n\n")
                else:
                    f.write("**Namespace:** `mp_units`\n\n")
                    f.write("**Module:** `mp_units.core`\n\n")

                if system.files:
                    rel_paths = [header_rel_path(header) for header in system.files]
                    first_rel_path = rel_paths[0]
                    # Primary (umbrella) header: sits directly under
                    # ".../systems/" (e.g. isq.h, si.h - no subdirectory).
                    _, marker, below_systems = first_rel_path.partition("/systems/")
                    is_primary = bool(marker) and "/" not in below_systems

                    if is_primary or len(rel_paths) == 1:
                        f.write(f"**Header:** `<{first_rel_path}>`\n")
                        # More than one file here implies a primary header;
                        # the rest are listed alphabetically.
                        if len(rel_paths) > 1:
                            f.write("\n**Secondary Headers:**\n\n")
                            for rel_path in sorted(rel_paths[1:]):
                                f.write(f"- `<{rel_path}>`\n")
                    else:
                        # Multiple headers at the same level, in parse order.
                        f.write("**Headers:**\n\n")
                        for rel_path in rel_paths:
                            f.write(f"- `<{rel_path}>`\n")

                # Tracks whether a blank line is needed before the next section.
                need_separator = bool(system.files)

                if system.dimensions:
                    need_separator = open_section(
                        f,
                        need_separator,
                        "Dimensions",
                        "| Name | Symbol |\n",
                        "|------|:------:|\n",
                    )
                    for dim in sorted(system.dimensions, key=lambda d: d.name):
                        f.write(
                            f'| <span id="{dim.name}"></span>`{dim.name}` | {dim.symbol} |\n'
                        )

                if system.quantities:
                    need_separator = open_section(
                        f,
                        need_separator,
                        "Quantities",
                        "| Quantity | Character | Dimension | Kind of | Parent | Equation | Hierarchy |\n",
                        "|----------|:---------:|:---------:|:-------:|:------:|----------|:---------:|\n",
                    )
                    # First quantity per name, for resolving alias targets
                    # without rescanning the list for every alias.
                    first_by_name = {}
                    for candidate in system.quantities:
                        first_by_name.setdefault(candidate.name, candidate)
                    for qty in sorted(system.quantities, key=lambda q: q.name):
                        f.write(quantity_row(qty, system, first_by_name))

                # Units - the non-SI table is emitted for the SI system only;
                # elsewhere units originating from "non_si" are not listed.
                regular_units = [
                    u for u in system.units if u.origin_namespace != "non_si"
                ]
                non_si_units = (
                    [u for u in system.units if u.origin_namespace == "non_si"]
                    if namespace == "si"
                    else []
                )
                if regular_units:
                    need_separator = write_units_table(
                        f, need_separator, "Units", regular_units, system
                    )
                if non_si_units:
                    need_separator = write_units_table(
                        f,
                        need_separator,
                        "Non-SI units accepted for use with the SI",
                        non_si_units,
                        system,
                    )

                if system.prefixes:
                    need_separator = open_section(
                        f,
                        need_separator,
                        "Prefixes",
                        "| Name | Symbol | Definition |\n",
                        "|------|:------:|------------|\n",
                    )
                    for prefix in sorted(
                        system.prefixes, key=lambda p: self._get_prefix_magnitude(p)
                    ):
                        # "|" would otherwise end the table cell.
                        definition = prefix.definition.replace("|", "\\|")
                        f.write(
                            f'| <span id="{prefix.name}"></span>`{prefix.name}` | {prefix.symbol} | `{definition}` |\n'
                        )

                if system.point_origins:
                    open_section(
                        f,
                        need_separator,
                        "Point Origins",
                        "| Name | Type | Definition |\n",
                        "|------|:----:|------------|\n",
                    )
                    for origin in sorted(system.point_origins, key=lambda o: o.name):
                        if origin.alias_target:
                            # Alias: reference the original (linkified).
                            alias_target_linked = self._linkify_definition(
                                origin.alias_target, system
                            )
                            f.write(
                                f'| <span id="{origin.name}"></span>`{origin.name}` | — | '
                                f"alias to {alias_target_linked} |\n"
                            )
                        else:
                            # Regular definition: escape pipes, then linkify.
                            definition = origin.definition.replace("|", "\\|")
                            definition_linked = self._linkify_definition(
                                definition, system
                            )
                            f.write(
                                f'| <span id="{origin.name}"></span>`{origin.name}` | '
                                f"{origin.origin_type} | <code>{definition_linked}</code> |\n"
                            )

    def _write_unit_row(self, f, unit: Unit, system: SystemInfo):
        """Write a unit table row"""

        # Helper to add word breaks to long identifiers
        def add_word_breaks(name: str) -> str:
            if "_" in name:
                return name.replace("_", "_<wbr>")
            return name

        # Show relative namespace prefix if in subnamespace
        unit_display = (
            f"{unit.subnamespace}::{unit.name}" if unit.subnamespace else unit.name
        )
        unit_display_with_breaks = add_word_breaks(unit_display)

        if unit.alias_target:
            # This is an alias - show reference to original (linkified)
            alias_target_linked = self._linkify_definition(unit.alias_target, system)
            f.write(
                f'| <span id="{unit.name}"></span><code>{unit_display_with_breaks}</code> | — | — | alias to {alias_target_linked} |\n'
            )
        else:
            # Regular definition
            # Handle multiple unit_symbols
            if unit.unit_symbols:
                # Multiple unit_symbols: display as comma-separated list in backticks
                short_symbol = ", ".join(f"`{s}`" for s in unit.unit_symbols)
            else:
                short_symbol = "—"

            symbol = unit.symbol if unit.symbol else "—"

            # Process definition to add namespace prefixes for units from same subnamespace
            definition = self._add_namespace_prefixes_to_definition(unit)

            # Format kind_of constraint - handle multiple patterns:
            # 1. "..., kind_of<...>" (comma before kind_of)
            # 2. "kind_of<...>" (starts with kind_of - e.g., ampere, metre)
            # 3. "kind_of<...>, other" (kind_of followed by comma - e.g., kelvin with origin)
            kind_line = ""

            # Check for pattern: "kind_of<...>, ..." or "kind_of<...>"
            if definition.startswith("kind_of<"):
                # Find the closing > for kind_of
                kind_end = definition.find(">")
                if kind_end != -1:
                    kind_content = definition[
                        8:kind_end
                    ]  # Extract content between kind_of< and >
                    # Linkify the kind content
                    kind_line = f"kind: `{kind_content}`"
                    # Remove the kind_of<...> part (and comma if present)
                    rest = definition[kind_end + 1 :].strip()
                    if rest.startswith(","):
                        rest = rest[1:].strip()
                    definition = rest
            # Check for pattern: "..., kind_of<...>"
            elif ", kind_of<" in definition:
                parts = definition.rsplit(", kind_of<", 1)
                if len(parts) == 2:
                    definition = parts[0].strip()
                    kind_part = parts[1].strip()
                    if kind_part.endswith(">"):
                        kind_content = kind_part[:-1]  # Remove trailing >
                        # Linkify the kind content
                        kind_line = f"kind: `{kind_content}`"

            # Format offset unit origins more clearly (only for the 4 offset units)
            # After extracting kind, check if remaining definition has an origin
            origin_line = ""
            if definition:
                # Check if it's a simple origin identifier (no operators, just a name)
                # This handles kelvin which becomes just "zeroth_kelvin" after kind extraction
                if (
                    (
                        "zeroth_" in definition
                        or definition in ["absolute_zero", "ice_point"]
                    )
                    and not any(
                        op in definition for op in ["*", "/", "+", "-", "(", ")"]
                    )
                    and ", " not in definition
                ):
                    # Linkify the origin
                    origin_line = f"origin: `{definition}`"
                    definition = ""
                # Check for "unit, origin" pattern (e.g., "kelvin, zeroth_degree_Celsius")
                elif ", " in definition:
                    parts = definition.rsplit(", ", 1)
                    if len(parts) == 2:
                        base_def = parts[0].strip()
                        origin = parts[1].strip()
                        # Check if origin looks like a point origin (not a regular expression with commas)
                        if (
                            "zeroth_" in origin
                            or "point<" in origin
                            or origin in ["absolute_zero", "ice_point"]
                        ) and "<" not in origin.replace("point<", ""):
                            definition = base_def
                            # Store origin without backticks - will be linkified later
                            origin_line = f"origin: {origin}"

            # Linkify the main definition (before wrapping in backticks)
            if definition:
                definition = self._linkify_definition(definition, system)
                # Actually, since links have [`text`](url) format, we don't wrap the whole thing
                # definition = f"`{definition}`"  # Don't do this - links already have backticks

            # Linkify kind_line content
            if kind_line:
                # Extract the content after "kind: "
                kind_content = kind_line.replace("kind: ", "").replace("`", "")
                kind_content_linked = self._linkify_definition(kind_content, system)
                kind_line = f"kind: {kind_content_linked}"

            # Linkify origin_line content
            if origin_line:
                # Extract the content after "origin: "
                origin_content = origin_line.replace("origin: ", "").replace("`", "")
                origin_content_linked = self._linkify_definition(origin_content, system)
                origin_line = f"origin: {origin_content_linked}"

            # Build the final definition cell with line breaks
            parts = []
            if definition:
                parts.append(definition)
            if kind_line:
                parts.append(kind_line)
            if origin_line:
                parts.append(origin_line)

            # Join parts and escape pipes
            if parts:
                definition_cell = "<br>".join(parts)
                definition_cell = definition_cell.replace("|", "\\|")
                # Wrap in HTML code tags to preserve code font for non-linked parts
                definition_cell = f"<code>{definition_cell}</code>"
            else:
                definition_cell = "—"

            # unit_symbol_cell is already formatted with backticks or em-dash
            f.write(
                f'| <span id="{unit.name}"></span><code>{unit_display_with_breaks}</code> | {symbol} | '
                f"{short_symbol} | {definition_cell} |\n"
            )

    def _add_namespace_prefixes_to_definition(self, unit: Unit) -> str:
        """Add namespace prefixes to unit references in definition if they're from same subnamespace"""
        if not unit.subnamespace:
            return unit.definition

        definition = unit.definition

        # Find all units in the same system and subnamespace
        system_namespace = unit.namespace.replace(f"::{unit.subnamespace}", "").replace(
            "mp_units::", ""
        )
        system = self.parser.systems.get(system_namespace)

        if not system:
            return definition

        # Get all unit names from the same subnamespace, sorted by length (longest first)
        # to avoid replacing substrings of longer names
        sibling_units = [
            u.name
            for u in system.units
            if u.subnamespace == unit.subnamespace and u.name != unit.name
        ]
        sibling_units.sort(key=len, reverse=True)

        # Replace unit names with prefixed versions using word boundaries
        for sibling_name in sibling_units:
            # Use word boundary pattern to avoid partial matches
            pattern = r"\b" + re.escape(sibling_name) + r"\b"
            replacement = f"{unit.subnamespace}::{sibling_name}"
            definition = re.sub(pattern, replacement, definition)

        return definition

    def _linkify_definition(self, definition: str, current_system: SystemInfo) -> str:
        """Convert unit/quantity/origin references in definition to markdown links while preserving code font.

        Converts references like `yard` to [`yard`](#yard) or [`si::metre`](si.md#metre).
        Preserves operators, numbers, and template syntax without linkification.
        """
        if not definition:
            return definition

        # Pattern to match identifiers (including namespace-qualified ones)
        # Matches: word, namespace::word, nested::namespace::word
        # Allow both lowercase and uppercase letters for names like Julian_year and zeroth_degree_Celsius
        identifier_pattern = (
            r"\b([a-zA-Z_][a-zA-Z0-9_]*(?:::[a-zA-Z_][a-zA-Z0-9_]*)*)\b"
        )

        # Collect all possible references from all systems
        all_refs = {}  # name -> (system_namespace, anchor_name)

        for sys_ns, system in self.parser.systems.items():
            # Add units
            for unit in system.units:
                all_refs[unit.name] = (sys_ns, unit.name)
                # Also add qualified names
                all_refs[f"{sys_ns}::{unit.name}"] = (sys_ns, unit.name)

            # Add point origins
            for origin in system.point_origins:
                all_refs[origin.name] = (sys_ns, origin.name)
                all_refs[f"{sys_ns}::{origin.name}"] = (sys_ns, origin.name)

            # Add quantities
            for qty in system.quantities:
                all_refs[qty.name] = (sys_ns, qty.name)
                all_refs[f"{sys_ns}::{qty.name}"] = (sys_ns, qty.name)

            # Add prefixes
            for prefix in system.prefixes:
                # Prefixes link to their specific anchor
                all_refs[prefix.name] = (sys_ns, prefix.name)
                all_refs[f"{sys_ns}::{prefix.name}"] = (sys_ns, prefix.name)

        def replace_identifier(match):
            identifier = match.group(1)

            # Skip keywords and known functions/templates
            skip_words = {
                "mag",
                "mag_ratio",
                "mag_power",
                "kind_of",
                "kind",
                "square",
                "cubic",
                "pow",
                "sqrt",
                "cbrt",
                "abs",
                "inverse",
                "ratio",
                "power",
                "root",
                "si",
                "isq",
                "iec",
                "usc",
                "cgs",
                "iau",
                "hep",
                "imperial",
                "non_si",
                "typographic",
                "angular",
                "natural",
                "isq_angle",
                "mp_units",
                "mp_units::point",  # Template functions
            }

            if identifier in skip_words:
                return match.group(0)

            # If it's a qualified name, check if we should strip the namespace prefix
            display_text = identifier
            if "::" in identifier:
                parts = identifier.split("::")
                namespace_prefix = "::".join(parts[:-1])
                unqualified_name = parts[-1]

                # Strip namespace if it matches current system
                if namespace_prefix == current_system.namespace:
                    display_text = unqualified_name

            # Helper function to create link with word breaks for long identifiers
            def make_link(text, url):
                # For identifiers with underscores, insert <wbr> at underscores to allow breaking
                # Markdown link text supports HTML, so we can use <wbr> tags in the text
                if "_" in text:
                    # Insert <wbr> after each underscore for better line breaking
                    text_with_breaks = text.replace("_", "_<wbr>")
                    return f"[{text_with_breaks}]({url})"
                else:
                    # Standard markdown link
                    return f"[{text}]({url})"

            # Check if this identifier is a reference we can link
            # First check in current system for the unqualified name
            unqualified_check = (
                identifier.split("::")[-1] if "::" in identifier else identifier
            )

            # Build a key to check current system first
            current_sys_key = f"{current_system.namespace}::{unqualified_check}"

            # Special case: dimensionless is only defined in core, never link to self in other systems
            if (
                unqualified_check == "dimensionless"
                and current_system.namespace != "core"
            ):
                # Always link to core for dimensionless
                if "core::dimensionless" in all_refs:
                    return make_link(display_text, "core.md#dimensionless")
                elif "dimensionless" in all_refs:
                    target_sys, anchor = all_refs["dimensionless"]
                    return make_link(display_text, f"{target_sys}.md#{anchor}")

            if current_sys_key in all_refs:
                # Found in current system
                target_sys, anchor = all_refs[current_sys_key]
                return make_link(display_text, f"#{anchor}")
            elif identifier in all_refs:
                # Found with full identifier
                target_sys, anchor = all_refs[identifier]

                # Determine if same system or cross-system
                if target_sys == current_system.namespace:
                    # Same system - use anchor link
                    return make_link(display_text, f"#{anchor}")
                else:
                    # Cross-system - use relative link
                    return make_link(display_text, f"{target_sys}.md#{anchor}")

            # If it's a qualified name that wasn't found, try the unqualified name
            # (handles cases like iau::astronomical_unit where the unit is in si)
            if "::" in identifier:
                parts = identifier.split("::")
                unqualified_name = parts[-1]
                if unqualified_name in all_refs:
                    target_sys, anchor = all_refs[unqualified_name]
                    # Use the actual system where the unit is defined
                    if target_sys == current_system.namespace:
                        return make_link(display_text, f"#{anchor}")
                    else:
                        return make_link(display_text, f"{target_sys}.md#{anchor}")

            # Not a linkable reference, return as-is
            return match.group(0)

        # Apply the replacement
        result = re.sub(identifier_pattern, replace_identifier, definition)

        # Add word break opportunities at natural break points to prevent table overflow
        # Only add after operators, not within markdown link syntax
        result = result.replace(" / ", " / <wbr>")
        result = result.replace(" * ", " * <wbr>")

        return result

    def _write_quantity_tree(self, f, qty: Quantity, qty_children: dict, indent: int):
        """Write quantity hierarchy tree recursively"""
        prefix = "  " * indent + ("├─ " if indent > 0 else "")
        kind_marker = " [kind]" if qty.is_kind else ""
        equation_info = f" = {qty.equation}" if qty.equation else ""
        f.write(f"{prefix}{qty.name}{kind_marker}{equation_info}\n")

        children = sorted(qty_children.get(qty.name, []), key=lambda q: q.name)
        for child in children:
            self._write_quantity_tree(f, child, qty_children, indent + 1)

    def _normalize_equation(self, equation: str) -> str:
        """Normalize equation formatting by ensuring proper spacing around operators"""
        if not equation:
            return equation
        # Add space before * if not already there
        equation = re.sub(r"(\w)(\*)", r"\1 \2", equation)
        # Add space after * if not already there
        equation = re.sub(r"(\*)(\w)", r"\1 \2", equation)
        # Add space before / if not already there
        equation = re.sub(r"(\w)(\/)", r"\1 \2", equation)
        # Add space after / if not already there
        equation = re.sub(r"(\/)(\w)", r"\1 \2", equation)
        return equation

    def _build_mermaid_hierarchy(
        self, root_name: str, qty_children: dict, qualified_quantities: list
    ) -> str:
        """Build Mermaid flowchart for quantity hierarchy using qualified names"""
        lines = ["```mermaid", "flowchart LR"]

        # Build a map of qualified_name -> quantity
        qty_map = {qname: qty for qname, qty in qualified_quantities}

        # Build a map of aliases: target_qualified_name -> [alias_qualified_names]
        aliases_map = defaultdict(list)
        for qname, qty in qualified_quantities:
            if qty.alias_target:
                # Find the target's qualified name
                sys_ns = qty.namespace.replace("mp_units::", "")
                if qty.alias_target == "dimensionless":
                    target_qname = "dimensionless"
                else:
                    target_qname = f"{sys_ns}::{qty.alias_target}"
                aliases_map[target_qname].append(qname)

        def add_node(qualified_name: str, parent_id: str = None):
            qty = qty_map.get(qualified_name)
            if not qty:
                return

            # Skip if this is an alias (will be included in target's box)
            if qty.alias_target:
                return

            # Create node ID from qualified name (sanitized for Mermaid)
            node_id = qualified_name.replace("::", "_").replace("-", "_")

            # Build node label - use the qualified name directly (no prefix for dimensionless)
            name_display = qualified_name

            # Add aliases to the name (e.g., "isq::height | isq::depth | isq::altitude")
            if qualified_name in aliases_map:
                alias_names = sorted(aliases_map[qualified_name])
                name_display = name_display + " | " + " | ".join(alias_names)

            # Format equation in italics inside parentheses with normalized spacing
            equation = ""
            if qty.equation:
                normalized_eq = self._normalize_equation(qty.equation)
                equation = f"<br><i>({normalized_eq})</i>"

            label = f"<b>{name_display}</b>{equation}"

            # Add node definition
            lines.append(f'    {node_id}["{label}"]')

            # Add edge from parent
            if parent_id:
                lines.append(f"    {parent_id} --- {node_id}")

            # Process children (excluding aliases) - children are (qualified_name, qty) tuples
            children = sorted(qty_children.get(qualified_name, []), key=lambda x: x[0])
            for child_qname, child_qty in children:
                add_node(child_qname, node_id)

        # Start with the root (which should be the qualified root name, or just 'dimensionless')
        if root_name == "dimensionless":
            add_node("dimensionless")
        else:
            # Find the first root with this name
            for qname, qty in qualified_quantities:
                is_root = (
                    hasattr(qty, "parent_from_cpp") and qty.parent_from_cpp == "<root>"
                )
                if qty.name == root_name and is_root:
                    add_node(qname)
                    break

        lines.append("```")
        return "\n".join(lines)


class CppMetadataExtractor:
    """Extract quantity metadata by compiling and running a generated C++ program.

    The generated program prints one comma-separated record per quantity:
    ``namespace,name,character,dimension_symbol,kind_of_type,parent_type``.
    The records are parsed into ``self.metadata`` and can then be copied onto
    the parser's quantity objects with :meth:`apply_metadata`.
    """

    def __init__(self, parser: SystemsParser, source_dir: Path):
        """Remember the parsed systems and the project root used for include paths."""
        self.parser = parser
        self.source_dir = source_dir
        # (namespace, qty_name) -> {dimension, kind_of, parent, character}
        self.metadata = {}

    def extract_metadata(self):
        """Generate the C++ program, compile it, run it, and parse its output.

        This is best-effort: when no compiler is found, compilation fails, the
        program exits non-zero, a step times out, or a process cannot be
        launched, a warning is printed and ``self.metadata`` is left unchanged.
        The temporary ``.cpp`` file is kept only after a compilation error so it
        can be inspected; the executable is always removed.
        """
        import shutil
        import subprocess
        import tempfile

        cpp_code = self._generate_cpp_program()

        # delete=False: the file must outlive the ``with`` block for the compiler.
        with tempfile.NamedTemporaryFile(mode="w", suffix=".cpp", delete=False) as f:
            cpp_file = Path(f.name)
            f.write(cpp_code)

        # Newest known compilers first; the first one found on PATH wins.
        candidates = (
            "g++-14",
            "g++-13",
            "g++-12",
            "g++",
            "clang++-18",
            "clang++-17",
            "clang++",
            "c++",
        )
        compiler = next((name for name in candidates if shutil.which(name)), None)

        if not compiler:
            print("Warning: No suitable C++23 compiler found (tried g++, clang++, c++)")
            print("         Quantity metadata extraction skipped")
            cpp_file.unlink()
            return

        exe_file = cpp_file.with_suffix("")
        compilation_failed = False
        try:
            compile_cmd = [
                compiler,
                "-std=c++23",
                f"-I{self.source_dir}/src/core/include",
                f"-I{self.source_dir}/src/systems/include",
                str(cpp_file),
                "-o",
                str(exe_file),
            ]
            result = subprocess.run(
                compile_cmd, capture_output=True, text=True, timeout=60
            )
            if result.returncode != 0:
                compilation_failed = True
                print(
                    f"Warning: Failed to compile metadata extraction program with {compiler}:"
                )
                print(result.stderr)
                print(f"         C++ file preserved for inspection: {cpp_file}")
                return

            result = subprocess.run(
                [str(exe_file)], capture_output=True, text=True, timeout=10
            )
            if result.returncode != 0:
                print("Warning: Failed to run metadata extraction program:")
                print(result.stderr)
                return

            self._parse_output(result.stdout)

        except subprocess.TimeoutExpired as exc:
            # A slow compiler or a hung binary must not abort documentation
            # generation; treat it like every other extraction failure.
            print(
                f"Warning: Metadata extraction timed out after {exc.timeout} seconds"
            )
            print("         Quantity metadata extraction skipped")
        except OSError as exc:
            # E.g. the executable cannot be launched from the temp directory.
            print(f"Warning: Could not run metadata extraction: {exc}")
            print("         Quantity metadata extraction skipped")
        finally:
            # Keep the .cpp file only after a compilation error, for debugging.
            if cpp_file.exists() and not compilation_failed:
                cpp_file.unlink()
            if exe_file.exists():
                exe_file.unlink()

    def _generate_cpp_program(self) -> str:
        """Return the source of a C++ program that prints metadata for all quantities.

        ``dimensionless`` is always printed once with an empty namespace. Every
        non-alias quantity of every parsed system follows, ordered by name within
        each system.
        """
        lines = [
            "// Auto-generated program to extract quantity metadata",
            "#include <mp-units/systems/isq.h>",
            "#include <mp-units/systems/isq_angle.h>",
            "#include <mp-units/systems/angular.h>",
            "#include <mp-units/systems/natural.h>",
            "#include <mp-units/systems/iec80000.h>",
            "#include <iostream>",
            "",
            "using namespace mp_units;",
            "",
            "constexpr std::string_view get_parent(QuantitySpec auto qs)",
            "{",
            "  if constexpr (requires { qs._parent_; })",
            "    return detail::type_name<std::remove_const_t<decltype(qs._parent_)>>();",
            "  else",
            '    return "<root>";',
            "}",
            "",
            "constexpr std::string_view character_to_string(quantity_character ch)",
            "{",
            "  switch (ch) {",
            "    case quantity_character::real_scalar:",
            '      return "Real";',
            "    case quantity_character::complex_scalar:",
            '      return "Complex";',
            "    case quantity_character::vector:",
            '      return "Vector";',
            "    case quantity_character::tensor:",
            '      return "Tensor";',
            "    default:",
            '      return "Unknown";',
            "  }",
            "}",
            "",
            "template<QuantitySpec QS>",
            "void print_quantity(std::string_view namespace_name, std::string_view name, QS qs)",
            "{",
            '  std::cout << namespace_name << ","',
            '            << name << ","',
            '            << character_to_string(qs.character) << ","',
            '            << dimension_symbol(qs.dimension) << ","',
            '            << detail::type_name<decltype(get_kind(qs))>() << ","',
            "            << get_parent(qs) << '\\n';",
            "}",
            "",
            "#define PRINT_QTY(ns, qty) print_quantity(#ns, #qty, ns::qty)",
            "",
            "int main()",
            "{",
        ]

        # dimensionless has no namespace and is emitted exactly once.
        lines.append('  print_quantity("", "dimensionless", dimensionless);')

        for system in self.parser.systems.values():
            for qty in sorted(system.quantities, key=lambda q: q.name):
                if qty.alias_target:
                    # Aliases share the metadata of the quantity they alias.
                    continue
                if system.namespace:
                    lines.append(f"  PRINT_QTY({system.namespace}, {qty.name});")
                elif qty.name != "dimensionless":
                    # Empty namespace (core) lives directly in mp_units::.
                    lines.append(f'  print_quantity("", "{qty.name}", {qty.name});')

        lines.extend(["  return 0;", "}"])
        return "\n".join(lines)

    def _parse_output(self, output: str):
        """Parse the program output and store one ``self.metadata`` entry per record.

        Lines that do not have exactly six comma-separated fields are ignored.
        Only the first five commas split fields, so the parent type may itself
        contain commas.
        """
        for line in output.strip().split("\n"):
            if not line:
                continue

            # namespace,name,character,dimension_symbol,kind_of_type,parent_type
            parts = line.split(",", 5)
            if len(parts) != 6:
                continue
            namespace, name, character, dim_symbol, kind_of, parent = parts

            # "mp_units::kind_of_<mp_units::isq::length>; ..." -> "isq::length"
            kind_pieces = kind_of.split("mp_units::kind_of_<mp_units::")
            kind_str = kind_pieces[1].split(">")[0] if len(kind_pieces) > 1 else ""

            # "mp_units::isq::length; ..." -> "isq::length"
            if "<root>" in parent:
                parent_str = "<root>"
            else:
                parent_pieces = parent.split("mp_units::")
                if len(parent_pieces) > 1:
                    parent_str = parent_pieces[1].split(";")[0]
                else:
                    parent_str = parent

            self.metadata[(namespace, name)] = {
                "dimension": dim_symbol,
                "kind_of": kind_str,
                "parent": parent_str,
                "character": character,
            }

    def apply_metadata(self):
        """Copy extracted metadata onto the matching quantity objects.

        Quantities are matched by ``(system key, quantity name)``. A dimension
        symbol of ``"1"`` is shown as an em dash, except for the core
        ``dimensionless`` quantity itself, which keeps ``"1"``.
        """
        for namespace, system in self.parser.systems.items():
            for qty in system.quantities:
                meta = self.metadata.get((namespace, qty.name))
                if meta is None:
                    continue

                formula = meta["dimension"] if meta["dimension"] != "1" else "—"
                if formula == "—" and namespace == "" and qty.name == "dimensionless":
                    formula = "1"

                qty.dimensional_formula = formula
                qty.character = meta["character"]
                qty.kind_of = meta["kind_of"]
                qty.parent_from_cpp = meta["parent"]


def main():
    """Parse all systems, extract C++ metadata, and write the reference docs.

    Paths are resolved relative to this script's parent directory. Returns 1
    when the systems header directory does not exist, otherwise 0 after all
    pages have been generated and the MkDocs configuration has been updated.
    """
    project_root = Path(__file__).parent.parent
    systems_dir = project_root.joinpath(
        "src", "systems", "include", "mp-units", "systems"
    )
    output_dir = project_root.joinpath("docs", "reference", "systems_reference")
    mkdocs_file = project_root / "mkdocs.yml"

    if not systems_dir.exists():
        print(f"Error: Systems directory not found: {systems_dir}", file=sys.stderr)
        return 1

    print(f"Parsing systems from: {systems_dir}")
    parser = SystemsParser(systems_dir)
    parser.parse_all_systems()

    # Per-system summary of everything the parser found.
    print(f"Found {len(parser.systems)} systems")
    for ns, system in parser.systems.items():
        base_count = sum(1 for u in system.units if u.is_base)
        derived_count = sum(
            1 for u in system.units if not (u.is_base or u.is_alias)
        )
        origin_count = len(system.point_origins)
        prefix_count = len(system.prefixes)
        summary = (
            f"  - {ns}: {len(system.dimensions)} dims, "
            f"{len(system.quantities)} qtys, {base_count} base units, "
            f"{derived_count} derived units, {origin_count} point origins, "
            f"{prefix_count} prefixes"
        )
        print(summary)

    print("\nExtracting metadata from C++...")
    extractor = CppMetadataExtractor(parser, project_root)
    extractor.extract_metadata()
    extractor.apply_metadata()
    print(f"  ✓ Extracted metadata for {len(extractor.metadata)} quantities")

    print(f"\nGenerating documentation to: {output_dir}")
    generator = DocumentationGenerator(parser, output_dir)
    generator.generate_systems_index()
    generator.generate_dimensions_index()
    generator.generate_quantities_index()
    generator.generate_units_index()
    generator.generate_prefixes_index()
    generator.generate_point_origins_index()
    hierarchy_count = generator.generate_cross_system_hierarchies()
    generator.generate_hierarchies_overview()
    generator.generate_per_system_pages()

    print("\nUpdating mkdocs configuration...")
    generator.update_mkdocs_config(mkdocs_file)

    # Grand totals across all systems for the closing report.
    all_systems = list(parser.systems.values())
    total_dims = sum(len(s.dimensions) for s in all_systems)
    total_qtys = sum(len(s.quantities) for s in all_systems)
    total_units = sum(len(s.units) for s in all_systems)
    total_origins = sum(len(s.point_origins) for s in all_systems)
    total_prefixes = sum(len(s.prefixes) for s in all_systems)

    report = (
        "\nDocumentation generation complete!",
        f"  - {len(parser.systems)} systems processed",
        f"  - {total_dims} dimensions documented",
        f"  - {total_qtys} quantities documented",
        f"  - {hierarchy_count} quantity hierarchies documented",
        f"  - {total_units} units documented",
        f"  - {total_origins} point origins documented",
        f"  - {total_prefixes} prefixes documented",
    )
    for report_line in report:
        print(report_line)

    return 0


def compute_source_hash(source_root: Path) -> str:
    """Compute hash of all source files to detect changes"""
    hasher = hashlib.sha256()

    # Hash all header files that could affect generation
    # Only systems headers + unit.h (for core dimensionless units)
    patterns = [
        "src/systems/include/mp-units/systems/**/*.h",
        "src/core/include/mp-units/framework/unit.h",
    ]

    files = []
    for pattern in patterns:
        files.extend(sorted(source_root.glob(pattern)))

    for file_path in files:
        hasher.update(str(file_path.relative_to(source_root)).encode())
        hasher.update(file_path.read_bytes())

    return hasher.hexdigest()


def should_regenerate(source_root: Path, cache_file: Path) -> bool:
    """Return True when the documentation must be regenerated.

    Regeneration is needed when the cache file is missing, cannot be read, does
    not contain a JSON object, or records a hash different from the current
    hash of the source headers. A damaged cache is never an error: it simply
    forces a regeneration.

    Args:
        source_root: Root directory of the mp-units checkout.
        cache_file: JSON file holding ``{"source_hash": "<hex digest>"}``.
    """
    if not cache_file.exists():
        return True

    try:
        cache_data = json.loads(cache_file.read_text(encoding="utf-8"))
    except (ValueError, OSError):
        # ValueError covers both invalid JSON and undecodable bytes.
        return True

    if not isinstance(cache_data, dict):
        # Valid JSON of the wrong shape (list, null, number, ...).
        return True

    cached_hash = cache_data.get("source_hash", "")
    return compute_source_hash(source_root) != cached_hash


def save_cache(source_root: Path, cache_file: Path):
    """Record the current source hash in ``cache_file`` as indented JSON.

    The parent directory is created when missing, and the file ends with a
    newline so pre-commit's end-of-file check passes.
    """
    cache_file.parent.mkdir(parents=True, exist_ok=True)
    payload = {"source_hash": compute_source_hash(source_root)}

    with open(cache_file, "w") as f:
        f.write(json.dumps(payload, indent=2) + "\n")


def generate_if_needed(source_root: Path, force: bool = False) -> int:
    """Run the generator unless the cached source hash says nothing changed.

    Args:
        source_root: Root directory of the mp-units checkout.
        force: When True, regenerate even if the sources are unchanged.

    Returns:
        0 when the docs are already up to date; otherwise the status returned
        by ``main()``. The cache is refreshed only after a successful run.
    """
    cache_file = source_root / "docs/reference/systems_reference/.cache.json"

    up_to_date = not force and not should_regenerate(source_root, cache_file)
    if up_to_date:
        print("Systems reference documentation is up to date (sources unchanged)")
        return 0

    print("Generating systems reference documentation...")
    status = main()
    if status == 0:
        save_cache(source_root, cache_file)
    return status


# MkDocs hook integration
def on_pre_build(config, **kwargs):
    """MkDocs hook: refresh the systems reference before the site is built.

    The project root is taken to be the parent of the configured ``docs_dir``.
    Generation is skipped when the source headers are unchanged.
    """
    project_root = Path(config["docs_dir"]).parent
    return generate_if_needed(project_root, force=False)


if __name__ == "__main__":
    # Direct execution: pass --force to regenerate even when sources are unchanged.
    # The project root is the parent of the directory containing this script.
    source_root = Path(__file__).parent.parent
    exit_status = generate_if_needed(source_root, force="--force" in sys.argv)
    sys.exit(exit_status)