This is an automated email from the ASF dual-hosted git repository.

acassis pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/nuttx.git

commit 754dfd980bc84d15fe040dbbfd4af797aa07b713
Author: Xiang Xiao <[email protected]>
AuthorDate: Wed Mar 11 03:05:52 2026 +0800

    arch/sim: replace macOS C++ constructor runtime hack with post-link patch
    
    The sim architecture needs to defer C++ global constructors until after
    NuttX kernel initialization completes. On macOS this was previously done
    via a runtime hack (sim_macho_init.c): a __attribute__((constructor))
    function intercepted constructors, saved them, and replayed them later.
    That approach was fragile because it depended on constructor ordering
    and required mprotect() to patch the read-only __mod_init_func section
    at runtime.
    
    This commit replaces the runtime hack with a post-link patching scheme:
    
      1. Link with -Wl,-ld_classic,-no_fixup_chains to keep the classic
         __mod_init_func pointer format (prevents ld64 from converting it
         to __init_offsets).
      2. Post-link, run patch_macho_initsection.py (python3 + lief) to
         patch Mach-O section type flags from
         MOD_INIT_FUNC_POINTERS/INIT_FUNC_OFFSETS to REGULAR, so dyld
         skips them entirely.
      3. Use the Mach-O auto-generated boundary symbols
         section$start$__DATA_CONST$__mod_init_func /
         section$end$__DATA_CONST$__mod_init_func, mapped via __asm()
         labels in arch/sim/include/arch.h to the common _sinit[]/_einit[]
         names used by lib_cxx_initialize().
    
    Linux behavior is unchanged.
    
    Changes:
      - arch/sim/include/arch.h: add macOS __asm() declarations for
        _sinit/_einit
      - arch/sim/src/Makefile: drop sim_macho_init.c HEADSRC handling,
        always pass -ld_classic,-no_fixup_chains on macOS, run
        patch_macho_initsection.py after link when CONFIG_HAVE_CXXINITIALIZE
      - arch/sim/src/sim/CMakeLists.txt: same for the CMake build
      - arch/sim/src/patch_macho_initsection.py: new lief-based patcher
      - arch/sim/src/sim/posix/sim_macho_init.c: deleted (135-line hack)
      - libs/libc/misc/lib_cxx_initialize.c: remove
        macho_call_saved_init_funcs special case; single unified loop
    
    Testing:
      - macOS (Ventura VM): verified lief patching with standalone test
        (test_sinit7) confirming the constructor is deferred past main()
        and only invoked when explicitly called via the _sinit/_einit loop.
    
    Signed-off-by: Xiang Xiao <[email protected]>
---
 .codespellrc                            |   1 +
 arch/sim/include/arch.h                 |  38 +++++++++
 arch/sim/src/Makefile                   |  22 ++----
 arch/sim/src/patch_macho_initsection.py |  72 +++++++++++++++++
 arch/sim/src/sim/CMakeLists.txt         |  19 +++--
 arch/sim/src/sim/posix/sim_macho_init.c | 135 --------------------------------
 libs/libc/misc/lib_cxx_initialize.c     |   8 --
 7 files changed, 130 insertions(+), 165 deletions(-)

diff --git a/.codespellrc b/.codespellrc
index cf28f7d9663..e532eb60df4 100644
--- a/.codespellrc
+++ b/.codespellrc
@@ -41,6 +41,7 @@ ignore-words-list =
   infor,
   inport,
   ist,
+  lief,
   lod,
   mot,
   mis,
diff --git a/arch/sim/include/arch.h b/arch/sim/include/arch.h
index d83f17fa1ce..1634e63c4a0 100644
--- a/arch/sim/include/arch.h
+++ b/arch/sim/include/arch.h
@@ -27,4 +27,42 @@
 #ifndef __ARCH_SIM_INCLUDE_ARCH_H
 #define __ARCH_SIM_INCLUDE_ARCH_H
 
+/****************************************************************************
+ * Pre-processor Definitions
+ ****************************************************************************/
+
+#ifdef __cplusplus
+#define EXTERN extern "C"
+extern "C"
+{
+#else
+#define EXTERN extern
+#endif
+
+/****************************************************************************
+ * Public Data
+ ****************************************************************************/
+
+/* _sinit and _einit mark the beginning and end of the C++ constructor
+ * array.  They are mapped to platform-specific linker symbols:
+ *   macOS:  section$start$__DATA_CONST$__mod_init_func /
+ *           section$end$__DATA_CONST$__mod_init_func (Mach-O auto)
+ * On macOS, the section type flags are patched post-link to prevent
+ * dyld from auto-running constructors before NuttX is initialized.
+ */
+
+#ifdef CONFIG_HAVE_CXXINITIALIZE
+#  if defined(CONFIG_HOST_MACOS)
+extern void (*_sinit[])(void)
+  __asm("section$start$__DATA_CONST$__mod_init_func");
+extern void (*_einit[])(void)
+  __asm("section$end$__DATA_CONST$__mod_init_func");
+#  endif
+#endif
+
+#undef EXTERN
+#ifdef __cplusplus
+}
+#endif
+
 #endif /* __ARCH_SIM_INCLUDE_ARCH_H */
diff --git a/arch/sim/src/Makefile b/arch/sim/src/Makefile
index e50dc83d720..d3857e52025 100644
--- a/arch/sim/src/Makefile
+++ b/arch/sim/src/Makefile
@@ -141,24 +141,15 @@ sim_hostfs.c: hostfs.h
 
 STDLIBS += -lpthread
 ifeq ($(CONFIG_HOST_MACOS),y)
-ifeq ($(CONFIG_HAVE_CXXINITIALIZE),y)
-  # Note: sim_macho_init.c is not in CSRCS because it's picky about
-  # the place in the object list for linking. Namely, its constructor
-  # should be the first one in the executable.
-  HEADSRC = sim_macho_init.c
-
-  # sim_macho_init.c is not compatible with chained fixups.
-  # cf. https://github.com/apache/nuttx/issues/15208
-  ifeq ($(shell $(LD) -ld_classic -no_fixup_chains 2>&1 | grep "unknown option"),)
+  ifeq ($(CONFIG_HAVE_CXXINITIALIZE),y)
     LDLINKFLAGS += -ld_classic -no_fixup_chains
     LDFLAGS += -Wl,-ld_classic,-no_fixup_chains
   endif
-endif
 
-# Keep the simulator executable from exporting NuttX symbols. Otherwise dyld
-# may resolve host libc references against NuttX's internal libc implementation
-# during process initialization, which can crash before main() runs.
-LDFLAGS += -Wl,-exported_symbol,__mh_execute_header
+  # Keep the simulator executable from exporting NuttX symbols. Otherwise dyld
+  # may resolve host libc references against NuttX's internal libc implementation
+  # during process initialization, which can crash before main() runs.
+  LDFLAGS += -Wl,-exported_symbol,__mh_execute_header
 else
   STDLIBS += -lrt
 endif
@@ -521,6 +512,9 @@ else
        $(Q) $(call LINK_ALLSYMS_KASAN)
        $(Q) $(call LINK_ALLSYMS_KASAN)
        $(Q) $(call LINK_ALLSYMS_KASAN)
+endif
+ifeq ($(CONFIG_HOST_MACOS)$(CONFIG_HAVE_CXXINITIALIZE),yy)
+       $(Q) python3 $(ARCH_SRCDIR)/patch_macho_initsection.py $(TOPDIR)/$@
 endif
        $(Q) $(NM) $(TOPDIR)/$@ | \
                grep -v '\(compiled\)\|\(\.o$$\)\|\( [aUw] \)\|\(\.\.ng$$\)\|\(LASH[RL]DI\)' | \
diff --git a/arch/sim/src/patch_macho_initsection.py b/arch/sim/src/patch_macho_initsection.py
new file mode 100755
index 00000000000..2369965af0e
--- /dev/null
+++ b/arch/sim/src/patch_macho_initsection.py
@@ -0,0 +1,72 @@
+#!/usr/bin/env python3
+############################################################################
+# arch/sim/src/patch_macho_initsection.py
+#
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.  The
+# ASF licenses this file to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance with the
+# License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+# implied.  See the License for the specific language governing
+# permissions and limitations under the License.
+#
+############################################################################
+
+"""Patch Mach-O init section type flags to prevent dyld from
+auto-running C++ constructors.
+
+Changes MOD_INIT_FUNC_POINTERS (0x9) section types to REGULAR (0x0)
+so that dyld ignores them.  NuttX will invoke the constructors
+explicitly from lib_cxx_initialize().
+
+Requires: pip install lief
+"""
+
+import argparse
+import sys
+
+try:
+    import lief
+except ImportError:
+    print(
+        "Error: lief is required. Install with: pip install lief",
+        file=sys.stderr,
+    )
+    sys.exit(1)
+
+
+def main():
+    parser = argparse.ArgumentParser(description="Patch Mach-O init section type flags")
+    parser.add_argument("binary", help="Path to the Mach-O binary")
+    args = parser.parse_args()
+
+    binary = lief.MachO.parse(args.binary)
+    if binary is None:
+        print(f"Error: failed to parse {args.binary}", file=sys.stderr)
+        sys.exit(1)
+
+    fat = binary.at(0)
+    T = lief.MachO.Section.TYPE
+    patched = 0
+    for section in fat.sections:
+        if section.type == T.MOD_INIT_FUNC_POINTERS:
+            section.type = T.REGULAR
+            patched += 1
+
+    if patched:
+        fat.write(args.binary)
+
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/arch/sim/src/sim/CMakeLists.txt b/arch/sim/src/sim/CMakeLists.txt
index 47d1efd8888..273acb2fcc7 100644
--- a/arch/sim/src/sim/CMakeLists.txt
+++ b/arch/sim/src/sim/CMakeLists.txt
@@ -150,14 +150,17 @@ list(
   sim_hosttime.c
   sim_hostuart.c)
 
-# Note: sim_macho_init.c is picky about the place in the object list for
-# linking. Namely, its constructor should be the first one in the executable.
-# For now, we are just assuming no other files in HOSTSRCS provide constructors.
-if(CONFIG_HOST_MACOS)
-  if(CONFIG_HAVE_CXXINITIALIZE)
-    list(APPEND HOSTSRCS sim_macho_init.c)
-    target_link_options(nuttx PRIVATE -Wl,-ld_classic,-no_fixup_chains)
-  endif()
+if(CONFIG_HOST_MACOS AND CONFIG_HAVE_CXXINITIALIZE)
+  # Keep classic __mod_init_func format so post-link lief patching works
+  target_link_options(nuttx PRIVATE -Wl,-ld_classic,-no_fixup_chains)
+  add_custom_command(
+    TARGET nuttx
+    POST_BUILD
+    COMMAND
+      ${Python3_EXECUTABLE}
+      ${CMAKE_CURRENT_SOURCE_DIR}/../patch_macho_initsection.py
+      $<TARGET_FILE:nuttx>
+    COMMENT "Patching Mach-O init section type flags")
 endif()
 
 if(CONFIG_SIM_CAMERA_V4L2)
diff --git a/arch/sim/src/sim/posix/sim_macho_init.c b/arch/sim/src/sim/posix/sim_macho_init.c
deleted file mode 100644
index 28f1c3ad0c4..00000000000
--- a/arch/sim/src/sim/posix/sim_macho_init.c
+++ /dev/null
@@ -1,135 +0,0 @@
-/****************************************************************************
- * arch/sim/src/sim/posix/sim_macho_init.c
- *
- * SPDX-License-Identifier: Apache-2.0
- *
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.  The
- * ASF licenses this file to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance with the
- * License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  See the
- * License for the specific language governing permissions and limitations
- * under the License.
- *
- ****************************************************************************/
-
-/****************************************************************************
- * Included Files
- ****************************************************************************/
-
-#include <sys/mman.h>
-
-#include <assert.h>
-#include <stdlib.h>
-#include <unistd.h>
-
-/****************************************************************************
- * Private Data
- ****************************************************************************/
-
-typedef void (*init_func_t)(int argc, const char *argv[],
-                            const char *envp[], const char *apple[]);
-extern init_func_t mod_init_func_start \
-__asm("section$start$__DATA$__mod_init_func");
-extern init_func_t mod_init_func_end \
-__asm("section$end$__DATA$__mod_init_func");
-
-static void noop(int argc, const char *argv[], const char *envp[],
-                 const char *apple[])
-{
-  /* nothing */
-}
-
-static init_func_t *g_saved_init_funcs;
-static unsigned int g_num_saved_init_funcs;
-static int g_saved_argc;
-static const char **g_saved_argv;
-static const char **g_saved_envp;
-static const char **g_saved_apple;
-
-static void
-allow_write(const void *start, const void *end)
-{
-  const size_t page_size = sysconf(_SC_PAGE_SIZE);
-  const size_t page_mask = ~(page_size - 1);
-  void *p = (void *)((uintptr_t)start & page_mask);
-  size_t sz = ((uintptr_t)end - (uintptr_t)p + page_size - 1) & ~page_mask;
-
-  /* It seems that Monterey (12.1) maps the section read-only.
-   * Make it writable as we want to patch it.
-   * This was not necessary for Mojave.
-   * Ignore failures as this might not be critical, depending on
-   * the OS version.
-   */
-
-  mprotect(p, sz, PROT_READ | PROT_WRITE);
-}
-
-__attribute__((constructor))
-static void save_and_replace_init_funcs(int argc, const char *argv[],
-                                        const char *envp[],
-                                        const char *apple[])
-{
-  init_func_t *fp;
-  unsigned int nfuncs = &mod_init_func_end - &mod_init_func_start;
-
-  assert(nfuncs > 0);
-  g_num_saved_init_funcs = nfuncs - 1;
-  if (g_num_saved_init_funcs == 0)
-    {
-      /* This function is the only constructor in the binary.
-       * no need to apply the following hack.
-       */
-
-      return;
-    }
-
-  g_saved_argc = argc;
-  g_saved_argv = argv;
-  g_saved_envp = envp;
-  g_saved_apple = apple;
-
-  g_saved_init_funcs = malloc(g_num_saved_init_funcs *
-                              sizeof(*g_saved_init_funcs));
-  allow_write(&mod_init_func_start, &mod_init_func_end);
-  int i = 0;
-  for (fp = &mod_init_func_start; fp < &mod_init_func_end; fp++)
-    {
-      if (*fp == save_and_replace_init_funcs)
-        {
-          assert(i == 0);
-        }
-      else
-        {
-          g_saved_init_funcs[i - 1] = *fp;
-          *fp = noop;
-        }
-      i++;
-    }
-}
-
-/****************************************************************************
- * Public Functions
- ****************************************************************************/
-
-/****************************************************************************
- * Name: macho_call_saved_init_funcs
- ****************************************************************************/
-
-void
-macho_call_saved_init_funcs(void)
-{
-  unsigned int i;
-  for (i = 0; i < g_num_saved_init_funcs; i++)
-    {
-      g_saved_init_funcs[i](g_saved_argc, g_saved_argv, g_saved_envp,
-                            g_saved_apple);
-    }
-}
diff --git a/libs/libc/misc/lib_cxx_initialize.c b/libs/libc/misc/lib_cxx_initialize.c
index 6b1d1970066..1f75d5ba8d4 100644
--- a/libs/libc/misc/lib_cxx_initialize.c
+++ b/libs/libc/misc/lib_cxx_initialize.c
@@ -38,10 +38,6 @@
  * External References
  ****************************************************************************/
 
-#if defined(CONFIG_ARCH_SIM) && defined(CONFIG_HOST_MACOS)
-extern void macho_call_saved_init_funcs(void);
-#endif
-
 /****************************************************************************
  * Public Functions
  ****************************************************************************/
@@ -69,9 +65,6 @@ void lib_cxx_initialize(void)
 
   if (inited == 0)
     {
-#if defined(CONFIG_ARCH_SIM) && defined(CONFIG_HOST_MACOS)
-      macho_call_saved_init_funcs();
-#else
       initializer_t *initp;
 
       sinfo("_sinit: %p _einit: %p\n", _sinit, _einit);
@@ -93,7 +86,6 @@ void lib_cxx_initialize(void)
               initializer();
             }
         }
-#endif
 
       inited = 1;
     }

Reply via email to