https://github.com/turran updated 
https://github.com/llvm/llvm-project/pull/153168

>From fc8c1a0e57efe07c9d4a9f46ad733fbba481cad6 Mon Sep 17 00:00:00 2001
From: Jorge Zapata <[email protected]>
Date: Mon, 23 Jun 2025 12:53:57 +0200
Subject: [PATCH 1/9] [wasm] Support different signature function pointers

---
 clang/lib/CodeGen/CGCall.cpp              |  20 ++
 clang/lib/CodeGen/CGExprConstant.cpp      |  13 ++
 clang/lib/CodeGen/TargetInfo.h            |  11 ++
 clang/lib/CodeGen/Targets/WebAssembly.cpp | 222 ++++++++++++++++++++++
 4 files changed, 266 insertions(+)

diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index b7b79e7051181..2b86004944118 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -5067,6 +5067,26 @@ void CodeGenFunction::EmitCallArg(CallArgList &args, 
const Expr *E,
     return;
   }
 
+  // For WebAssembly target we need to create thunk functions
+  // to properly handle function pointers args with a different signature.
+  // Due to opaque pointers, this can not be handled in LLVM
+  // (WebAssemblyFixFunctionBitcast) anymore
+  if (CGM.getTriple().isWasm() && type->isFunctionPointerType()) {
+    if (const DeclRefExpr *DRE =
+            CGM.getTargetCodeGenInfo().getWasmFunctionDeclRefExpr(
+                E, CGM.getContext())) {
+      llvm::Value *V = EmitLValue(DRE).getPointer(*this);
+      llvm::Function *Thunk =
+          CGM.getTargetCodeGenInfo().getOrCreateWasmFunctionPointerThunk(
+              CGM, V, DRE->getDecl()->getType(), type);
+      if (Thunk) {
+        RValue R = RValue::get(Thunk);
+        args.add(R, type);
+        return;
+      }
+    }
+  }
+
   args.add(EmitAnyExprToTemp(E), type);
 }
 
diff --git a/clang/lib/CodeGen/CGExprConstant.cpp 
b/clang/lib/CodeGen/CGExprConstant.cpp
index 24712d3325b2e..57d93887ce2e2 100644
--- a/clang/lib/CodeGen/CGExprConstant.cpp
+++ b/clang/lib/CodeGen/CGExprConstant.cpp
@@ -2285,6 +2285,19 @@ ConstantLValueEmitter::tryEmitBase(const 
APValue::LValueBase &base) {
 
     if (const auto *FD = dyn_cast<FunctionDecl>(D)) {
       llvm::Constant *C = CGM.getRawFunctionPointer(FD);
+      // For WebAssembly target we need to create thunk functions
+      // to properly handle function pointers args with a different signature
+      // Due to opaque pointers, this can not be handled in LLVM
+      // (WebAssemblyFixFunctionBitcast) anymore
+      if (CGM.getTriple().isWasm() && DestType->isFunctionPointerType()) {
+        llvm::Function *Thunk =
+            CGM.getTargetCodeGenInfo().getOrCreateWasmFunctionPointerThunk(
+                CGM, C, D->getType(), DestType);
+        if (Thunk) {
+          C = Thunk;
+        }
+      }
+
       if (FD->getType()->isCFIUncheckedCalleeFunctionType())
         C = llvm::NoCFIValue::get(cast<llvm::GlobalValue>(C));
       return PtrAuthSign(C);
diff --git a/clang/lib/CodeGen/TargetInfo.h b/clang/lib/CodeGen/TargetInfo.h
index 98ee894fe557f..c708f798627e5 100644
--- a/clang/lib/CodeGen/TargetInfo.h
+++ b/clang/lib/CodeGen/TargetInfo.h
@@ -403,6 +403,17 @@ class TargetCodeGenInfo {
   /// Return the WebAssembly funcref reference type.
   virtual llvm::Type *getWasmFuncrefReferenceType() const { return nullptr; }
 
+  virtual const DeclRefExpr *getWasmFunctionDeclRefExpr(const Expr *E,
+                                                        ASTContext &Ctx) const 
{
+    return nullptr;
+  }
+
+  virtual llvm::Function *getOrCreateWasmFunctionPointerThunk(
+      CodeGenModule &CGM, llvm::Value *OriginalFnPtr, QualType SrcType,
+      QualType DstType) const {
+    return nullptr;
+  }
+
   /// Emit the device-side copy of the builtin surface type.
   virtual bool emitCUDADeviceBuiltinSurfaceDeviceCopy(CodeGenFunction &CGF,
                                                       LValue Dst,
diff --git a/clang/lib/CodeGen/Targets/WebAssembly.cpp 
b/clang/lib/CodeGen/Targets/WebAssembly.cpp
index ebe996a4edd8d..b0a4f35c80c3d 100644
--- a/clang/lib/CodeGen/Targets/WebAssembly.cpp
+++ b/clang/lib/CodeGen/Targets/WebAssembly.cpp
@@ -9,9 +9,14 @@
 #include "ABIInfoImpl.h"
 #include "TargetInfo.h"
 
+#include "clang/AST/ParentMapContext.h"
+#include <sstream>
+
 using namespace clang;
 using namespace clang::CodeGen;
 
+#define DEBUG_TYPE "clang-target-wasm"
+
 
//===----------------------------------------------------------------------===//
 // WebAssembly ABI Implementation
 //
@@ -93,6 +98,112 @@ class WebAssemblyTargetCodeGenInfo final : public 
TargetCodeGenInfo {
   virtual llvm::Type *getWasmFuncrefReferenceType() const override {
     return llvm::Type::getWasm_FuncrefTy(getABIInfo().getVMContext());
   }
+
+  virtual const DeclRefExpr *
+  getWasmFunctionDeclRefExpr(const Expr *E, ASTContext &Ctx) const override {
+    // Go down in the tree until finding the DeclRefExpr
+    const DeclRefExpr *DRE = findDeclRefExpr(E);
+    if (!DRE)
+      return nullptr;
+
+    // Final case. The argument is a declared function
+    if (isa<FunctionDecl>(DRE->getDecl())) {
+      return DRE;
+    }
+
+    // Complex case. The argument is a variable, we need to check
+    // every assignment of the variable and see if we are bitcasting
+    // or not.
+    if (const auto *VD = dyn_cast<VarDecl>(DRE->getDecl())) {
+      DRE = findDeclRefExprForVarUp(E, VD, Ctx);
+      if (DRE)
+        return DRE;
+
+      // If no assignment exists on every parent scope, check for the
+      // initialization
+      if (!DRE && VD->hasInit()) {
+        return getWasmFunctionDeclRefExpr(VD->getInit(), Ctx);
+      }
+    }
+
+    return nullptr;
+  }
+
+  virtual llvm::Function *getOrCreateWasmFunctionPointerThunk(
+      CodeGenModule &CGM, llvm::Value *OriginalFnPtr, QualType SrcType,
+      QualType DstType) const override {
+    // Get the signatures (null when a function type has no prototype)
+    const FunctionProtoType *SrcProtoType = SrcType->getAs<FunctionProtoType>();
+    const FunctionProtoType *DstProtoType = DstType->getAs<PointerType>()
+                                                ->getPointeeType()
+                                                ->getAs<FunctionProtoType>();
+
+    // Thunks are only generated when the destination takes more parameters
+    if (!SrcProtoType || !DstProtoType ||
+        DstProtoType->getNumParams() <= SrcProtoType->getNumParams())
+      return nullptr;
+
+    // Get the llvm function types
+    llvm::FunctionType *DstFunctionType = llvm::cast<llvm::FunctionType>(
+        CGM.getTypes().ConvertType(QualType(DstProtoType, 0)));
+    llvm::FunctionType *SrcFunctionType = llvm::cast<llvm::FunctionType>(
+        CGM.getTypes().ConvertType(QualType(SrcProtoType, 0)));
+
+    // Construct the Thunk function with the Target (destination) signature
+    std::string ThunkName = getThunkName(OriginalFnPtr->getName().str(),
+                                         DstProtoType, CGM.getContext());
+    llvm::Module &M = CGM.getModule();
+    llvm::Function *Thunk = llvm::Function::Create(
+        DstFunctionType, llvm::Function::InternalLinkage, ThunkName, M);
+
+    // Build the thunk body
+    llvm::IRBuilder<> Builder(
+        llvm::BasicBlock::Create(M.getContext(), "entry", Thunk));
+
+    // Gather the arguments for calling the original function
+    std::vector<llvm::Value *> CallArgs;
+    unsigned CallN = SrcProtoType->getNumParams();
+
+    auto ArgIt = Thunk->arg_begin();
+    for (unsigned i = 0; i < CallN && ArgIt != Thunk->arg_end(); ++i, ++ArgIt) {
+      llvm::Value *A = &*ArgIt;
+      CallArgs.push_back(A);
+    }
+
+    // Create the call to the original function pointer
+    llvm::CallInst *Call =
+        Builder.CreateCall(SrcFunctionType, OriginalFnPtr, CallArgs);
+
+    // Handle return type
+    llvm::Type *ThunkRetTy = DstFunctionType->getReturnType();
+
+    if (ThunkRetTy->isVoidTy()) {
+      Builder.CreateRetVoid();
+    } else {
+      llvm::Value *Ret = Call;
+      if (Ret->getType() != ThunkRetTy)
+        Ret = Builder.CreateBitCast(Ret, ThunkRetTy);
+      Builder.CreateRet(Ret);
+    }
+    LLVM_DEBUG(llvm::dbgs() << "getOrCreateWasmFunctionPointerThunk:"
+                            << " from " << OriginalFnPtr->getName().str()
+                            << " to " << ThunkName << "\n");
+    return Thunk;
+  }
+
+private:
+  // Build the thunk name: "%s_{type1}_{type2}_..."
+  std::string getThunkName(std::string OrigName,
+                           const FunctionProtoType *DstProto,
+                           const ASTContext &Ctx) const;
+  std::string sanitizeTypeString(const std::string &typeStr) const;
+  std::string getTypeName(const QualType &qt, const ASTContext &Ctx) const;
+  const DeclRefExpr *findDeclRefExpr(const Expr *E) const;
+  const DeclRefExpr *findDeclRefExprForVarDown(const Stmt *Parent,
+                                               const VarDecl *V,
+                                               ASTContext &Ctx) const;
+  const DeclRefExpr *findDeclRefExprForVarUp(const Expr *E, const VarDecl *V,
+                                             ASTContext &Ctx) const;
 };
 
 /// Classify argument of given type \p Ty.
@@ -171,3 +282,114 @@ CodeGen::createWebAssemblyTargetCodeGenInfo(CodeGenModule 
&CGM,
                                             WebAssemblyABIKind K) {
   return std::make_unique<WebAssemblyTargetCodeGenInfo>(CGM.getTypes(), K);
 }
+
+// Helper to sanitize type name string for use in function name
+std::string WebAssemblyTargetCodeGenInfo::sanitizeTypeString(
+    const std::string &typeStr) const {
+  std::string s;
+  for (char c : typeStr) {
+    if (isalnum(c))
+      s += c;
+    else if (c == ' ')
+      s += '_';
+    else
+      s += '_';
+  }
+  return s;
+}
+
+// Helper to generate the type string from QualType
+std::string
+WebAssemblyTargetCodeGenInfo::getTypeName(const QualType &qt,
+                                          const ASTContext &Ctx) const {
+  PrintingPolicy Policy(Ctx.getLangOpts());
+  Policy.SuppressTagKeyword = true;
+  Policy.SuppressScope = true;
+  Policy.AnonymousTagLocations = false;
+  std::string typeStr = qt.getAsString(Policy);
+  return sanitizeTypeString(typeStr);
+}
+
+std::string
+WebAssemblyTargetCodeGenInfo::getThunkName(std::string OrigName,
+                                           const FunctionProtoType *DstProto,
+                                           const ASTContext &Ctx) const {
+  std::ostringstream oss;
+  oss << "__" << OrigName;
+  for (unsigned i = 0; i < DstProto->getNumParams(); ++i) {
+    oss << "_" << getTypeName(DstProto->getParamType(i), Ctx);
+  }
+  return oss.str();
+}
+
+/// Recursively find the first DeclRefExpr in an Expr subtree.
+/// Returns nullptr if not found.
+const DeclRefExpr *
+WebAssemblyTargetCodeGenInfo::findDeclRefExpr(const Expr *E) const {
+  if (!E)
+    return nullptr;
+
+  // In case it is a function call, abort
+  if (isa<CallExpr>(E))
+    return nullptr;
+
+  // If this node is a DeclRefExpr, return it.
+  if (const auto *DRE = dyn_cast<DeclRefExpr>(E))
+    return DRE;
+
+  // Otherwise, recurse into children.
+  for (const Stmt *Child : E->children()) {
+    if (const auto *ChildExpr = dyn_cast_or_null<Expr>(Child)) {
+      if (const DeclRefExpr *Found = findDeclRefExpr(ChildExpr))
+        return Found;
+    }
+  }
+  return nullptr;
+}
+
+const DeclRefExpr *WebAssemblyTargetCodeGenInfo::findDeclRefExprForVarDown(
+    const Stmt *Parent, const VarDecl *V, ASTContext &Ctx) const {
+  if (!Parent)
+    return nullptr;
+
+  // Find down every assignment of V
+  // FIXME we need to stop before the expression where V is used
+  const BinaryOperator *A = nullptr;
+  for (const Stmt *Child : Parent->children()) {
+    if (const auto *BO = dyn_cast_or_null<BinaryOperator>(Child)) {
+      if (!BO->isAssignmentOp())
+        continue;
+      auto *LHS = llvm::dyn_cast<DeclRefExpr>(BO->getLHS());
+      if (LHS && LHS->getDecl() == V) {
+        A = BO;
+      }
+    }
+  }
+
+  // We have an assignment of the Var, recurse in it
+  if (A) {
+    return getWasmFunctionDeclRefExpr(A->getRHS(), Ctx);
+  }
+
+  return nullptr;
+}
+
+const DeclRefExpr *WebAssemblyTargetCodeGenInfo::findDeclRefExprForVarUp(
+    const Expr *E, const VarDecl *V, ASTContext &Ctx) const {
+  const clang::Stmt *cur = E;
+  while (cur) {
+    auto parents = Ctx.getParentMapContext().getParents(*cur);
+    if (parents.empty())
+      break;
+    const clang::Stmt *parentStmt = parents[0].get<clang::Stmt>();
+    if (!parentStmt)
+      break;
+    if (const auto *CS = dyn_cast<clang::CompoundStmt>(parentStmt)) {
+      const DeclRefExpr *DRE = findDeclRefExprForVarDown(CS, V, Ctx);
+      if (DRE)
+        return DRE;
+    }
+    cur = parentStmt;
+  }
+  return nullptr;
+}
\ No newline at end of file

>From 7d746cf68884ca7f7f90dc8fd107755890c483ad Mon Sep 17 00:00:00 2001
From: Kleis Auke Wolthuizen <[email protected]>
Date: Mon, 11 Aug 2025 14:47:55 +0200
Subject: [PATCH 2/9] Prefer use of Wasm signatures when building the thunk
 name

---
 clang/lib/CodeGen/Targets/WebAssembly.cpp | 66 ++++++++++++-----------
 1 file changed, 36 insertions(+), 30 deletions(-)

diff --git a/clang/lib/CodeGen/Targets/WebAssembly.cpp 
b/clang/lib/CodeGen/Targets/WebAssembly.cpp
index b0a4f35c80c3d..983f550d48349 100644
--- a/clang/lib/CodeGen/Targets/WebAssembly.cpp
+++ b/clang/lib/CodeGen/Targets/WebAssembly.cpp
@@ -10,7 +10,6 @@
 #include "TargetInfo.h"
 
 #include "clang/AST/ParentMapContext.h"
-#include <sstream>
 
 using namespace clang;
 using namespace clang::CodeGen;
@@ -192,10 +191,11 @@ class WebAssemblyTargetCodeGenInfo final : public 
TargetCodeGenInfo {
   }
 
 private:
-  // Build the thunk name: "%s_{type1}_{type2}_..."
+  // Build the thunk name: "%s_{OrigName}_{WasmSig}"
   std::string getThunkName(std::string OrigName,
                            const FunctionProtoType *DstProto,
                            const ASTContext &Ctx) const;
+  char getTypeSig(const QualType &Ty, const ASTContext &Ctx) const;
   std::string sanitizeTypeString(const std::string &typeStr) const;
   std::string getTypeName(const QualType &qt, const ASTContext &Ctx) const;
   const DeclRefExpr *findDeclRefExpr(const Expr *E) const;
@@ -283,43 +283,49 @@ CodeGen::createWebAssemblyTargetCodeGenInfo(CodeGenModule 
&CGM,
   return std::make_unique<WebAssemblyTargetCodeGenInfo>(CGM.getTypes(), K);
 }
 
-// Helper to sanitize type name string for use in function name
-std::string WebAssemblyTargetCodeGenInfo::sanitizeTypeString(
-    const std::string &typeStr) const {
-  std::string s;
-  for (char c : typeStr) {
-    if (isalnum(c))
-      s += c;
-    else if (c == ' ')
-      s += '_';
-    else
-      s += '_';
+// Helper to get the type signature character for a given QualType
+// Returns a character that represents the given QualType in a wasm signature.
+// See getInvokeSig() in WebAssemblyAsmPrinter for related logic.
+char WebAssemblyTargetCodeGenInfo::getTypeSig(const QualType &Ty,
+                                              const ASTContext &Ctx) const {
+  if (Ty->isAnyPointerType()) {
+    return Ctx.getTypeSize(Ctx.VoidPtrTy) == 32 ? 'i' : 'j';
+  }
+  if (Ty->isIntegerType()) {
+    return Ctx.getTypeSize(Ty) <= 32 ? 'i' : 'j';
+  }
+  if (Ty->isFloatingType()) {
+    return Ctx.getTypeSize(Ty) <= 32 ? 'f' : 'd';
+  }
+  if (Ty->isVectorType()) {
+    return 'V';
+  }
+  if (Ty->isWebAssemblyTableType()) {
+    return 'F';
+  }
+  if (Ty->isWebAssemblyExternrefType()) {
+    return 'X';
   }
-  return s;
-}
 
-// Helper to generate the type string from QualType
-std::string
-WebAssemblyTargetCodeGenInfo::getTypeName(const QualType &qt,
-                                          const ASTContext &Ctx) const {
-  PrintingPolicy Policy(Ctx.getLangOpts());
-  Policy.SuppressTagKeyword = true;
-  Policy.SuppressScope = true;
-  Policy.AnonymousTagLocations = false;
-  std::string typeStr = qt.getAsString(Policy);
-  return sanitizeTypeString(typeStr);
+  llvm_unreachable("Unhandled QualType");
 }
 
 std::string
 WebAssemblyTargetCodeGenInfo::getThunkName(std::string OrigName,
                                            const FunctionProtoType *DstProto,
                                            const ASTContext &Ctx) const {
-  std::ostringstream oss;
-  oss << "__" << OrigName;
+
+  std::string ThunkName = "__" + OrigName + "_";
+  QualType RetTy = DstProto->getReturnType();
+  if (RetTy->isVoidType()) {
+    ThunkName += 'v';
+  } else {
+    ThunkName += getTypeSig(RetTy, Ctx);
+  }
   for (unsigned i = 0; i < DstProto->getNumParams(); ++i) {
-    oss << "_" << getTypeName(DstProto->getParamType(i), Ctx);
+    ThunkName += getTypeSig(DstProto->getParamType(i), Ctx);
   }
-  return oss.str();
+  return ThunkName;
 }
 
 /// Recursively find the first DeclRefExpr in an Expr subtree.
@@ -392,4 +398,4 @@ const DeclRefExpr 
*WebAssemblyTargetCodeGenInfo::findDeclRefExprForVarUp(
     cur = parentStmt;
   }
   return nullptr;
-}
\ No newline at end of file
+}

>From 6bfecf12bb569662d2e09d393f629b936fd9ebd5 Mon Sep 17 00:00:00 2001
From: Jorge Zapata <[email protected]>
Date: Mon, 11 Aug 2025 16:31:04 +0200
Subject: [PATCH 3/9] [wasm] Add a thunk cache

---
 clang/lib/CodeGen/Targets/WebAssembly.cpp | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/clang/lib/CodeGen/Targets/WebAssembly.cpp 
b/clang/lib/CodeGen/Targets/WebAssembly.cpp
index 983f550d48349..1bbc9ba4311fc 100644
--- a/clang/lib/CodeGen/Targets/WebAssembly.cpp
+++ b/clang/lib/CodeGen/Targets/WebAssembly.cpp
@@ -8,6 +8,7 @@
 
 #include "ABIInfoImpl.h"
 #include "TargetInfo.h"
+#include "llvm/ADT/StringMap.h"
 
 #include "clang/AST/ParentMapContext.h"
 
@@ -56,6 +57,7 @@ class WebAssemblyTargetCodeGenInfo final : public 
TargetCodeGenInfo {
       : TargetCodeGenInfo(std::make_unique<WebAssemblyABIInfo>(CGT, K)) {
     SwiftInfo =
         std::make_unique<SwiftABIInfo>(CGT, /*SwiftErrorInRegister=*/false);
+    ThunkCache = llvm::StringMap<llvm::Function *>();
   }
 
   void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
@@ -151,6 +153,16 @@ class WebAssemblyTargetCodeGenInfo final : public 
TargetCodeGenInfo {
     // Construct the Thunk function with the Target (destination) signature
     std::string ThunkName = getThunkName(OriginalFnPtr->getName().str(),
                                          DstProtoType, CGM.getContext());
+    // Check if we already have a thunk for this function
+    if (auto It = ThunkCache.find(ThunkName); It != ThunkCache.end()) {
+      LLVM_DEBUG(llvm::dbgs() << "getOrCreateWasmFunctionPointerThunk: "
+                              << "found existing thunk for "
+                              << OriginalFnPtr->getName().str() << " as "
+                              << ThunkName << "\n");
+      return It->second;
+    }
+
+    // Create the thunk function
     llvm::Module &M = CGM.getModule();
     llvm::Function *Thunk = llvm::Function::Create(
         DstFunctionType, llvm::Function::InternalLinkage, ThunkName, M);
@@ -187,10 +199,14 @@ class WebAssemblyTargetCodeGenInfo final : public 
TargetCodeGenInfo {
     LLVM_DEBUG(llvm::dbgs() << "getOrCreateWasmFunctionPointerThunk:"
                             << " from " << OriginalFnPtr->getName().str()
                             << " to " << ThunkName << "\n");
+    // Cache the thunk
+    ThunkCache[ThunkName] = Thunk;
     return Thunk;
   }
 
 private:
+  // The thunk cache
+  mutable llvm::StringMap<llvm::Function *> ThunkCache;
   // Build the thunk name: "%s_{OrigName}_{WasmSig}"
   std::string getThunkName(std::string OrigName,
                            const FunctionProtoType *DstProto,

>From 90770b32fa0782f0ce57b770766f78423cb1ab2d Mon Sep 17 00:00:00 2001
From: Jorge Zapata <[email protected]>
Date: Tue, 12 Aug 2025 12:02:29 +0200
Subject: [PATCH 4/9] [wasm] Add function pointer thunk tests

---
 .../CodeGenWebAssembly/function-pointer-arg.c | 25 ++++++++++++++++
 .../function-pointer-field.c                  | 30 +++++++++++++++++++
 2 files changed, 55 insertions(+)
 create mode 100644 clang/test/CodeGenWebAssembly/function-pointer-arg.c
 create mode 100644 clang/test/CodeGenWebAssembly/function-pointer-field.c

diff --git a/clang/test/CodeGenWebAssembly/function-pointer-arg.c 
b/clang/test/CodeGenWebAssembly/function-pointer-arg.c
new file mode 100644
index 0000000000000..ff7b4186bbf7b
--- /dev/null
+++ b/clang/test/CodeGenWebAssembly/function-pointer-arg.c
@@ -0,0 +1,25 @@
+// REQUIRES: webassembly-registered-target
+// RUN: %clang_cc1 -triple wasm32-unknown-unknown -emit-llvm -O0 -o - %s | 
FileCheck %s
+
+// Test of function pointer bitcast in a function argument with different 
argument number in wasm32
+
+#define FUNCTION_POINTER(f) ((FunctionPointer)(f))
+typedef int (*FunctionPointer)(int a, int b);
+
+int fp_as_arg(FunctionPointer fp, int a, int b) {
+  return fp(a, b);
+}
+
+int fp_less(int a) {
+  return a;
+}
+
+// CHECK-LABEL: @test
+// CHECK: call i32 @fp_as_arg(ptr noundef @__fp_less_iii, i32 noundef 10, i32 
noundef 20)
+void test() {
+  fp_as_arg(FUNCTION_POINTER(fp_less), 10, 20);
+}
+
+// CHECK: define internal i32 @__fp_less_iii(i32 %0, i32 %1)
+// CHECK: %2 = call i32 @fp_less(i32 %0)
+// CHECK: ret i32 %2
\ No newline at end of file
diff --git a/clang/test/CodeGenWebAssembly/function-pointer-field.c 
b/clang/test/CodeGenWebAssembly/function-pointer-field.c
new file mode 100644
index 0000000000000..103a265ebf5fb
--- /dev/null
+++ b/clang/test/CodeGenWebAssembly/function-pointer-field.c
@@ -0,0 +1,30 @@
+// REQUIRES: webassembly-registered-target
+// RUN: %clang_cc1 -triple wasm32-unknown-unknown -emit-llvm -O0 -o - %s | 
FileCheck %s
+
+// Test of function pointer bitcast in a struct field with different argument 
number in wasm32
+
+#define FUNCTION_POINTER(f) ((FunctionPointer)(f))
+typedef int (*FunctionPointer)(int a, int b); 
+
+// CHECK: @__const.test.sfp = private unnamed_addr constant 
%struct._StructWithFunctionPointer { ptr @__fp_less_iii }, align 4
+
+typedef struct _StructWithFunctionPointer {
+  FunctionPointer fp;
+} StructWithFunctionPointer;
+
+int fp_less(int a) {
+  return a;
+}
+                                                  
+// CHECK-LABEL: @test
+void test() {
+  StructWithFunctionPointer sfp = {
+    FUNCTION_POINTER(fp_less)
+  };
+
+  int a1 = sfp.fp(10, 20);
+}
+
+// CHECK: define internal i32 @__fp_less_iii(i32 %0, i32 %1)
+// CHECK: %2 = call i32 @fp_less(i32 %0)
+// CHECK: ret i32 %2

>From 2686f61152163f03adf67e7139f09eaa1f51e4d8 Mon Sep 17 00:00:00 2001
From: Jorge Zapata <[email protected]>
Date: Wed, 3 Sep 2025 17:14:02 +0200
Subject: [PATCH 5/9] [wasm] Add -fwasm-fix-function-bitcasts to enable the
 thunk generation

---
 clang/include/clang/Basic/LangOptions.def              | 2 ++
 clang/include/clang/Options/Options.td                 | 6 ++++++
 clang/lib/CodeGen/CGCall.cpp                           | 3 ++-
 clang/lib/CodeGen/CGExprConstant.cpp                   | 4 +++-
 clang/lib/Driver/ToolChains/WebAssembly.cpp            | 4 ++++
 clang/test/CodeGenWebAssembly/function-pointer-arg.c   | 2 +-
 clang/test/CodeGenWebAssembly/function-pointer-field.c | 2 +-
 7 files changed, 19 insertions(+), 4 deletions(-)

diff --git a/clang/include/clang/Basic/LangOptions.def 
b/clang/include/clang/Basic/LangOptions.def
index 6bba142aaf428..ae6fe51fd9794 100644
--- a/clang/include/clang/Basic/LangOptions.def
+++ b/clang/include/clang/Basic/LangOptions.def
@@ -530,6 +530,8 @@ LANGOPT(EnableLifetimeSafetyTUAnalysis, 1, 0, 
NotCompatible, "Lifetime safety at
 LANGOPT(PreserveVec3Type, 1, 0, NotCompatible, "Preserve 3-component vector 
type")
 LANGOPT(Reflection      , 1, 0, NotCompatible, "C++26 Reflection")
 
+LANGOPT(WasmFixFunctionBitcasts, 1, 0, Compatible, "Enable auto-generation of 
thunks for mismatched function pointer casts in WebAssembly")
+
 #undef LANGOPT
 #undef ENUM_LANGOPT
 #undef VALUE_LANGOPT
diff --git a/clang/include/clang/Options/Options.td 
b/clang/include/clang/Options/Options.td
index 7e612aad92cda..c811a58d98f05 100644
--- a/clang/include/clang/Options/Options.td
+++ b/clang/include/clang/Options/Options.td
@@ -10083,6 +10083,7 @@ def fexperimental_emit_sgep
                "(experimental).">,
       MarshallingInfoFlag<LangOpts<"EmitStructuredGEP">>;
 
+// WebAssembly-only Options
 def no_wasm_opt : Flag<["--"], "no-wasm-opt">,
   Group<m_Group>,
   HelpText<"Disable the wasm-opt optimizer">,
@@ -10091,3 +10092,8 @@ def wasm_opt : Flag<["--"], "wasm-opt">,
   Group<m_Group>,
   HelpText<"Enable the wasm-opt optimizer (default)">,
   MarshallingInfoNegativeFlag<LangOpts<"NoWasmOpt">>;
+def fwasm_fix_function_bitcasts : Flag<["-"], "fwasm-fix-function-bitcasts">,
+  Group<f_Group>,
+  HelpText<"Enable auto-generation of thunks for mismatched function pointer 
casts in WebAssembly">,
+  Visibility<[ClangOption, CC1Option]>,
+  MarshallingInfoFlag<LangOpts<"WasmFixFunctionBitcasts">>;
\ No newline at end of file
diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index 2b86004944118..c7d616c724de7 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -5071,7 +5071,8 @@ void CodeGenFunction::EmitCallArg(CallArgList &args, 
const Expr *E,
   // to properly handle function pointers args with a different signature.
   // Due to opaque pointers, this can not be handled in LLVM
   // (WebAssemblyFixFunctionBitcast) anymore
-  if (CGM.getTriple().isWasm() && type->isFunctionPointerType()) {
+  if (CGM.getTriple().isWasm() && CGM.getLangOpts().WasmFixFunctionBitcasts &&
+      type->isFunctionPointerType()) {
     if (const DeclRefExpr *DRE =
             CGM.getTargetCodeGenInfo().getWasmFunctionDeclRefExpr(
                 E, CGM.getContext())) {
diff --git a/clang/lib/CodeGen/CGExprConstant.cpp 
b/clang/lib/CodeGen/CGExprConstant.cpp
index 57d93887ce2e2..a7025dc65e45a 100644
--- a/clang/lib/CodeGen/CGExprConstant.cpp
+++ b/clang/lib/CodeGen/CGExprConstant.cpp
@@ -2289,7 +2289,9 @@ ConstantLValueEmitter::tryEmitBase(const 
APValue::LValueBase &base) {
       // to properly handle function pointers args with a different signature
       // Due to opaque pointers, this can not be handled in LLVM
       // (WebAssemblyFixFunctionBitcast) anymore
-      if (CGM.getTriple().isWasm() && DestType->isFunctionPointerType()) {
+      if (CGM.getTriple().isWasm() &&
+        CGM.getLangOpts().WasmFixFunctionBitcasts &&
+        DestType->isFunctionPointerType()) {
         llvm::Function *Thunk =
             CGM.getTargetCodeGenInfo().getOrCreateWasmFunctionPointerThunk(
                 CGM, C, D->getType(), DestType);
diff --git a/clang/lib/Driver/ToolChains/WebAssembly.cpp 
b/clang/lib/Driver/ToolChains/WebAssembly.cpp
index e532ef0743cc2..1bd893f311bc2 100644
--- a/clang/lib/Driver/ToolChains/WebAssembly.cpp
+++ b/clang/lib/Driver/ToolChains/WebAssembly.cpp
@@ -432,6 +432,10 @@ void WebAssembly::addClangTargetOptions(const ArgList 
&DriverArgs,
     CC1Args.push_back("-wasm-enable-eh");
   }
 
+  if (DriverArgs.getLastArg(options::OPT_fwasm_fix_function_bitcasts)) {
+    CC1Args.push_back("-fwasm-fix-function-bitcasts");
+  }
+
   for (const Arg *A : DriverArgs.filtered(options::OPT_mllvm)) {
     StringRef Opt = A->getValue(0);
     if (Opt.starts_with("-emscripten-cxx-exceptions-allowed")) {
diff --git a/clang/test/CodeGenWebAssembly/function-pointer-arg.c 
b/clang/test/CodeGenWebAssembly/function-pointer-arg.c
index ff7b4186bbf7b..4ca85f11fe15f 100644
--- a/clang/test/CodeGenWebAssembly/function-pointer-arg.c
+++ b/clang/test/CodeGenWebAssembly/function-pointer-arg.c
@@ -1,5 +1,5 @@
 // REQUIRES: webassembly-registered-target
-// RUN: %clang_cc1 -triple wasm32-unknown-unknown -emit-llvm -O0 -o - %s | 
FileCheck %s
+// RUN: %clang_cc1 -triple wasm32-unknown-unknown -emit-llvm -O0 
-fwasm-fix-function-bitcasts -o - %s | FileCheck %s
 
 // Test of function pointer bitcast in a function argument with different 
argument number in wasm32
 
diff --git a/clang/test/CodeGenWebAssembly/function-pointer-field.c 
b/clang/test/CodeGenWebAssembly/function-pointer-field.c
index 103a265ebf5fb..8c8424dc922e8 100644
--- a/clang/test/CodeGenWebAssembly/function-pointer-field.c
+++ b/clang/test/CodeGenWebAssembly/function-pointer-field.c
@@ -1,5 +1,5 @@
 // REQUIRES: webassembly-registered-target
-// RUN: %clang_cc1 -triple wasm32-unknown-unknown -emit-llvm -O0 -o - %s | 
FileCheck %s
+// RUN: %clang_cc1 -triple wasm32-unknown-unknown -emit-llvm -O0 
-fwasm-fix-function-bitcasts -o - %s | FileCheck %s
 
 // Test of function pointer bitcast in a struct field with different argument 
number in wasm32
 

>From 2e5a015b97914b3cce271aa4215c22187284643e Mon Sep 17 00:00:00 2001
From: Jorge Zapata <[email protected]>
Date: Tue, 7 Apr 2026 01:52:05 +0200
Subject: [PATCH 6/9] [wasm] Add a more general approach at bitcast

---
 clang/lib/CodeGen/CGCall.cpp                  | 21 ----------
 clang/lib/CodeGen/CGExprScalar.cpp            | 21 ++++++++++
 clang/lib/CodeGen/Targets/WebAssembly.cpp     | 38 ++++++++++---------
 .../function-pointer-local.c                  | 28 ++++++++++++++
 .../function-pointer-void-assign.c            | 24 ++++++++++++
 5 files changed, 93 insertions(+), 39 deletions(-)
 create mode 100644 clang/test/CodeGenWebAssembly/function-pointer-local.c
 create mode 100644 clang/test/CodeGenWebAssembly/function-pointer-void-assign.c

diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index c7d616c724de7..b7b79e7051181 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -5067,27 +5067,6 @@ void CodeGenFunction::EmitCallArg(CallArgList &args, 
const Expr *E,
     return;
   }
 
-  // For WebAssembly target we need to create thunk functions
-  // to properly handle function pointers args with a different signature.
-  // Due to opaque pointers, this can not be handled in LLVM
-  // (WebAssemblyFixFunctionBitcast) anymore
-  if (CGM.getTriple().isWasm() && CGM.getLangOpts().WasmFixFunctionBitcasts &&
-      type->isFunctionPointerType()) {
-    if (const DeclRefExpr *DRE =
-            CGM.getTargetCodeGenInfo().getWasmFunctionDeclRefExpr(
-                E, CGM.getContext())) {
-      llvm::Value *V = EmitLValue(DRE).getPointer(*this);
-      llvm::Function *Thunk =
-          CGM.getTargetCodeGenInfo().getOrCreateWasmFunctionPointerThunk(
-              CGM, V, DRE->getDecl()->getType(), type);
-      if (Thunk) {
-        RValue R = RValue::get(Thunk);
-        args.add(R, type);
-        return;
-      }
-    }
-  }
-
   args.add(EmitAnyExprToTemp(E), type);
 }
 
diff --git a/clang/lib/CodeGen/CGExprScalar.cpp 
b/clang/lib/CodeGen/CGExprScalar.cpp
index a8dcf22992983..7369060e60354 100644
--- a/clang/lib/CodeGen/CGExprScalar.cpp
+++ b/clang/lib/CodeGen/CGExprScalar.cpp
@@ -2670,6 +2670,27 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) {
   case CK_BlockPointerToObjCPointerCast:
   case CK_AnyPointerToBlockPointerCast:
   case CK_BitCast: {
+    // For WebAssembly, intercept function pointer bitcasts to a type with a
+    // different number of arguments and generate a thunk instead.  This is
+    // necessary because WebAssembly enforces strict call-site/callee signature
+    // matching at runtime.  The fix is gated on -fwasm-fix-function-bitcasts
+    // and only triggers when the source expression can be statically traced
+    // back to a concrete function declaration.
+    if (CGF.CGM.getTriple().isWasm() &&
+        CGF.CGM.getLangOpts().WasmFixFunctionBitcasts &&
+        DestTy->isFunctionPointerType()) {
+      if (const DeclRefExpr *DRE =
+              CGF.CGM.getTargetCodeGenInfo().getWasmFunctionDeclRefExpr(
+                  E, CGF.CGM.getContext())) {
+        llvm::Value *V = EmitLValue(DRE).getPointer(CGF);
+        llvm::Function *Thunk =
+            CGF.CGM.getTargetCodeGenInfo().getOrCreateWasmFunctionPointerThunk(
+                CGF.CGM, V, DRE->getDecl()->getType(), DestTy);
+        if (Thunk)
+          return Thunk;
+      }
+    }
+
     Value *Src = Visit(E);
     llvm::Type *SrcTy = Src->getType();
     llvm::Type *DstTy = ConvertType(DestTy);
diff --git a/clang/lib/CodeGen/Targets/WebAssembly.cpp 
b/clang/lib/CodeGen/Targets/WebAssembly.cpp
index 1bbc9ba4311fc..3fc2693a11e15 100644
--- a/clang/lib/CodeGen/Targets/WebAssembly.cpp
+++ b/clang/lib/CodeGen/Targets/WebAssembly.cpp
@@ -374,18 +374,21 @@ const DeclRefExpr 
*WebAssemblyTargetCodeGenInfo::findDeclRefExprForVarDown(
   if (!Parent)
     return nullptr;
 
-  // Find down every assignment of V
-  // FIXME we need to stop before the expression where V is used
+  // Find down every assignment of V.
+  // Standalone expression statements appear as Expr nodes directly under the
+  // CompoundStmt, so cast each child to Expr (if possible) and check for
+  // a BinaryOperator assignment.
   const BinaryOperator *A = nullptr;
   for (const Stmt *Child : Parent->children()) {
-    if (const auto *BO = dyn_cast_or_null<BinaryOperator>(Child)) {
-      if (!BO->isAssignmentOp())
-        continue;
-      auto *LHS = llvm::dyn_cast<DeclRefExpr>(BO->getLHS());
-      if (LHS && LHS->getDecl() == V) {
-        A = BO;
-      }
-    }
+    const auto *BO = dyn_cast_or_null<BinaryOperator>(Child);
+    if (!BO)
+      if (const auto *E = dyn_cast_or_null<Expr>(Child))
+        BO = dyn_cast<BinaryOperator>(E->IgnoreParenCasts());
+    if (!BO || !BO->isAssignmentOp())
+      continue;
+    auto *LHS = llvm::dyn_cast<DeclRefExpr>(BO->getLHS());
+    if (LHS && LHS->getDecl() == V)
+      A = BO;
   }
 
   // We have an assignment of the Var, recurse in it
@@ -398,20 +401,19 @@ const DeclRefExpr 
*WebAssemblyTargetCodeGenInfo::findDeclRefExprForVarDown(
 
 const DeclRefExpr *WebAssemblyTargetCodeGenInfo::findDeclRefExprForVarUp(
     const Expr *E, const VarDecl *V, ASTContext &Ctx) const {
-  const clang::Stmt *cur = E;
-  while (cur) {
-    auto parents = Ctx.getParentMapContext().getParents(*cur);
+  // Use a DynTypedNode to walk parents, since a Stmt may be parented by a Decl
+  // (e.g. a VarDecl initializer) and get<Stmt>() would return nullptr there.
+  auto cur = clang::DynTypedNode::create(*E);
+  while (true) {
+    auto parents = Ctx.getParentMapContext().getParents(cur);
     if (parents.empty())
       break;
-    const clang::Stmt *parentStmt = parents[0].get<clang::Stmt>();
-    if (!parentStmt)
-      break;
-    if (const auto *CS = dyn_cast<clang::CompoundStmt>(parentStmt)) {
+    cur = parents[0];
+    if (const auto *CS = cur.get<clang::CompoundStmt>()) {
       const DeclRefExpr *DRE = findDeclRefExprForVarDown(CS, V, Ctx);
       if (DRE)
         return DRE;
     }
-    cur = parentStmt;
   }
   return nullptr;
 }
diff --git a/clang/test/CodeGenWebAssembly/function-pointer-local.c 
b/clang/test/CodeGenWebAssembly/function-pointer-local.c
new file mode 100644
index 0000000000000..6320832abd19c
--- /dev/null
+++ b/clang/test/CodeGenWebAssembly/function-pointer-local.c
@@ -0,0 +1,28 @@
+// REQUIRES: webassembly-registered-target
+// RUN: %clang_cc1 -triple wasm32-unknown-unknown -emit-llvm -O0 
-fwasm-fix-function-bitcasts -o - %s | FileCheck %s
+
+// Test of function pointer bitcast stored in a local variable with different
+// argument number in wasm32. The cast happens via a CK_BitCast in the scalar
+// expression path (local variable assignment), which is intercepted in
+// CGExprScalar.cpp to generate a thunk — the same mechanism used for
+// function-argument and struct-field cases.
+
+#define FUNCTION_POINTER(f) ((FunctionPointer)(f))
+typedef int (*FunctionPointer)(int a, int b);
+
+int fp_less(int a) {
+  return a;
+}
+
+// CHECK-LABEL: @test
+// CHECK: store ptr @__fp_less_iii, ptr %fp
+// CHECK: %[[FP:.*]] = load ptr, ptr %fp
+// CHECK: call i32 %[[FP]](i32 noundef 10, i32 noundef 20)
+void test() {
+  FunctionPointer fp = FUNCTION_POINTER(fp_less);
+  fp(10, 20);
+}
+
+// CHECK: define internal i32 @__fp_less_iii(i32 %0, i32 %1)
+// CHECK: %2 = call i32 @fp_less(i32 %0)
+// CHECK: ret i32 %2
diff --git a/clang/test/CodeGenWebAssembly/function-pointer-void-assign.c 
b/clang/test/CodeGenWebAssembly/function-pointer-void-assign.c
new file mode 100644
index 0000000000000..640f84a33d127
--- /dev/null
+++ b/clang/test/CodeGenWebAssembly/function-pointer-void-assign.c
@@ -0,0 +1,24 @@
+// RUN: %clang_cc1 -triple wasm32-unknown-unknown -emit-llvm -O0 \
+// RUN:   -fwasm-fix-function-bitcasts -o - %s | FileCheck %s
+
+// Test that a function pointer stored via a bare assignment into a void*
+// variable is correctly wrapped in a thunk when later cast to a different
+// function pointer type with more parameters.
+
+typedef int (*FP)(int, int);
+
+static int fp_one(int a) { return a < 0; }
+
+void test_void_ptr_bare_assign() {
+  // Bare assignment (no initializer) into void* — not via VD->hasInit()
+  void *p;
+  p = (void *)fp_one;
+  FP fp = (FP)p;
+  fp(10, 20);
+}
+
+// CHECK-LABEL: define void @test_void_ptr_bare_assign
+// CHECK:         store ptr @__fp_one_iii, ptr %fp
+// CHECK:       define internal i32 @__fp_one_iii(i32 %0, i32 %1)
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    %2 = call i32 @fp_one(i32 %0)

>From a4d7c8c023cb42fe03bc7ac45d19ac65027ff485 Mon Sep 17 00:00:00 2001
From: Jorge Zapata <[email protected]>
Date: Thu, 11 Jun 2026 12:20:23 +0200
Subject: [PATCH 7/9] [wasm] Add runtime wrapper for function pointer casts

This complements the existing compile-time thunk generation for statically
traceable function references.
---
 clang/lib/CodeGen/CGExprScalar.cpp            |  19 +-
 clang/lib/CodeGen/TargetInfo.h                |   9 +
 clang/lib/CodeGen/Targets/WebAssembly.cpp     | 180 +++++++++++++++++-
 .../function-pointer-runtime-cast.c           |  81 ++++++++
 4 files changed, 285 insertions(+), 4 deletions(-)
 create mode 100644 
clang/test/CodeGenWebAssembly/function-pointer-runtime-cast.c

diff --git a/clang/lib/CodeGen/CGExprScalar.cpp 
b/clang/lib/CodeGen/CGExprScalar.cpp
index 7369060e60354..544559a0ae732 100644
--- a/clang/lib/CodeGen/CGExprScalar.cpp
+++ b/clang/lib/CodeGen/CGExprScalar.cpp
@@ -2674,11 +2674,13 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) {
     // different number of arguments and generate a thunk instead.  This is
     // necessary because WebAssembly enforces strict call-site/callee signature
     // matching at runtime.  The fix is gated on -fwasm-fix-function-bitcasts
-    // and only triggers when the source expression can be statically traced
-    // back to a concrete function declaration.
+    // and handles both compile-time (static) and runtime function pointer 
casts.
     if (CGF.CGM.getTriple().isWasm() &&
         CGF.CGM.getLangOpts().WasmFixFunctionBitcasts &&
-        DestTy->isFunctionPointerType()) {
+        DestTy->isFunctionPointerType() &&
+        CE->getSubExpr()->getType()->isFunctionPointerType()) {
+
+      // Try compile-time thunk first (for statically traceable function refs)
       if (const DeclRefExpr *DRE =
               CGF.CGM.getTargetCodeGenInfo().getWasmFunctionDeclRefExpr(
                   E, CGF.CGM.getContext())) {
@@ -2689,6 +2691,17 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) {
         if (Thunk)
           return Thunk;
       }
+
+      // If not statically traceable, use runtime binding for non-constant 
values
+      Value *Src = Visit(E);
+      if (!isa<llvm::Constant>(Src)) {
+        llvm::Value *RuntimeThunk =
+            
CGF.CGM.getTargetCodeGenInfo().emitWasmRuntimeFunctionPointerBinding(
+                CGF, Src, CE->getSubExpr()->getType(), DestTy);
+        if (RuntimeThunk)
+          return RuntimeThunk;
+      }
+      // Fall through to normal bitcast if runtime binding returns nullptr
     }
 
     Value *Src = Visit(E);
diff --git a/clang/lib/CodeGen/TargetInfo.h b/clang/lib/CodeGen/TargetInfo.h
index c708f798627e5..91b748f6e9f27 100644
--- a/clang/lib/CodeGen/TargetInfo.h
+++ b/clang/lib/CodeGen/TargetInfo.h
@@ -414,6 +414,15 @@ class TargetCodeGenInfo {
     return nullptr;
   }
 
+  /// Emit a runtime binding for a WebAssembly function pointer cast, for
+  /// function pointer values that are not compile-time constants and need
+  /// signature adaptation. This default implementation returns nullptr.
+  virtual llvm::Value *emitWasmRuntimeFunctionPointerBinding(
+      CodeGenFunction &CGF, llvm::Value *FnPtr, QualType SrcType,
+      QualType DstType) const {
+    return nullptr;
+  }
+
   /// Emit the device-side copy of the builtin surface type.
   virtual bool emitCUDADeviceBuiltinSurfaceDeviceCopy(CodeGenFunction &CGF,
                                                       LValue Dst,
diff --git a/clang/lib/CodeGen/Targets/WebAssembly.cpp 
b/clang/lib/CodeGen/Targets/WebAssembly.cpp
index 3fc2693a11e15..b685056f9a3dc 100644
--- a/clang/lib/CodeGen/Targets/WebAssembly.cpp
+++ b/clang/lib/CodeGen/Targets/WebAssembly.cpp
@@ -204,13 +204,26 @@ class WebAssemblyTargetCodeGenInfo final : public 
TargetCodeGenInfo {
     return Thunk;
   }
 
+  llvm::Value *emitWasmRuntimeFunctionPointerBinding(
+      CodeGenFunction &CGF, llvm::Value *FnPtr, QualType SrcType,
+      QualType DstType) const override;
+
 private:
-  // The thunk cache
+  // The thunk cache for compile-time thunks
   mutable llvm::StringMap<llvm::Function *> ThunkCache;
+
+  // Runtime wrapper cache: maps (SrcProto, DstProto) -> wrapper function.
+  // Each wrapper has the destination signature and calls a stored pointer.
+  mutable llvm::DenseMap<std::pair<const FunctionProtoType*, const 
FunctionProtoType*>,
+                         llvm::Function*> RuntimeWrapperCache;
+
   // Build the thunk name: "%s_{OrigName}_{WasmSig}"
   std::string getThunkName(std::string OrigName,
                            const FunctionProtoType *DstProto,
                            const ASTContext &Ctx) const;
+  std::string getRuntimeWrapperName(const FunctionProtoType *SrcProto,
+                                   const FunctionProtoType *DstProto,
+                                   const ASTContext &Ctx) const;
   char getTypeSig(const QualType &Ty, const ASTContext &Ctx) const;
   std::string sanitizeTypeString(const std::string &typeStr) const;
   std::string getTypeName(const QualType &qt, const ASTContext &Ctx) const;
@@ -293,6 +306,171 @@ RValue WebAssemblyABIInfo::EmitVAArg(CodeGenFunction 
&CGF, Address VAListAddr,
                           /*AllowHigherAlign=*/true, Slot);
 }
 
+// Build the name "__wasm_runtime_wrapper_<src sig>_to_<dst sig>".
+std::string WebAssemblyTargetCodeGenInfo::getRuntimeWrapperName(
+    const FunctionProtoType *SrcProto, const FunctionProtoType *DstProto,
+    const ASTContext &Ctx) const {
+  std::string Name = "__wasm_runtime_wrapper_";
+
+  // Encode source signature: return type first, then each parameter type
+  QualType SrcRetTy = SrcProto->getReturnType();
+  if (SrcRetTy->isVoidType()) {
+    Name += 'v'; // 'v' stands for a void return
+  } else {
+    Name += getTypeSig(SrcRetTy, Ctx);
+  }
+  for (QualType ParamType : SrcProto->param_types()) {
+    Name += getTypeSig(ParamType, Ctx);
+  }
+
+  Name += "_to_";
+
+  // Encode destination signature, in the same return-then-params order
+  QualType DstRetTy = DstProto->getReturnType();
+  if (DstRetTy->isVoidType()) {
+    Name += 'v'; // 'v' stands for a void return
+  } else {
+    Name += getTypeSig(DstRetTy, Ctx);
+  }
+  for (QualType ParamType : DstProto->param_types()) {
+    Name += getTypeSig(ParamType, Ctx);
+  }
+
+  return Name;
+}
+
+// Emit the runtime binding for a function pointer cast (SrcType -> DstType),
+// e.g. g_list_free_full, where a runtime pointer gains extra parameters.
+// Returns a wrapper with the destination signature, or nullptr if rejected.
+llvm::Value 
*WebAssemblyTargetCodeGenInfo::emitWasmRuntimeFunctionPointerBinding(
+    CodeGenFunction &CGF, llvm::Value *FnPtr, QualType SrcType,
+    QualType DstType) const {
+
+  const FunctionProtoType *SrcProto =
+      SrcType->getPointeeType()->getAs<FunctionProtoType>();
+  const FunctionProtoType *DstProto =
+      DstType->getPointeeType()->getAs<FunctionProtoType>();
+
+  if (!SrcProto || !DstProto)
+    return nullptr;
+
+  // Check parameter counts: source must have same or fewer params than 
destination
+  // We can add parameters (caller provides them, we ignore extras when 
calling source)
+  // We cannot remove parameters (caller doesn't provide them, we can't invent 
values)
+  unsigned SrcParams = SrcProto->getNumParams();
+  unsigned DstParams = DstProto->getNumParams();
+
+  if (SrcParams > DstParams)
+    return nullptr;  // Can't remove parameters
+
+  // Check return types: we can discard a return value but cannot invent one
+  // Allow: int -> void (discard return), int -> int (pass through), void -> 
void
+  // Reject: void -> int (can't invent return value)
+  QualType SrcRetTy = SrcProto->getReturnType();
+  QualType DstRetTy = DstProto->getReturnType();
+  bool sameReturnType = CGF.getContext().hasSameType(SrcRetTy, DstRetTy);
+
+  if (!DstRetTy->isVoidType() && !sameReturnType)
+    return nullptr;  // Can't invent return values
+
+  // Reject if param count and return type both match (no adaptation emitted)
+  if (SrcParams == DstParams && sameReturnType)
+    return nullptr;
+
+  LLVM_DEBUG(llvm::dbgs() << "emitWasmRuntimeFunctionPointerBinding: "
+                          << "src params=" << SrcParams
+                          << " dst params=" << DstParams << "\n");
+
+  auto Key = std::make_pair(SrcProto, DstProto);
+  auto It = RuntimeWrapperCache.find(Key);
+
+  llvm::Module &M = CGF.CGM.getModule();
+  llvm::LLVMContext &Context = M.getContext();
+  llvm::Type *PtrTy = llvm::PointerType::getUnqual(Context);
+
+  std::string WrapperName = getRuntimeWrapperName(SrcProto, DstProto, 
CGF.CGM.getContext());
+  std::string GlobalName = WrapperName + "_fptr";
+
+  // Get or create the global slot "<WrapperName>_fptr" holding the pointer
+  // Use LinkOnceODRLinkage to match the wrapper function, allowing the linker
+  // to merge globals across translation units into a single shared variable
+  llvm::GlobalVariable *FnPtrGlobal = M.getNamedGlobal(GlobalName);
+  if (!FnPtrGlobal) {
+    FnPtrGlobal = new llvm::GlobalVariable(
+        M, PtrTy, /*isConstant=*/false, llvm::GlobalValue::LinkOnceODRLinkage,
+        llvm::Constant::getNullValue(PtrTy), GlobalName);
+    // Make it thread-local to support WebAssembly threads
+    FnPtrGlobal->setThreadLocalMode(llvm::GlobalValue::GeneralDynamicTLSModel);
+  }
+
+  llvm::Function *Wrapper;
+  if (It != RuntimeWrapperCache.end()) {
+    Wrapper = It->second;
+  } else {
+    // Create the wrapper: it has the destination signature and forwards to
+    // the pointer held in FnPtrGlobal, passing only the first SrcParams args
+
+    llvm::FunctionType *SrcFnType = llvm::cast<llvm::FunctionType>(
+        CGF.CGM.getTypes().ConvertType(QualType(SrcProto, 0)));
+    llvm::FunctionType *DstFnType = llvm::cast<llvm::FunctionType>(
+        CGF.CGM.getTypes().ConvertType(QualType(DstProto, 0)));
+
+    // The wrapper is declared with the dst signature (DstFnType).
+    // Use LinkOnceODRLinkage to:
+    // 1. Prevent dead argument elimination (optimizer can't see all callers)
+    // 2. Allow linker to merge duplicates across modules (no symbol 
collisions)
+    // 3. Preserve exact signature required by WebAssembly type checking
+    Wrapper = llvm::Function::Create(
+        DstFnType, llvm::GlobalValue::LinkOnceODRLinkage, WrapperName, M);
+
+    // Mark as noinline to prevent inlining that would expose unused parameters
+    Wrapper->addFnAttr(llvm::Attribute::NoInline);
+    Wrapper->addFnAttr(llvm::Attribute::NoUnwind);
+
+    // Build wrapper body
+    llvm::BasicBlock *EntryBB = llvm::BasicBlock::Create(Context, "entry", 
Wrapper);
+    llvm::IRBuilder<> Builder(EntryBB);
+
+    // Load whatever function pointer is currently held in the global slot
+    llvm::Value *StoredFnPtr = Builder.CreateLoad(PtrTy, FnPtrGlobal);
+
+    // Prepare arguments for the call (only pass what the source function 
expects)
+    llvm::SmallVector<llvm::Value *, 8> CallArgs;
+    auto ArgIt = Wrapper->arg_begin();
+    for (unsigned i = 0; i < SrcParams && ArgIt != Wrapper->arg_end(); ++i, 
++ArgIt) {
+      llvm::Value *A = &*ArgIt;
+      if (A->getType() != SrcFnType->getParamType(i))
+        A = Builder.CreateBitOrPointerCast(A, SrcFnType->getParamType(i));
+      CallArgs.push_back(A);
+    }
+
+    // Call the loaded pointer using the source function type
+    llvm::CallInst *Call = Builder.CreateCall(SrcFnType, StoredFnPtr, 
CallArgs);
+
+    // Return the result, or discard it when the destination returns void
+    if (DstFnType->getReturnType()->isVoidTy()) {
+      Builder.CreateRetVoid();
+    } else {
+      llvm::Value *Ret = Call;
+      if (Ret->getType() != DstFnType->getReturnType())
+        Ret = Builder.CreateBitOrPointerCast(Ret, DstFnType->getReturnType());
+      Builder.CreateRet(Ret);
+    }
+
+    RuntimeWrapperCache[Key] = Wrapper;
+  }
+
+  // Address of the slot that the wrapper reads when it is invoked
+  CharUnits Alignment = CGF.CGM.getPointerAlign();
+  Address GlobalAddr(FnPtrGlobal, PtrTy, Alignment);
+
+  // NOTE(review): one slot per signature pair; a later cast overwrites it
+  CGF.Builder.CreateStore(FnPtr, GlobalAddr);
+
+  // Return the wrapper function
+  return Wrapper;
+}
+
 std::unique_ptr<TargetCodeGenInfo>
 CodeGen::createWebAssemblyTargetCodeGenInfo(CodeGenModule &CGM,
                                             WebAssemblyABIKind K) {
diff --git a/clang/test/CodeGenWebAssembly/function-pointer-runtime-cast.c 
b/clang/test/CodeGenWebAssembly/function-pointer-runtime-cast.c
new file mode 100644
index 0000000000000..d41a10cf248c6
--- /dev/null
+++ b/clang/test/CodeGenWebAssembly/function-pointer-runtime-cast.c
@@ -0,0 +1,81 @@
+// REQUIRES: webassembly-registered-target
+// RUN: %clang_cc1 -triple wasm32-unknown-unknown -emit-llvm -O0 
-fwasm-fix-function-bitcasts -o - %s | FileCheck %s
+
+// Test runtime function pointer cast with different argument counts
+// This simulates cases like g_list_free_full where a function pointer 
parameter
+// is cast from fewer params to more params
+
+typedef void (*OneArgFunc)(void *);
+typedef void (*TwoArgFunc)(void *, void *);
+
+// Check for both TLS globals at the top of the output
+// CHECK: @__wasm_runtime_wrapper_vi_to_vii_fptr = linkonce_odr thread_local 
global ptr null
+// CHECK: @__wasm_runtime_wrapper_iii_to_vii_fptr = linkonce_odr thread_local 
global ptr null
+
+// A function with one argument
+void my_one_arg_func(void *ptr) {
+  // Do something
+}
+
+// Test case 1: Direct call of casted runtime function pointer
+// CHECK-LABEL: @runtime_cast_caller
+void runtime_cast_caller(OneArgFunc fp, void *data) {
+  // Cast the runtime parameter from 1-arg to 2-arg signature and call directly
+  // CHECK: store ptr %{{.*}}, ptr @__wasm_runtime_wrapper_vi_to_vii_fptr
+  // CHECK: call void @__wasm_runtime_wrapper_vi_to_vii(ptr
+  ((TwoArgFunc)fp)(data, (void*)0);
+}
+
+// The runtime wrapper should be generated once and shared by both cases
+// CHECK-LABEL: define linkonce_odr void @__wasm_runtime_wrapper_vi_to_vii(ptr 
%0, ptr %1)
+// CHECK: %{{.*}} = load ptr, ptr @__wasm_runtime_wrapper_vi_to_vii_fptr
+// CHECK: call void %{{.*}}(ptr %0)
+// CHECK: ret void
+
+// Test case 2: Pass casted runtime function pointer to another function
+// This is closer to the real g_list_free_full scenario
+// CHECK-LABEL: @library_function
+void library_function(TwoArgFunc func, void *data) {
+  // CHECK: call void %{{.*}}(ptr noundef %{{.*}}, ptr noundef null)
+  func(data, (void*)0);
+}
+
+// CHECK-LABEL: @indirect_caller
+void indirect_caller(OneArgFunc fp, void *data) {
+  // Cast and pass to another function (like g_list_free_full does)
+  // CHECK: store ptr %{{.*}}, ptr @__wasm_runtime_wrapper_vi_to_vii_fptr
+  // CHECK: call void @library_function(ptr noundef 
@__wasm_runtime_wrapper_vi_to_vii
+  library_function((TwoArgFunc)fp, data);
+}
+
+// A function with two arguments that returns int (like a comparator)
+int my_compare_func(void *a, void *b) {
+  return 0;
+}
+
+// Test case 3: Same param count, return type coercion (int -> void)
+// This simulates g_slist_sort where int compare(void*, void*) is cast to void 
func(void*, void*)
+// Use a typedef to create a function pointer type with int return
+typedef int (*CompareFunc)(void *, void *);
+
+// CHECK-LABEL: @test_return_coercion
+void test_return_coercion(CompareFunc fp, void *data) {
+  // Cast int(void*, void*) -> void(void*, void*) on a runtime parameter
+  // CHECK: store ptr %{{.*}}, ptr @__wasm_runtime_wrapper_iii_to_vii_fptr
+  // CHECK: call void @library_function(ptr noundef 
@__wasm_runtime_wrapper_iii_to_vii
+  library_function((TwoArgFunc)fp, data);
+}
+
+// The runtime wrapper for int->void coercion with same param count
+// CHECK-LABEL: define linkonce_odr void 
@__wasm_runtime_wrapper_iii_to_vii(ptr %0, ptr %1)
+// CHECK: %{{.*}} = load ptr, ptr @__wasm_runtime_wrapper_iii_to_vii_fptr
+// CHECK: %{{.*}} = call i32 %{{.*}}(ptr %0, ptr %1)
+// CHECK: ret void
+
+// CHECK-LABEL: @test
+void test() {
+  // Test both scenarios
+  runtime_cast_caller(my_one_arg_func, (void*)0);
+  indirect_caller(my_one_arg_func, (void*)0);
+  test_return_coercion(my_compare_func, (void*)0);
+}

>From 37fec39d10e61d0c48317555ae7378a8dbe78990 Mon Sep 17 00:00:00 2001
From: Jorge Zapata <[email protected]>
Date: Tue, 16 Jun 2026 22:24:40 +0200
Subject: [PATCH 8/9] [wasm] Match the static bitcast logic to the runtime one

---
 clang/lib/CodeGen/Targets/WebAssembly.cpp | 21 ++++++++++++++++++---
 1 file changed, 18 insertions(+), 3 deletions(-)

diff --git a/clang/lib/CodeGen/Targets/WebAssembly.cpp 
b/clang/lib/CodeGen/Targets/WebAssembly.cpp
index b685056f9a3dc..8d24c16bb7f05 100644
--- a/clang/lib/CodeGen/Targets/WebAssembly.cpp
+++ b/clang/lib/CodeGen/Targets/WebAssembly.cpp
@@ -140,8 +140,23 @@ class WebAssemblyTargetCodeGenInfo final : public 
TargetCodeGenInfo {
                                                 ->getPointeeType()
                                                 ->getAs<FunctionProtoType>();
 
-    // This should only work for different number of arguments
-    if (DstProtoType->getNumParams() <= SrcProtoType->getNumParams())
+    // Check parameter counts: source must have same or fewer params than 
destination
+    unsigned SrcParams = SrcProtoType->getNumParams();
+    unsigned DstParams = DstProtoType->getNumParams();
+
+    if (SrcParams > DstParams)
+      return nullptr;  // Can't remove parameters
+
+    // Check return types: we can discard a return value but cannot invent one
+    QualType SrcRetTy = SrcProtoType->getReturnType();
+    QualType DstRetTy = DstProtoType->getReturnType();
+    bool sameReturnType = CGM.getContext().hasSameType(SrcRetTy, DstRetTy);
+
+    if (!DstRetTy->isVoidType() && !sameReturnType)
+      return nullptr;  // Can't invent return values
+
+    // Reject if signatures are identical (no adaptation needed)
+    if (SrcParams == DstParams && sameReturnType)
       return nullptr;
 
     // Get the llvm function types
@@ -193,7 +208,7 @@ class WebAssemblyTargetCodeGenInfo final : public 
TargetCodeGenInfo {
     } else {
       llvm::Value *Ret = Call;
       if (Ret->getType() != ThunkRetTy)
-        Ret = Builder.CreateBitCast(Ret, ThunkRetTy);
+        Ret = Builder.CreateBitOrPointerCast(Ret, ThunkRetTy);
       Builder.CreateRet(Ret);
     }
     LLVM_DEBUG(llvm::dbgs() << "getOrCreateWasmFunctionPointerThunk:"

>From 218e499fe206f02894954bbd5aa5ddb3e5e78a48 Mon Sep 17 00:00:00 2001
From: Jorge Zapata <[email protected]>
Date: Fri, 19 Jun 2026 14:14:59 +0200
Subject: [PATCH 9/9] wasm: Improve the runtime casts

Use a direct wrapper for immediate casts and a set of wrappers
for saved casted function pointers. Include a cache mechanism
and documentation for the whole approach.
---
 clang/docs/WebAssemblyFunctionBitcasts.rst    | 334 ++++++++++++++++
 clang/lib/CodeGen/CGExprScalar.cpp            |  17 +-
 clang/lib/CodeGen/TargetInfo.h                |   2 +-
 clang/lib/CodeGen/Targets/WebAssembly.cpp     | 377 ++++++++++++++----
 .../function-pointer-runtime-cast.c           |  63 +--
 5 files changed, 687 insertions(+), 106 deletions(-)
 create mode 100644 clang/docs/WebAssemblyFunctionBitcasts.rst

diff --git a/clang/docs/WebAssemblyFunctionBitcasts.rst 
b/clang/docs/WebAssemblyFunctionBitcasts.rst
new file mode 100644
index 0000000000000..19c3116fd28f9
--- /dev/null
+++ b/clang/docs/WebAssemblyFunctionBitcasts.rst
@@ -0,0 +1,334 @@
+==========================================
+WebAssembly Function Pointer Bitcast Fix
+==========================================
+
+.. contents::
+   :local:
+
+Overview
+========
+
+WebAssembly enforces strict function signature matching at ``call_indirect``
+sites. Native platforms silently allow calling a function through a pointer
+with a different signature (e.g., passing a 2-parameter function where a
+3-parameter function is expected). On WebAssembly, this causes a
+``RuntimeError: function signature mismatch``.
+
+The ``-fwasm-fix-function-bitcasts`` flag generates adapter thunks to bridge
+incompatible signatures at function pointer cast sites.
+
+**Constraints**:
+
+- ``SrcParams > DstParams`` → rejected (the source expects more arguments than
+  the caller supplies, and the thunk can't invent the missing values)
+- ``DstReturn != void && DstReturn != SrcReturn`` → rejected (can't invent
+  return values)
+- Identical signatures → skipped (no adaptation needed)
+
+1. Static Casts
+===============
+
+**What they are**: A cast from one function pointer type to another where the
+compiler can statically trace the original function. The function name is a
+compile-time constant.
+
+**C code example**:
+
+.. code-block:: c
+
+    typedef void (*OneArg)(void*);
+    typedef void (*TwoArg)(void*, void*);
+
+    void my_func(void *p) { }
+
+    void caller() {
+        TwoArg f = (TwoArg)my_func;   // my_func is known at compile time
+        f(data, NULL);
+    }
+
+**Solution**: One thunk function per (``original_function``,
+``dest_signature``) pair, cached by name. The thunk receives dest params, calls
+the original with src params.
+
+**Generated code**:
+
+.. code-block:: llvm
+
+    ; Thunk: __my_func_vii
+    define internal void @__my_func_vii(ptr %0, ptr %1) {
+        call void @my_func(ptr %0)    ; drops 2nd param
+        ret void
+    }
+
+**Cache**: ``ThunkCache`` — ``StringMap<Function*>`` keyed by
+``__<name>_<dest_sig>``.
+
+2. Runtime Casts
+================
+
+**What they are**: A cast where the source function pointer is a runtime value
+(function parameter, loaded from memory, etc.), not a compile-time constant.
+
+**C code example**:
+
+.. code-block:: c
+
+    void caller(OneArg fp) {
+        TwoArg f = (TwoArg)fp;        // fp is unknown at compile time
+        f(data, NULL);
+    }
+
+Runtime casts are split into two sub-cases based on usage pattern:
+
+2.1 Immediate Calls
+-------------------
+
+**What they are**: The cast result is immediately called (the cast expression 
is
+the callee of a ``CallExpr``). The wrapper is used and discarded within the 
same
+expression — no other code can observe the adapted pointer.
+
+**Detection**: The cast ``Expr``'s parent is a ``CallExpr`` and the cast is the
+callee.
+
+**C code example**:
+
+.. code-block:: c
+
+    void caller(OneArg fp, void *data) {
+        ((TwoArg)fp)(data, NULL);     // cast + immediate call
+    }
+
+**Solution**: One TLS slot + one wrapper function per signature pair per
+translation unit. The slot is thread-local — no races across threads.
+
+**Generated code**:
+
+.. code-block:: llvm
+
+    ; One TLS slot per signature pair per TU:
+    
@__wasm_runtime_pool___wasm_runtime_wrapper_iii_to_iiii____source_c_immediate_slot
+        = internal thread_local global ptr null
+
+    ; One wrapper, loads from TLS slot:
+    define internal void 
@__wasm_runtime_wrapper_iii_to_iiii____source_c_immediate(ptr %0, ptr %1) {
+        %fp = load ptr, ptr 
@__wasm_runtime_pool___wasm_runtime_wrapper_iii_to_iiii____source_c_immediate_slot
+        br %fp != null → call_bb, null_bb
+      call_bb:
+        call i32 %fp(ptr %0, ptr %1)   ; call source with adapted args
+        ret void                        ; dest returns void, discard result
+      null_bb:
+        ret void                        ; defensive null return
+    }
+
+**Runtime (at each cast site)**:
+
+.. code-block:: llvm
+
+    br %fn_ptr == null → null_cont, not_null
+
+  not_null:
+    store ptr %fn_ptr, ptr 
@__wasm_runtime_pool___wasm_runtime_wrapper_iii_to_iiii____source_c_immediate_slot
+    br null_cont
+
+  null_cont:
+    %w = phi [ @__wasm_runtime_wrapper_iii_to_iiii____source_c_immediate, 
not_null ],
+             [ null, entry ]
+
+**Characteristics**:
+
+- 1 TLS slot, 1 wrapper per signature pair per TU
+- No counter, no atomics
+- Thread-safe via TLS
+- Reused every call — never runs out
+
+2.2 Store-For-Later (Closures)
+------------------------------
+
+**What they are**: The cast result is assigned to a variable, stored in a 
struct
+field, or passed to another function — the adapted pointer persists beyond the
+current expression. Multiple such stores may be active simultaneously.
+
+**Detection**: The cast ``Expr`` is NOT the callee of a ``CallExpr`` — this
+includes assignments, function arguments, initializers, struct field stores, 
and
+return statements.
+
+**C code example**:
+
+.. code-block:: c
+
+    // Closure pattern: multiple marshals stored in struct field
+    typedef void (*GMarshal)(void*, void*, void*, void*, void*, void*);
+
+    struct Closure {
+        GMarshal marshal;   // stored for later invocation
+    };
+
+    void set_closure(Closure *c, void (*notify)(void*, void*)) {
+        c->marshal = (GMarshal)notify;    // cast + store in struct
+    }
+
+**Corner cases — syntactically "store" but semantically "immediate"**:
+
+Some patterns look like "store-for-later" at the syntax level but are actually
+immediate calls from a single-threaded perspective. These are treated as
+store-for-later for safety:
+
+1. **Assign to local variable, then call**:
+
+   .. code-block:: c
+
+       TwoArg f = (TwoArg)fp;   // assignment → classified as store-for-later
+       f(a, b);                 // immediately called, but we can't track this
+
+   The cast is in an assignment, not a call callee. Our AST-level detection
+   sees this as a store. We conservatively treat it as store-for-later.
+
+2. **Pass cast result as function argument**:
+
+   .. code-block:: c
+
+       other_func((TwoArg)fp);  // passed as parameter → store-for-later
+
+   The cast result is passed to another function. We cannot know what
+   ``other_func`` does with it — it might call it immediately, store it in a
+   global, or pass it to another thread. We conservatively use the pool.
+
+**Solution**: Pre-allocated pool of 64 wrappers + 64 non-TLS slots + 8-entry
+direct-mapped cache + atomic counter per signature pair per translation unit.
+
+**Generated code (pool setup, once per TU per signature pair)**:
+
+.. code-block:: llvm
+
+    ; 64 wrapper functions, each loading from its dedicated slot:
+    ; Wrapper 0:
+    define internal void @__wasm_runtime_wrapper_vii_to_viiiiii____gobject_gclosure_c_0(
+        ptr %0, ptr %1, ptr %2, ptr %3, ptr %4, ptr %5) {
+        %fp = load ptr, ptr getelementptr(ptr, @slots, i32 0)
+        br %fp != null → call_bb, null_bb
+      call_bb:
+        call void %fp(ptr %0, ptr %1)    ; source: 2 params
+        ret void
+      null_bb:
+        ret void
+    }
+    ; Wrappers 1-63: same pattern, each loads slots[1] through slots[63]
+
+    ; Pool globals:
+    @counter  = internal thread_local global i32 0
+    @slots    = internal global [64 x ptr] zeroinitializer
+    @wrappers = internal constant [64 x ptr] [ @wrapper_0, ..., @wrapper_63 ]
+
+    ; 8-entry direct-mapped cache:
+    @cache_keys     = internal global [8 x ptr] zeroinitializer
+    @cache_wrappers = internal global [8 x ptr] zeroinitializer
+
+**Runtime flow (at each cast site)**:
+
+1. **Cache lookup** — O(1): hash fn_ptr, check cache. Hit → return cached wrapper.
+2. **Pool scan** — O(64): linear scan of slots. Found → update cache, return wrapper.
+3. **Allocate** — O(1): atomic increment counter, check overflow, store fn_ptr + update cache.
+
+Overflow (>64 unique fn_ptrs) traps via ``llvm.trap``.
+
+**Deduplication**: Cache + pool scan ensures each fn_ptr maps to exactly one
+wrapper, even across cache evictions. Same fn_ptr always returns same wrapper.
+
+**Characteristics**:
+
+- 64 wrappers + 64 non-TLS slots + 8-entry cache per signature pair per TU
+- Deterministic: same fn_ptr always returns same wrapper (no duplicate allocation)
+- O(1) cache hit, O(64) scan on first miss, O(1) atomic on allocation
+- Atomic counter (TLS), shared slots + cache (non-TLS)
+- Traps if >64 **unique** fn_ptrs cast in store-for-later context
+- Each wrapper has a null check for defensive safety
+
+3. Non-Supported Cases and Limitations
+======================================
+
+3.1 Standard Compliance
+-----------------------
+
+The C and C++ standards specify exactly one guarantee for function pointer
+casts:
+
+    Converting a prvalue of type "pointer to ``T1``" to the type "pointer to
+    ``T2``" (where ``T1`` and ``T2`` are function types) and back to its
+    original type yields the original pointer value.
+
+Our adapter thunks violate this guarantee. Each cast returns a **different**
+pointer — the wrapper function's address:
+
+.. code-block:: c
+
+    typedef void (*OneArg)(void*);
+    typedef void (*TwoArg)(void*, void*);
+
+    OneArg f = my_func;
+    TwoArg g = (TwoArg)f;     // returns wrapper function pointer, NOT f
+    OneArg h = (OneArg)g;     // 2 → 1 params is not adapted: h == g, NOT f
+    // h != f  ← violates round-trip guarantee
+
+This is an accepted deviation: WebAssembly's type system cannot express the
+standard's semantics. The alternative is a runtime crash on ``call_indirect``.
+
+3.2 Chained Casts Through Memory
+--------------------------------
+
+When a function pointer is cast, stored in a struct field, loaded back, and
+cast again, the chain of casts cannot be tracked at compile time. Each cast
+sees only the **declared** type of the source expression, not the **actual**
+type of the function that was originally stored.
+
+**Real-world example — gstreamer ``gstutils.c``**:
+
+.. code-block:: c
+
+    // Type definitions:
+    typedef void (*GFunc)(gpointer, gpointer);              // vii
+    typedef void (*GstCallAsyncFunc)(gpointer);              // vi
+    typedef void (*GstObjectCallAsyncFunc)(GstObject*, gpointer); // vii
+
+    struct GstCallAsyncData {
+        GstObject *object;
+        GstCallAsyncFunc func;   // declared as vi (1 param)
+        gpointer user_data;
+    };
+
+    // Store: vii → vi → REJECTED (can't remove params) → raw bitcast
+    data->func = (GstCallAsyncFunc)vii_func;
+
+    // Load: vi → vii → looks valid, wraps. But stored func is actually vii.
+    // Wrapper calls it as vi (1 param), but it's vii (2 params) → MISMATCH.
+    GstObjectCallAsyncFunc func = (GstObjectCallAsyncFunc)data->func;
+    (*func)(data->object, data->user_data);
+
+**Why it fails**: The struct field is declared as ``vi`` but sometimes stores a
+``vii`` function. Our code sees ``vi → vii`` at the load site (valid) but the
+actual function in memory is ``vii`` (from a rejected store). This is a
+pre-existing source bug — the struct field type doesn't match what's actually
+stored there.
+
+**General rule**: When a struct field holds multiple function types through
+different code paths, our per-cast-site adaptation cannot determine the correct
+runtime type. The fix belongs in the source code (use ``gpointer`` for the
+field, or a union, or ensure consistent types).
+
+3.3 Other Limitations
+---------------------
+
+- **Source params > Dest params**: Cannot remove parameters — we can't invent
+  values the caller didn't provide. Falls through to raw bitcast (wasm crash).
+- **Dest returns non-void and doesn't match source return**: Cannot invent
+  return values. Falls through to raw bitcast (wasm crash).
+- **Identical signatures**: No adaptation needed. Skipped.
+- **Null function pointers**: Skipped — falls through to raw bitcast (wasm
+  would crash on null call regardless).
+
+Key Source Files
+================
+
+- ``clang/lib/CodeGen/Targets/WebAssembly.cpp`` — thunk/wrapper generation
+- ``clang/lib/CodeGen/CGExprScalar.cpp`` — cast site detection and dispatch
+- ``clang/lib/CodeGen/TargetInfo.h`` — virtual interface
+- ``clang/test/CodeGenWebAssembly/function-pointer-runtime-cast.c`` — LLVM IR tests
diff --git a/clang/lib/CodeGen/CGExprScalar.cpp 
b/clang/lib/CodeGen/CGExprScalar.cpp
index 544559a0ae732..752ef330be1ff 100644
--- a/clang/lib/CodeGen/CGExprScalar.cpp
+++ b/clang/lib/CodeGen/CGExprScalar.cpp
@@ -2695,9 +2695,24 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) {
       // If not statically traceable, use runtime binding for non-constant 
values
       Value *Src = Visit(E);
       if (!isa<llvm::Constant>(Src)) {
+        // Detect call-immediately vs store-for-later pattern.
+        // Call-immediately: the cast result is the callee of a CallExpr
+        //   -> use 1 TLS slot, no pool needed
+        // Store-for-later: the cast result is assigned/saved
+        //   -> use pool with 64 slots for closure support
+        bool IsImmediate = false;
+        auto parents = CGF.getContext().getParentMapContext().getParents(*E);
+        for (auto &parent : parents) {
+          if (auto *Call = parent.get<CallExpr>()) {
+            if (Call->getCallee()->IgnoreParens() == E) {
+              IsImmediate = true;
+              break;
+            }
+          }
+        }
         llvm::Value *RuntimeThunk =
             
CGF.CGM.getTargetCodeGenInfo().emitWasmRuntimeFunctionPointerBinding(
-                CGF, Src, CE->getSubExpr()->getType(), DestTy);
+                CGF, Src, CE->getSubExpr()->getType(), DestTy, IsImmediate);
         if (RuntimeThunk)
           return RuntimeThunk;
       }
diff --git a/clang/lib/CodeGen/TargetInfo.h b/clang/lib/CodeGen/TargetInfo.h
index 91b748f6e9f27..ea2d7deae8ac1 100644
--- a/clang/lib/CodeGen/TargetInfo.h
+++ b/clang/lib/CodeGen/TargetInfo.h
@@ -419,7 +419,7 @@ class TargetCodeGenInfo {
   /// that need signature adaptation.
   virtual llvm::Value *emitWasmRuntimeFunctionPointerBinding(
       CodeGenFunction &CGF, llvm::Value *FnPtr, QualType SrcType,
-      QualType DstType) const {
+      QualType DstType, bool IsImmediate) const {
     return nullptr;
   }
 
diff --git a/clang/lib/CodeGen/Targets/WebAssembly.cpp 
b/clang/lib/CodeGen/Targets/WebAssembly.cpp
index 8d24c16bb7f05..1184785fed1ec 100644
--- a/clang/lib/CodeGen/Targets/WebAssembly.cpp
+++ b/clang/lib/CodeGen/Targets/WebAssembly.cpp
@@ -9,6 +9,7 @@
 #include "ABIInfoImpl.h"
 #include "TargetInfo.h"
 #include "llvm/ADT/StringMap.h"
+#include "llvm/IR/Intrinsics.h"
 
 #include "clang/AST/ParentMapContext.h"
 
@@ -147,12 +148,14 @@ class WebAssemblyTargetCodeGenInfo final : public 
TargetCodeGenInfo {
     if (SrcParams > DstParams)
       return nullptr;  // Can't remove parameters
 
-    // Check return types: we can discard a return value but cannot invent one
+    // Check return types: compare LLVM types, not C types.
     QualType SrcRetTy = SrcProtoType->getReturnType();
     QualType DstRetTy = DstProtoType->getReturnType();
-    bool sameReturnType = CGM.getContext().hasSameType(SrcRetTy, DstRetTy);
+    llvm::Type *SrcRetLLVMTy = CGM.getTypes().ConvertType(SrcRetTy);
+    llvm::Type *DstRetLLVMTy = CGM.getTypes().ConvertType(DstRetTy);
+    bool sameReturnType = SrcRetLLVMTy == DstRetLLVMTy;
 
-    if (!DstRetTy->isVoidType() && !sameReturnType)
+    if (!DstProtoType->getReturnType()->isVoidType() && !sameReturnType)
       return nullptr;  // Can't invent return values
 
     // Reject if signatures are identical (no adaptation needed)
@@ -221,7 +224,7 @@ class WebAssemblyTargetCodeGenInfo final : public 
TargetCodeGenInfo {
 
   llvm::Value *emitWasmRuntimeFunctionPointerBinding(
       CodeGenFunction &CGF, llvm::Value *FnPtr, QualType SrcType,
-      QualType DstType) const override;
+      QualType DstType, bool IsImmediate) const override;
 
 private:
   // The thunk cache for compile-time thunks
@@ -359,7 +362,7 @@ std::string 
WebAssemblyTargetCodeGenInfo::getRuntimeWrapperName(
 // needs to be cast from fewer params to more params
 llvm::Value 
*WebAssemblyTargetCodeGenInfo::emitWasmRuntimeFunctionPointerBinding(
     CodeGenFunction &CGF, llvm::Value *FnPtr, QualType SrcType,
-    QualType DstType) const {
+    QualType DstType, bool IsImmediate) const {
 
   const FunctionProtoType *SrcProto =
       SrcType->getPointeeType()->getAs<FunctionProtoType>();
@@ -378,12 +381,13 @@ llvm::Value 
*WebAssemblyTargetCodeGenInfo::emitWasmRuntimeFunctionPointerBinding
   if (SrcParams > DstParams)
     return nullptr;  // Can't remove parameters
 
-  // Check return types: we can discard a return value but cannot invent one
-  // Allow: int -> void (discard return), int -> int (pass through), void -> 
void
-  // Reject: void -> int (can't invent return value)
+  // Check return types: we can discard a return value but cannot invent one.
+  // Compare LLVM types (not C types) since wasm only cares about 
i32/i64/f32/f64.
   QualType SrcRetTy = SrcProto->getReturnType();
   QualType DstRetTy = DstProto->getReturnType();
-  bool sameReturnType = CGF.getContext().hasSameType(SrcRetTy, DstRetTy);
+  llvm::Type *SrcRetLLVMTy = CGF.CGM.getTypes().ConvertType(SrcRetTy);
+  llvm::Type *DstRetLLVMTy = CGF.CGM.getTypes().ConvertType(DstRetTy);
+  bool sameReturnType = SrcRetLLVMTy == DstRetLLVMTy;
 
   if (!DstRetTy->isVoidType() && !sameReturnType)
     return nullptr;  // Can't invent return values
@@ -392,98 +396,309 @@ llvm::Value 
*WebAssemblyTargetCodeGenInfo::emitWasmRuntimeFunctionPointerBinding
   if (SrcParams == DstParams && sameReturnType)
     return nullptr;
 
+  // A null function pointer needs no wrapper — fall through to bitcast
+  if (isa<llvm::ConstantPointerNull>(FnPtr))
+    return nullptr;
+
   LLVM_DEBUG(llvm::dbgs() << "emitWasmRuntimeFunctionPointerBinding: "
                           << "src params=" << SrcParams
                           << " dst params=" << DstParams << "\n");
 
-  auto Key = std::make_pair(SrcProto, DstProto);
-  auto It = RuntimeWrapperCache.find(Key);
-
   llvm::Module &M = CGF.CGM.getModule();
   llvm::LLVMContext &Context = M.getContext();
-  llvm::Type *PtrTy = llvm::PointerType::getUnqual(Context);
+  llvm::PointerType *PtrTy = llvm::PointerType::getUnqual(Context);
+  llvm::Type *I32Ty = llvm::IntegerType::getInt32Ty(Context);
 
-  std::string WrapperName = getRuntimeWrapperName(SrcProto, DstProto, 
CGF.CGM.getContext());
-  std::string GlobalName = WrapperName + "_fptr";
-
-  // Get or create the global variable for storing the function pointer
-  // Use LinkOnceODRLinkage to match the wrapper function, allowing the linker
-  // to merge globals across translation units into a single shared variable
-  llvm::GlobalVariable *FnPtrGlobal = M.getNamedGlobal(GlobalName);
-  if (!FnPtrGlobal) {
-    FnPtrGlobal = new llvm::GlobalVariable(
-        M, PtrTy, /*isConstant=*/false, llvm::GlobalValue::LinkOnceODRLinkage,
-        llvm::Constant::getNullValue(PtrTy), GlobalName);
-    // Make it thread-local to support WebAssembly threads
-    FnPtrGlobal->setThreadLocalMode(llvm::GlobalValue::GeneralDynamicTLSModel);
-  }
+  // Pre-allocated pool: N wrapper functions + N shared (non-TLS) slots per
+  // signature pair. A function pointer missed by the cache lookup and the pool
+  // scan claims the next slot via the counter, so stored wrappers are not reused.
+  static const unsigned POOL_SIZE = 64;
 
-  llvm::Function *Wrapper;
-  if (It != RuntimeWrapperCache.end()) {
-    Wrapper = It->second;
-  } else {
-    // Create a new wrapper function that takes a function pointer
-    // and returns a thunk with the destination signature
+  std::string WrapperName = getRuntimeWrapperName(SrcProto, DstProto, 
CGF.CGM.getContext());
 
-    llvm::FunctionType *SrcFnType = llvm::cast<llvm::FunctionType>(
+  std::string SourceId = M.getSourceFileName();
+  if (SourceId.empty())
+    SourceId = M.getName();
+  for (char &C : SourceId)
+    if (!isalnum(C) && C != '_')
+      C = '_';
+  WrapperName += "_" + SourceId;
+
+  std::string PoolName = "__wasm_runtime_pool_" + WrapperName;
+
+  // Get or create pool globals (once per module per signature pair)
+  llvm::GlobalVariable *Counter = M.getNamedGlobal(PoolName + "_counter");
+  llvm::GlobalVariable *ImmediateSlot = M.getNamedGlobal(PoolName + 
"_immediate_slot");
+  llvm::Function *ImmediateWrapper = M.getFunction(WrapperName + "_immediate");
+  llvm::ArrayType *SlotArrayTy = llvm::ArrayType::get(PtrTy, POOL_SIZE);
+  llvm::GlobalVariable *Slots = nullptr;
+  llvm::GlobalVariable *WrapperTable = nullptr;
+  llvm::FunctionType *SrcFnType = nullptr;
+  llvm::FunctionType *DstFnType = nullptr;
+
+  if (!Counter) {
+    SrcFnType = llvm::cast<llvm::FunctionType>(
         CGF.CGM.getTypes().ConvertType(QualType(SrcProto, 0)));
-    llvm::FunctionType *DstFnType = llvm::cast<llvm::FunctionType>(
+    DstFnType = llvm::cast<llvm::FunctionType>(
         CGF.CGM.getTypes().ConvertType(QualType(DstProto, 0)));
 
-    // Wrapper signature: takes src function pointer, has dst signature
-    // Use LinkOnceODRLinkage to:
-    // 1. Prevent dead argument elimination (optimizer can't see all callers)
-    // 2. Allow linker to merge duplicates across modules (no symbol 
collisions)
-    // 3. Preserve exact signature required by WebAssembly type checking
-    Wrapper = llvm::Function::Create(
-        DstFnType, llvm::GlobalValue::LinkOnceODRLinkage, WrapperName, M);
-
-    // Mark as noinline to prevent inlining that would expose unused parameters
-    Wrapper->addFnAttr(llvm::Attribute::NoInline);
-    Wrapper->addFnAttr(llvm::Attribute::NoUnwind);
-
-    // Build wrapper body
-    llvm::BasicBlock *EntryBB = llvm::BasicBlock::Create(Context, "entry", 
Wrapper);
-    llvm::IRBuilder<> Builder(EntryBB);
-
-    // Load the stored function pointer
-    llvm::Value *StoredFnPtr = Builder.CreateLoad(PtrTy, FnPtrGlobal);
-
-    // Prepare arguments for the call (only pass what the source function 
expects)
-    llvm::SmallVector<llvm::Value *, 8> CallArgs;
-    auto ArgIt = Wrapper->arg_begin();
-    for (unsigned i = 0; i < SrcParams && ArgIt != Wrapper->arg_end(); ++i, 
++ArgIt) {
-      llvm::Value *A = &*ArgIt;
-      if (A->getType() != SrcFnType->getParamType(i))
-        A = Builder.CreateBitOrPointerCast(A, SrcFnType->getParamType(i));
-      CallArgs.push_back(A);
+    Counter = new llvm::GlobalVariable(
+        M, I32Ty, false, llvm::GlobalValue::InternalLinkage,
+        llvm::ConstantInt::get(I32Ty, 0), PoolName + "_counter");
+    Counter->setThreadLocalMode(llvm::GlobalValue::GeneralDynamicTLSModel);
+
+    // Immediate-call TLS slot: per-thread, no races, reused every call
+    ImmediateSlot = new llvm::GlobalVariable(
+        M, PtrTy, false, llvm::GlobalValue::InternalLinkage,
+        llvm::ConstantPointerNull::get(PtrTy), PoolName + "_immediate_slot");
+    
ImmediateSlot->setThreadLocalMode(llvm::GlobalValue::GeneralDynamicTLSModel);
+
+    // Immediate wrapper: loads from TLS slot, calls with adapted signature
+    ImmediateWrapper = llvm::Function::Create(
+        DstFnType, llvm::GlobalValue::InternalLinkage,
+        WrapperName + "_immediate", M);
+    ImmediateWrapper->addFnAttr(llvm::Attribute::NoInline);
+    ImmediateWrapper->addFnAttr(llvm::Attribute::NoUnwind);
+    {
+      llvm::BasicBlock *BB = llvm::BasicBlock::Create(Context, "entry", 
ImmediateWrapper);
+      llvm::IRBuilder<> B(BB);
+      llvm::Value *FP = B.CreateLoad(PtrTy, ImmediateSlot);
+      llvm::BasicBlock *CallBB = llvm::BasicBlock::Create(Context, "call", 
ImmediateWrapper);
+      llvm::BasicBlock *NullBB = llvm::BasicBlock::Create(Context, "nullslot", 
ImmediateWrapper);
+      B.CreateCondBr(B.CreateIsNotNull(FP), CallBB, NullBB);
+      B.SetInsertPoint(CallBB);
+      llvm::SmallVector<llvm::Value *, 8> ImmArgs;
+      auto AI = ImmediateWrapper->arg_begin();
+      for (unsigned J = 0; J < SrcParams && AI != ImmediateWrapper->arg_end(); 
++J, ++AI) {
+        llvm::Value *A = &*AI;
+        if (A->getType() != SrcFnType->getParamType(J))
+          A = B.CreateBitOrPointerCast(A, SrcFnType->getParamType(J));
+        ImmArgs.push_back(A);
+      }
+      llvm::CallInst *ImmCall = B.CreateCall(SrcFnType, FP, ImmArgs);
+      if (DstFnType->getReturnType()->isVoidTy()) {
+        B.CreateRetVoid(); B.SetInsertPoint(NullBB); B.CreateRetVoid();
+      } else {
+        llvm::Value *R = ImmCall;
+        if (R->getType() != DstFnType->getReturnType())
+          R = B.CreateBitOrPointerCast(R, DstFnType->getReturnType());
+        B.CreateRet(R);
+        B.SetInsertPoint(NullBB);
+        B.CreateRet(llvm::Constant::getNullValue(DstFnType->getReturnType()));
+      }
     }
 
-    // Call the source function
-    llvm::CallInst *Call = Builder.CreateCall(SrcFnType, StoredFnPtr, 
CallArgs);
+    Slots = new llvm::GlobalVariable(
+        M, SlotArrayTy, false, llvm::GlobalValue::InternalLinkage,
+        llvm::ConstantAggregateZero::get(SlotArrayTy), PoolName + "_slots");
+
+    // 8-entry direct-mapped cache: avoids pool allocation for repeated fn_ptrs
+    static const unsigned CACHE_SIZE = 8;
+    llvm::ArrayType *CacheTy = llvm::ArrayType::get(PtrTy, CACHE_SIZE);
+    new llvm::GlobalVariable(
+        M, CacheTy, false, llvm::GlobalValue::InternalLinkage,
+        llvm::ConstantAggregateZero::get(CacheTy), PoolName + "_cache_keys");
+    new llvm::GlobalVariable(
+        M, CacheTy, false, llvm::GlobalValue::InternalLinkage,
+        llvm::ConstantAggregateZero::get(CacheTy), PoolName + 
"_cache_wrappers");
+
+    // Pre-generate POOL_SIZE wrapper functions + build lookup table
+    llvm::SmallVector<llvm::Constant *, 64> WrappersConst;
+    for (unsigned I = 0; I < POOL_SIZE; ++I) {
+      std::string InstName = WrapperName + "_" + std::to_string(I);
+      llvm::Function *W = llvm::Function::Create(
+          DstFnType, llvm::GlobalValue::InternalLinkage, InstName, M);
+      W->addFnAttr(llvm::Attribute::NoInline);
+      W->addFnAttr(llvm::Attribute::NoUnwind);
+
+      llvm::BasicBlock *BB = llvm::BasicBlock::Create(Context, "entry", W);
+      llvm::IRBuilder<> B(BB);
+
+      llvm::Value *SlotPtr = B.CreateConstInBoundsGEP1_32(PtrTy, Slots, I);
+      llvm::Value *FP = B.CreateLoad(PtrTy, SlotPtr);
+
+      // Defensive null check: if slot was never written, skip call
+      llvm::BasicBlock *CallBB = llvm::BasicBlock::Create(Context, "call", W);
+      llvm::BasicBlock *NullBB = llvm::BasicBlock::Create(Context, "nullslot", 
W);
+      llvm::Value *IsNotNull = B.CreateIsNotNull(FP);
+      B.CreateCondBr(IsNotNull, CallBB, NullBB);
+
+      B.SetInsertPoint(CallBB);
+      llvm::SmallVector<llvm::Value *, 8> CallArgs;
+      auto ArgIt = W->arg_begin();
+      for (unsigned J = 0; J < SrcParams && ArgIt != W->arg_end(); ++J, 
++ArgIt) {
+        llvm::Value *A = &*ArgIt;
+        if (A->getType() != SrcFnType->getParamType(J))
+          A = B.CreateBitOrPointerCast(A, SrcFnType->getParamType(J));
+        CallArgs.push_back(A);
+      }
+      llvm::CallInst *Call = B.CreateCall(SrcFnType, FP, CallArgs);
+
+      if (DstFnType->getReturnType()->isVoidTy()) {
+        B.CreateRetVoid();
+        B.SetInsertPoint(NullBB);
+        B.CreateRetVoid();
+      } else {
+        llvm::Value *Ret = Call;
+        if (Ret->getType() != DstFnType->getReturnType())
+          Ret = B.CreateBitOrPointerCast(Ret, DstFnType->getReturnType());
+        B.CreateRet(Ret);
+        B.SetInsertPoint(NullBB);
+        B.CreateRet(llvm::Constant::getNullValue(DstFnType->getReturnType()));
+      }
 
-    // Return the result
-    if (DstFnType->getReturnType()->isVoidTy()) {
-      Builder.CreateRetVoid();
-    } else {
-      llvm::Value *Ret = Call;
-      if (Ret->getType() != DstFnType->getReturnType())
-        Ret = Builder.CreateBitOrPointerCast(Ret, DstFnType->getReturnType());
-      Builder.CreateRet(Ret);
+      WrappersConst.push_back(llvm::ConstantExpr::getBitCast(W, PtrTy));
     }
 
-    RuntimeWrapperCache[Key] = Wrapper;
+    // Create constant lookup table (not TLS — read-only function pointers)
+    llvm::ArrayType *WrapTblTy = llvm::ArrayType::get(PtrTy, POOL_SIZE);
+    WrapperTable = new llvm::GlobalVariable(
+        M, WrapTblTy, true, llvm::GlobalValue::InternalLinkage,
+        llvm::ConstantArray::get(WrapTblTy, WrappersConst),
+        PoolName + "_wrappers");
+    WrapperTable->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
+  } else {
+    Slots = M.getNamedGlobal(PoolName + "_slots");
+    WrapperTable = M.getNamedGlobal(PoolName + "_wrappers");
+    SrcFnType = llvm::cast<llvm::FunctionType>(
+        CGF.CGM.getTypes().ConvertType(QualType(SrcProto, 0)));
   }
 
-  // Store the function pointer in the global variable
-  CharUnits Alignment = CGF.CGM.getPointerAlign();
-  Address GlobalAddr(FnPtrGlobal, PtrTy, Alignment);
-
-  // Store the function pointer to be used by the wrapper
-  CGF.Builder.CreateStore(FnPtr, GlobalAddr);
+  // Runtime null check
+  llvm::Value *IsNull = CGF.Builder.CreateIsNull(FnPtr);
+  llvm::BasicBlock *NullContBB = llvm::BasicBlock::Create(Context, "nullcont", 
CGF.CurFn);
+  llvm::BasicBlock *NotNullBB = llvm::BasicBlock::Create(Context, "notnull", 
CGF.CurFn);
+  CharUnits PtrAlign = CGF.CGM.getPointerAlign();
+
+  if (IsImmediate) {
+    // === Immediate call: 1 TLS slot + 1 wrapper, no branches after null 
check ===
+    CGF.Builder.CreateCondBr(IsNull, NullContBB, NotNullBB);
+
+    CGF.Builder.SetInsertPoint(NotNullBB);
+    CGF.Builder.CreateStore(FnPtr, Address(ImmediateSlot, PtrTy, PtrAlign));
+    CGF.Builder.CreateBr(NullContBB);
+
+    CGF.Builder.SetInsertPoint(NullContBB);
+    llvm::PHINode *PHI = CGF.Builder.CreatePHI(PtrTy, 2);
+    PHI->addIncoming(llvm::ConstantExpr::getBitCast(ImmediateWrapper, PtrTy), 
NotNullBB);
+    PHI->addIncoming(llvm::ConstantPointerNull::get(PtrTy), NullContBB);
+    return PHI;
+  }
 
-  // Return the wrapper function
-  return Wrapper;
+  // === Store-for-later: pool with 64 slots + 8-entry cache + atomic counter 
===
+  CGF.Builder.CreateCondBr(IsNull, NullContBB, NotNullBB);
+
+  CGF.Builder.SetInsertPoint(NotNullBB);
+  llvm::ArrayType *WrapTblTy = llvm::ArrayType::get(PtrTy, POOL_SIZE);
+  static const unsigned CACHE_SIZE = 8;
+  llvm::ArrayType *CacheTy = llvm::ArrayType::get(PtrTy, CACHE_SIZE);
+  llvm::GlobalVariable *CacheKeys = M.getNamedGlobal(PoolName + "_cache_keys");
+  llvm::GlobalVariable *CacheWrappers = M.getNamedGlobal(PoolName + 
"_cache_wrappers");
+  llvm::BasicBlock *CacheHitBB = llvm::BasicBlock::Create(Context, "cachehit", 
CGF.CurFn);
+  llvm::BasicBlock *CacheMissBB = llvm::BasicBlock::Create(Context, 
"cachemiss", CGF.CurFn);
+  llvm::BasicBlock *ScanBB = llvm::BasicBlock::Create(Context, "scan", 
CGF.CurFn);
+  llvm::BasicBlock *ScanFoundBB = llvm::BasicBlock::Create(Context, "scanfnd", 
CGF.CurFn);
+  llvm::BasicBlock *ScanNextBB = llvm::BasicBlock::Create(Context, "scannxt", 
CGF.CurFn);
+  llvm::BasicBlock *AllocCheckBB = llvm::BasicBlock::Create(Context, 
"allocchk", CGF.CurFn);
+  llvm::BasicBlock *AllocStoreBB = llvm::BasicBlock::Create(Context, 
"allocstr", CGF.CurFn);
+  llvm::BasicBlock *OverflowBB = llvm::BasicBlock::Create(Context, "overflow", 
CGF.CurFn);
+  llvm::BasicBlock *ContBB = llvm::BasicBlock::Create(Context, "cont", 
CGF.CurFn);
+
+  // Cache lookup: idx = (fn_ptr >> 2) & 7
+  llvm::Value *CacheIdx = CGF.Builder.CreateAnd(
+      CGF.Builder.CreateLShr(
+          CGF.Builder.CreatePtrToInt(FnPtr, I32Ty),
+          llvm::ConstantInt::get(I32Ty, 2)),
+      llvm::ConstantInt::get(I32Ty, CACHE_SIZE - 1));
+  llvm::Value *CacheKeyGEP = CGF.Builder.CreateInBoundsGEP(
+      CacheTy, CacheKeys, {llvm::ConstantInt::get(I32Ty, 0), CacheIdx});
+  Address CacheKeyAddr(CacheKeyGEP, PtrTy, PtrAlign);
+  llvm::Value *CachedFn = CGF.Builder.CreateLoad(CacheKeyAddr);
+  llvm::Value *CacheHit = CGF.Builder.CreateICmpEQ(CachedFn, FnPtr);
+  llvm::Value *CacheWrapGEP = CGF.Builder.CreateInBoundsGEP(
+      CacheTy, CacheWrappers, {llvm::ConstantInt::get(I32Ty, 0), CacheIdx});
+  Address CacheWrapAddr(CacheWrapGEP, PtrTy, PtrAlign);
+
+  CGF.Builder.CreateCondBr(CacheHit, CacheHitBB, CacheMissBB);
+
+  // Cache hit: load cached wrapper, branch to cont
+  CGF.Builder.SetInsertPoint(CacheHitBB);
+  llvm::Value *CacheHitW = CGF.Builder.CreateLoad(CacheWrapAddr);
+  CGF.Builder.CreateBr(ContBB);
+
+  // Cache miss: scan pool for existing mapping
+  CGF.Builder.SetInsertPoint(CacheMissBB);
+  CGF.Builder.CreateBr(ScanBB);
+
+  // Scan loop: find fn_ptr in slots[0..POOL_SIZE-1]
+  CGF.Builder.SetInsertPoint(ScanBB);
+  llvm::PHINode *ScanIdx = CGF.Builder.CreatePHI(I32Ty, 2);
+  ScanIdx->addIncoming(llvm::ConstantInt::get(I32Ty, 0), CacheMissBB);
+  llvm::Value *ScanSlotGEP = CGF.Builder.CreateInBoundsGEP(
+      SlotArrayTy, Slots, {llvm::ConstantInt::get(I32Ty, 0), ScanIdx});
+  llvm::Value *ScanFn = CGF.Builder.CreateLoad(Address(ScanSlotGEP, PtrTy, 
PtrAlign));
+  llvm::Value *ScanMatch = CGF.Builder.CreateICmpEQ(ScanFn, FnPtr);
+  CGF.Builder.CreateCondBr(ScanMatch, ScanFoundBB, ScanNextBB);
+
+  // Found existing slot: update cache, return existing wrapper
+  CGF.Builder.SetInsertPoint(ScanFoundBB);
+  CGF.Builder.CreateStore(FnPtr, CacheKeyAddr);
+  llvm::Value *FoundWrapGEP = CGF.Builder.CreateInBoundsGEP(
+      WrapTblTy, WrapperTable, {llvm::ConstantInt::get(I32Ty, 0), ScanIdx});
+  llvm::Value *FoundW = CGF.Builder.CreateLoad(Address(FoundWrapGEP, PtrTy, 
PtrAlign));
+  CGF.Builder.CreateStore(FoundW, CacheWrapAddr);
+  CGF.Builder.CreateBr(ContBB);
+
+  // Advance scan
+  CGF.Builder.SetInsertPoint(ScanNextBB);
+  llvm::Value *NextIdx = CGF.Builder.CreateAdd(
+      ScanIdx, llvm::ConstantInt::get(I32Ty, 1));
+  llvm::Value *ScanEnd = CGF.Builder.CreateICmpUGE(
+      NextIdx, llvm::ConstantInt::get(I32Ty, POOL_SIZE));
+  ScanIdx->addIncoming(NextIdx, ScanNextBB);
+  CGF.Builder.CreateCondBr(ScanEnd, AllocCheckBB, ScanBB);
+
+  // Allocate new slot: atomic counter increment
+  CGF.Builder.SetInsertPoint(AllocCheckBB);
+  Address CounterAddr(Counter, I32Ty, PtrAlign);
+  llvm::Value *Slot = CGF.Builder.CreateAtomicRMW(
+      llvm::AtomicRMWInst::Add, CounterAddr,
+      llvm::ConstantInt::get(I32Ty, 1), llvm::AtomicOrdering::Monotonic);
+  llvm::Value *InBounds = CGF.Builder.CreateICmpULT(
+      Slot, llvm::ConstantInt::get(I32Ty, POOL_SIZE));
+  CGF.Builder.CreateCondBr(InBounds, AllocStoreBB, OverflowBB);
+
+  // Overflow
+  CGF.Builder.SetInsertPoint(OverflowBB);
+  CGF.Builder.CreateCall(llvm::Intrinsic::getOrInsertDeclaration(
+      &M, llvm::Intrinsic::trap));
+  CGF.Builder.CreateUnreachable();
+
+  // Store in pool + update cache
+  CGF.Builder.SetInsertPoint(AllocStoreBB);
+  llvm::Value *SlotIdx[] = {llvm::ConstantInt::get(I32Ty, 0), Slot};
+  llvm::Value *SlotGEP = CGF.Builder.CreateInBoundsGEP(
+      SlotArrayTy, Slots, SlotIdx);
+  CGF.Builder.CreateStore(FnPtr, Address(SlotGEP, PtrTy, PtrAlign));
+  llvm::Value *WrapGEP = CGF.Builder.CreateInBoundsGEP(
+      WrapTblTy, WrapperTable, SlotIdx);
+  llvm::Value *W = CGF.Builder.CreateLoad(Address(WrapGEP, PtrTy, PtrAlign));
+  CGF.Builder.CreateStore(FnPtr, CacheKeyAddr);
+  CGF.Builder.CreateStore(W, CacheWrapAddr);
+  CGF.Builder.CreateBr(ContBB);
+
+  // Null path
+  CGF.Builder.SetInsertPoint(NullContBB);
+  CGF.Builder.CreateBr(ContBB);
+
+  // ContBB: PHI for result
+  CGF.Builder.SetInsertPoint(ContBB);
+  llvm::PHINode *PHI = CGF.Builder.CreatePHI(PtrTy, 4);
+  PHI->addIncoming(CacheHitW, CacheHitBB);
+  PHI->addIncoming(FoundW, ScanFoundBB);
+  PHI->addIncoming(W, AllocStoreBB);
+  PHI->addIncoming(llvm::ConstantPointerNull::get(PtrTy), NullContBB);
+  return PHI;
 }
 
 std::unique_ptr<TargetCodeGenInfo>
diff --git a/clang/test/CodeGenWebAssembly/function-pointer-runtime-cast.c 
b/clang/test/CodeGenWebAssembly/function-pointer-runtime-cast.c
index d41a10cf248c6..890d8c8058943 100644
--- a/clang/test/CodeGenWebAssembly/function-pointer-runtime-cast.c
+++ b/clang/test/CodeGenWebAssembly/function-pointer-runtime-cast.c
@@ -8,9 +8,9 @@
 typedef void (*OneArgFunc)(void *);
 typedef void (*TwoArgFunc)(void *, void *);
 
-// Check for both TLS globals at the top of the output
-// CHECK: @__wasm_runtime_wrapper_vi_to_vii_fptr = linkonce_odr thread_local 
global ptr null
-// CHECK: @__wasm_runtime_wrapper_iii_to_vii_fptr = linkonce_odr thread_local 
global ptr null
+// Pool counters
+// CHECK: @__wasm_runtime_pool___wasm_runtime_wrapper_vi_to_vii{{.*}}_counter 
= internal thread_local global i32 0
+// CHECK: @__wasm_runtime_pool___wasm_runtime_wrapper_iii_to_vii{{.*}}_counter 
= internal thread_local global i32 0
 
 // A function with one argument
 void my_one_arg_func(void *ptr) {
@@ -20,15 +20,20 @@ void my_one_arg_func(void *ptr) {
 // Test case 1: Direct call of casted runtime function pointer
 // CHECK-LABEL: @runtime_cast_caller
 void runtime_cast_caller(OneArgFunc fp, void *data) {
-  // Cast the runtime parameter from 1-arg to 2-arg signature and call directly
-  // CHECK: store ptr %{{.*}}, ptr @__wasm_runtime_wrapper_vi_to_vii_fptr
-  // CHECK: call void @__wasm_runtime_wrapper_vi_to_vii(ptr
+  // CHECK: atomicrmw add ptr 
@__wasm_runtime_pool___wasm_runtime_wrapper_vi_to_vii{{.*}}_counter, i32 1
+  // CHECK: store ptr %{{.*}}, ptr %
+  // CHECK: load ptr, ptr %
   ((TwoArgFunc)fp)(data, (void*)0);
 }
 
-// The runtime wrapper should be generated once and shared by both cases
-// CHECK-LABEL: define linkonce_odr void @__wasm_runtime_wrapper_vi_to_vii(ptr 
%0, ptr %1)
-// CHECK: %{{.*}} = load ptr, ptr @__wasm_runtime_wrapper_vi_to_vii_fptr
+// Pool wrapper functions (internal linkage, one per slot)
+// CHECK-LABEL: define internal void 
@__wasm_runtime_wrapper_vi_to_vii{{.*}}_0(ptr %0, ptr %1)
+// CHECK: load ptr, ptr
+// CHECK: call void %{{.*}}(ptr %0)
+// CHECK: ret void
+
+// CHECK-LABEL: define internal void 
@__wasm_runtime_wrapper_vi_to_vii{{.*}}_1(ptr %0, ptr %1)
+// CHECK: load ptr, ptr
 // CHECK: call void %{{.*}}(ptr %0)
 // CHECK: ret void
 
@@ -43,8 +48,7 @@ void library_function(TwoArgFunc func, void *data) {
 // CHECK-LABEL: @indirect_caller
 void indirect_caller(OneArgFunc fp, void *data) {
   // Cast and pass to another function (like g_list_free_full does)
-  // CHECK: store ptr %{{.*}}, ptr @__wasm_runtime_wrapper_vi_to_vii_fptr
-  // CHECK: call void @library_function(ptr noundef 
@__wasm_runtime_wrapper_vi_to_vii
+  // CHECK: atomicrmw add ptr 
@__wasm_runtime_pool___wasm_runtime_wrapper_vi_to_vii{{.*}}_counter, i32 1
   library_function((TwoArgFunc)fp, data);
 }
 
@@ -54,28 +58,41 @@ int my_compare_func(void *a, void *b) {
 }
 
 // Test case 3: Same param count, return type coercion (int -> void)
-// This simulates g_slist_sort where int compare(void*, void*) is cast to void func(void*, void*)
-// Use a typedef to create a function pointer type with int return
 typedef int (*CompareFunc)(void *, void *);
 
 // CHECK-LABEL: @test_return_coercion
-void test_return_coercion(CompareFunc fp, void *data) {
-  // Cast int(void*, void*) -> void(void*, void*) on a runtime parameter
-  // CHECK: store ptr %{{.*}}, ptr @__wasm_runtime_wrapper_iii_to_vii_fptr
-  // CHECK: call void @library_function(ptr noundef @__wasm_runtime_wrapper_iii_to_vii
+void test_return_coercion(void *opaque_fp, void *data) {
+  // Load through void* to prevent static thunk tracing
+  // CHECK: atomicrmw add ptr @__wasm_runtime_pool___wasm_runtime_wrapper_iii_to_vii{{.*}}_counter, i32 1
+  CompareFunc fp = (CompareFunc)(__typeof__(CompareFunc))opaque_fp;
   library_function((TwoArgFunc)fp, data);
 }
 
-// The runtime wrapper for int->void coercion with same param count
-// CHECK-LABEL: define linkonce_odr void @__wasm_runtime_wrapper_iii_to_vii(ptr %0, ptr %1)
-// CHECK: %{{.*}} = load ptr, ptr @__wasm_runtime_wrapper_iii_to_vii_fptr
-// CHECK: %{{.*}} = call i32 %{{.*}}(ptr %0, ptr %1)
+// Pool wrapper for int->void coercion
+// CHECK-LABEL: define internal void @__wasm_runtime_wrapper_iii_to_vii{{.*}}_0(ptr %0, ptr %1)
+// CHECK: load ptr, ptr
+// CHECK: call i32 %{{.*}}(ptr %0, ptr %1)
 // CHECK: ret void
 
+// Store a casted function pointer for later. This simulates the closure
+// pattern where multiple closures store different marshals via the same
+// cast expression.
+TwoArgFunc saved1;
+TwoArgFunc saved2;
+
+// CHECK-LABEL: @test_store_for_later
+void test_store_for_later(OneArgFunc fp1, OneArgFunc fp2) {
+  // Each store claims a different pool slot
+  // CHECK: atomicrmw add ptr @__wasm_runtime_pool___wasm_runtime_wrapper_vi_to_vii{{.*}}_counter, i32 1
+  saved1 = (TwoArgFunc)fp1;
+  // CHECK: atomicrmw add ptr @__wasm_runtime_pool___wasm_runtime_wrapper_vi_to_vii{{.*}}_counter, i32 1
+  saved2 = (TwoArgFunc)fp2;
+}
+
 // CHECK-LABEL: @test
 void test() {
-  // Test both scenarios
   runtime_cast_caller(my_one_arg_func, (void*)0);
   indirect_caller(my_one_arg_func, (void*)0);
-  test_return_coercion(my_compare_func, (void*)0);
+  test_return_coercion((void*)my_compare_func, (void*)0);
+  test_store_for_later(my_one_arg_func, my_one_arg_func);
 }

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to