tqchen commented on code in PR #283:
URL: https://github.com/apache/tvm-ffi/pull/283#discussion_r2557714541


##########
examples/cubin_launcher/src/lib_dynamic.cc:
##########
@@ -27,27 +27,42 @@
 #include <tvm/ffi/container/tensor.h>
 #include <tvm/ffi/error.h>
 #include <tvm/ffi/extra/c_env_api.h>
-#include <tvm/ffi/extra/cubin_launcher.h>
+#include <tvm/ffi/extra/cuda/cubin_launcher.h>
 #include <tvm/ffi/function.h>
 #include <tvm/ffi/string.h>
 
 #include <cstdint>
+#include <fstream>
 #include <memory>
+#include <vector>
 
 namespace cubin_dynamic {
 
 // Global CUBIN module and kernels (loaded dynamically)
 static std::unique_ptr<tvm::ffi::CubinModule> g_cubin_module;
 static std::unique_ptr<tvm::ffi::CubinKernel> g_add_one_kernel;
 static std::unique_ptr<tvm::ffi::CubinKernel> g_mul_two_kernel;
+static std::vector<char> g_cubin_data;  // Storage for CUBIN file data
 
 /*!
  * \brief Load CUBIN from file path.
  * \param path Path to the CUBIN file.
  */
 void LoadCubin(const tvm::ffi::String& path) {
-  // Create new module and kernels
-  g_cubin_module = std::make_unique<tvm::ffi::CubinModule>(path.c_str());
+  // Read file into memory
+  std::ifstream file(std::string(path), std::ios::binary | std::ios::ate);
+  TVM_FFI_CHECK(file.is_open(), RuntimeError) << "Failed to open CUBIN file: " 
<< path;
+

Review Comment:
   maybe we can remove lib_dynamic, assuming we have a robust embedding path



##########
python/tvm_ffi/cpp/nvrtc.py:
##########
@@ -0,0 +1,296 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""NVRTC (NVIDIA Runtime Compilation) utilities for compiling CUDA source to 
CUBIN."""
+
+from __future__ import annotations
+
+import subprocess
+from collections.abc import Mapping
+from pathlib import Path
+from typing import Sequence
+
+
+def nvrtc_compile(  # noqa: PLR0912, PLR0915

Review Comment:
   does not have to be today, but it would be nice to support fatbin bundles through 
multiple nvrtc compiles and then bundling them



##########
include/tvm/ffi/extra/cuda/cubin_launcher.h:
##########
@@ -93,56 +90,140 @@ struct dim3 {
   dim3(unsigned int x_, unsigned int y_, unsigned int z_) : x(x_), y(y_), 
z(z_) {}
 };
 
+/*!
+ * \brief Macro to embed a CUBIN module with static initialization.
+ *
+ * This macro declares external symbols for embedded CUBIN data and creates
+ * a singleton struct to manage the CubinModule instance. The CUBIN data
+ * symbols should be named __tvm_ffi__cubin_<name> and 
__tvm_ffi__cubin_<name>_end,
+ * typically created using objcopy and ld.
+ *
+ * ## Creating Embedded CUBIN Symbols with objcopy
+ *
+ * To embed a CUBIN file into your binary, follow these steps:
+ *
+ * ### Step 1: Compile CUDA kernel to CUBIN
+ * \code{.bash}
+ * nvcc --cubin -arch=sm_75 kernel.cu -o kernel.cubin
+ * \endcode
+ *
+ * ### Step 2: Convert CUBIN to object file with ld
+ * \code{.bash}
+ * ld -r -b binary -o kernel_data.o kernel.cubin
+ * \endcode
+ *
+ * This creates an object file with symbols based on the input filename:
+ * - _binary_kernel_cubin_start
+ * - _binary_kernel_cubin_end
+ * - _binary_kernel_cubin_size
+ *
+ * ### Step 3: Rename symbols with objcopy
+ * \code{.bash}
+ * objcopy --rename-section .data=.rodata,alloc,load,readonly,data,contents \
+ *         --redefine-sym _binary_kernel_cubin_start=__tvm_ffi__cubin_<name> \
+ *         --redefine-sym _binary_kernel_cubin_end=__tvm_ffi__cubin_<name>_end 
\
+ *         kernel_data.o
+ * \endcode
+ *
+ * Replace `<name>` with your chosen identifier (e.g., "env", "my_kernels").
+ *
+ * ### Step 4: Link the object file with your library/executable
+ * \code{.bash}
+ * g++ -o mylib.so -shared mycode.cc kernel_data.o -Wl,-z,noexecstack
+ * \endcode
+ *
+ * Note: The `-z,noexecstack` flag marks the stack as non-executable, which is
+ * required for security as the embedded object file lacks a .note.GNU-stack 
section.
+ *
+ * ## CMake Example
+ *
+ * \code{.cmake}
+ * add_custom_command(OUTPUT kernel_data.o
+ *   COMMAND ${CMAKE_LINKER} -r -b binary -o kernel_data.o kernel.cubin
+ *   COMMAND ${CMAKE_OBJCOPY}
+ *     --rename-section .data=.rodata,alloc,load,readonly,data,contents
+ *     --redefine-sym _binary_kernel_cubin_start=__tvm_ffi__cubin_env
+ *     --redefine-sym _binary_kernel_cubin_end=__tvm_ffi__cubin_env_end
+ *     kernel_data.o
+ *   DEPENDS kernel.cubin)
+ *
+ * add_library(mylib SHARED mycode.cc kernel_data.o)
+ * target_link_options(mylib PRIVATE "LINKER:-z,noexecstack")
+ * \endcode
+ *
+ * ## Usage in C++ Code
+ *
+ * \code{.cpp}
+ * // Declare the embedded CUBIN module (use the same name as in objcopy)
+ * TVM_FFI_EMBED_CUBIN(env);
+ *
+ * void MyFunction() {
+ *   // Get kernel from embedded CUBIN (cached in static variable)
+ *   static auto kernel = TVM_FFI_EMBED_CUBIN_GET_KERNEL(env, "my_kernel");
+ *   // Use kernel...
+ * }
+ * \endcode
+ *
+ * \param name The identifier for this embedded CUBIN module (must match the
+ *             symbol names created with objcopy).
+ */
+#define TVM_FFI_EMBED_CUBIN(name)                                              
         \
+  extern "C" const char __tvm_ffi__cubin_##name[];                             
         \
+  extern "C" const char __tvm_ffi__cubin_##name##_end[];                       
         \
+  namespace {                                                                  
         \
+  struct EmbedCubinModule_##name {                                             
         \
+    tvm::ffi::CubinModule mod{tvm::ffi::Bytes(                                 
         \
+        __tvm_ffi__cubin_##name,                                               
         \
+        static_cast<size_t>(__tvm_ffi__cubin_##name##_end - 
__tvm_ffi__cubin_##name))}; \
+    static EmbedCubinModule_##name* Global() {                                 
         \
+      static EmbedCubinModule_##name inst;                                     
         \
+      return &inst;                                                            
         \
+    }                                                                          
         \
+  };                                                                           
         \
+  } /* anonymous namespace */
+
+/*!
+ * \brief Macro to get a kernel from an embedded CUBIN module.
+ *
+ * This macro retrieves a kernel by name from a previously declared embedded
+ * CUBIN module (using TVM_FFI_EMBED_CUBIN). The result is a CubinKernel 
object.
+ * It's recommended to store the result in a static variable to avoid repeated
+ * kernel lookups.
+ *
+ * Example usage:
+ * \code
+ * static auto kernel = TVM_FFI_EMBED_CUBIN_GET_KERNEL(my_kernels, 
"kernel_name");
+ * \endcode
+ *
+ * \param name The identifier of the embedded CUBIN module.
+ * \param kernel_name The name of the kernel function (as a string literal).
+ * \return A CubinKernel object for the specified kernel.
+ */
+#define TVM_FFI_EMBED_CUBIN_GET_KERNEL(name, kernel_name) \
+  (EmbedCubinModule_##name::Global()->mod[kernel_name])
+
 // Forward declaration
 class CubinKernel;
 
 /*!
  * \brief CUDA CUBIN module loader and manager.
  *
  * This class provides a RAII wrapper around CUDA driver API's library 
management.
- * It loads a CUBIN module from memory or file and manages the library handle.
+ * It loads a CUBIN module from memory and manages the library handle.
  * Supports multi-GPU execution using CUDA primary contexts.
  */
 class CubinModule {
  public:
   /*!
    * \brief Load CUBIN module from memory.
    *
-   * \param data Pointer to CUBIN binary data in memory.
-   * \param size Size of the CUBIN binary data in bytes.
+   * \param bytes CUBIN binary data as a Bytes object.
    * \note Calls cuInit(0) to ensure CUDA is initialized.
    */
-  CubinModule(const void* data, uint64_t size) {
+  explicit CubinModule(const Bytes& bytes) {

Review Comment:
   actually, not explicitly creating Bytes is better in the case of a large view; 
maybe we can start by changing to `const char* data_begin, const char* data_end`



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to