tqchen commented on code in PR #283:
URL: https://github.com/apache/tvm-ffi/pull/283#discussion_r2557920769
##########
include/tvm/ffi/extra/cuda/cubin_launcher.h:
##########
@@ -93,56 +90,140 @@ struct dim3 {
dim3(unsigned int x_, unsigned int y_, unsigned int z_) : x(x_), y(y_),
z(z_) {}
};
+/*!
+ * \brief Macro to embed a CUBIN module with static initialization.
+ *
+ * This macro declares external symbols for embedded CUBIN data and creates
+ * a singleton struct to manage the CubinModule instance. The CUBIN data
+ * symbols should be named __tvm_ffi__cubin_<name> and __tvm_ffi__cubin_<name>_end,
+ * typically created using objcopy and ld.
+ *
+ * ## Creating Embedded CUBIN Symbols with objcopy
+ *
+ * To embed a CUBIN file into your binary, follow these steps:
+ *
+ * ### Step 1: Compile CUDA kernel to CUBIN
+ * \code{.bash}
+ * nvcc --cubin -arch=sm_75 kernel.cu -o kernel.cubin
+ * \endcode
+ *
+ * ### Step 2: Convert CUBIN to object file with ld
+ * \code{.bash}
+ * ld -r -b binary -o kernel_data.o kernel.cubin
+ * \endcode
+ *
+ * This creates an object file with symbols based on the input filename:
+ * - _binary_kernel_cubin_start
+ * - _binary_kernel_cubin_end
+ * - _binary_kernel_cubin_size
+ *
+ * ### Step 3: Rename symbols with objcopy
+ * \code{.bash}
+ * objcopy --rename-section .data=.rodata,alloc,load,readonly,data,contents \
+ * --redefine-sym _binary_kernel_cubin_start=__tvm_ffi__cubin_<name> \
+ * --redefine-sym _binary_kernel_cubin_end=__tvm_ffi__cubin_<name>_end \
+ * kernel_data.o
+ * \endcode
+ *
+ * Replace `<name>` with your chosen identifier (e.g., "env", "my_kernels").
+ *
+ * ### Step 4: Link the object file with your library/executable
+ * \code{.bash}
+ * g++ -o mylib.so -shared mycode.cc kernel_data.o -Wl,-z,noexecstack
+ * \endcode
+ *
+ * Note: The `-z,noexecstack` flag marks the stack as non-executable, which is
+ * required for security as the embedded object file lacks a .note.GNU-stack section.
+ *
+ * ## CMake Example
+ *
+ * \code{.cmake}
+ * add_custom_command(OUTPUT kernel_data.o
+ * COMMAND ${CMAKE_LINKER} -r -b binary -o kernel_data.o kernel.cubin
+ * COMMAND ${CMAKE_OBJCOPY}
+ * --rename-section .data=.rodata,alloc,load,readonly,data,contents
+ * --redefine-sym _binary_kernel_cubin_start=__tvm_ffi__cubin_env
+ * --redefine-sym _binary_kernel_cubin_end=__tvm_ffi__cubin_env_end
+ * kernel_data.o
+ * DEPENDS kernel.cubin)
+ *
+ * add_library(mylib SHARED mycode.cc kernel_data.o)
+ * target_link_options(mylib PRIVATE "LINKER:-z,noexecstack")
+ * \endcode
+ *
+ * ## Usage in C++ Code
+ *
+ * \code{.cpp}
+ * // Declare the embedded CUBIN module (use the same name as in objcopy)
+ * TVM_FFI_EMBED_CUBIN(env);
+ *
+ * void MyFunction() {
+ * // Get kernel from embedded CUBIN (cached in static variable)
+ * static auto kernel = TVM_FFI_EMBED_CUBIN_GET_KERNEL(env, "my_kernel");
+ * // Use kernel...
+ * }
+ * \endcode
+ *
+ * \param name The identifier for this embedded CUBIN module (must match the
+ * symbol names created with objcopy).
+ */
+#define TVM_FFI_EMBED_CUBIN(name)
\
+ extern "C" const char __tvm_ffi__cubin_##name[];
\
+ extern "C" const char __tvm_ffi__cubin_##name##_end[];
\
+ namespace {
\
+ struct EmbedCubinModule_##name {
\
+ tvm::ffi::CubinModule mod{tvm::ffi::Bytes(
\
+ __tvm_ffi__cubin_##name,
\
+ static_cast<size_t>(__tvm_ffi__cubin_##name##_end -
__tvm_ffi__cubin_##name))}; \
+ static EmbedCubinModule_##name* Global() {
\
+ static EmbedCubinModule_##name inst;
\
+ return &inst;
\
+ }
\
+ };
\
+ } /* anonymous namespace */
+
+/*!
+ * \brief Macro to get a kernel from an embedded CUBIN module.
+ *
+ * This macro retrieves a kernel by name from a previously declared embedded
+ * CUBIN module (using TVM_FFI_EMBED_CUBIN). The result is a CubinKernel object.
+ * It's recommended to store the result in a static variable to avoid repeated
+ * kernel lookups.
+ *
+ * Example usage:
+ * \code
+ * static auto kernel = TVM_FFI_EMBED_CUBIN_GET_KERNEL(my_kernels, "kernel_name");
+ * \endcode
+ *
+ * \param name The identifier of the embedded CUBIN module.
+ * \param kernel_name The name of the kernel function (as a string literal).
+ * \return A CubinKernel object for the specified kernel.
+ */
+#define TVM_FFI_EMBED_CUBIN_GET_KERNEL(name, kernel_name) \
+ (EmbedCubinModule_##name::Global()->mod[kernel_name])
+
// Forward declaration
class CubinKernel;
/*!
* \brief CUDA CUBIN module loader and manager.
*
* This class provides a RAII wrapper around CUDA driver API's library management.
- * It loads a CUBIN module from memory or file and manages the library handle.
+ * It loads a CUBIN module from memory and manages the library handle.
* Supports multi-GPU execution using CUDA primary contexts.
*/
class CubinModule {
public:
/*!
* \brief Load CUBIN module from memory.
*
- * \param data Pointer to CUBIN binary data in memory.
- * \param size Size of the CUBIN binary data in bytes.
+ * \param bytes CUBIN binary data as a Bytes object.
* \note Calls cuInit(0) to ensure CUDA is initialized.
*/
- CubinModule(const void* data, uint64_t size) {
+ explicit CubinModule(const Bytes& bytes) {
Review Comment:
I think that is dangerous for binary data; let us pass begin and end instead.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]