This is an automated email from the ASF dual-hosted git repository.
tqchen pushed a commit to branch dev
in repository https://gitbox.apache.org/repos/asf/tvm-ffi.git
The following commit(s) were added to refs/heads/dev by this push:
new c100338 [DOCS] Add C example (#15)
c100338 is described below
commit c100338de52825097ddc44bbac3d03a92f45b33a
Author: Tianqi Chen <[email protected]>
AuthorDate: Sun Sep 14 22:33:26 2025 -0400
[DOCS] Add C example (#15)
This PR adds a minimal runnable C example that demonstrates how
to interface with compiler codegen.
---
cmake/Utils/AddGoogleTest.cmake | 2 +-
docs/get_started/quick_start.md | 72 ++++++++++++++++++++++++++++++++++--
docs/guides/compiler_integration.md | 34 ++++++++++-------
examples/quick_start/CMakeLists.txt | 7 ++++
examples/quick_start/README.md | 4 +-
examples/quick_start/run_example.py | 21 +++++++++++
examples/quick_start/src/add_one_c.c | 72 ++++++++++++++++++++++++++++++++++++
include/tvm/ffi/c_api.h | 35 +++++++++---------
pyproject.toml | 2 +-
python/tvm_ffi/config.py | 6 ++-
10 files changed, 216 insertions(+), 39 deletions(-)
diff --git a/cmake/Utils/AddGoogleTest.cmake b/cmake/Utils/AddGoogleTest.cmake
index e5a7a84..251d713 100644
--- a/cmake/Utils/AddGoogleTest.cmake
+++ b/cmake/Utils/AddGoogleTest.cmake
@@ -48,7 +48,7 @@ macro(tvm_ffi_add_googletest target_name)
target_link_libraries(${target_name} PRIVATE gtest_main)
gtest_discover_tests(${target_name}
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
- TEST_DISCOVERY_TIMEOUT 300
+ TEST_DISCOVERY_TIMEOUT 600
DISCOVERY_MODE PRE_TEST
PROPERTIES
VS_DEBUGGER_WORKING_DIRECTORY "${PROJECT_SOURCE_DIR}"
diff --git a/docs/get_started/quick_start.md b/docs/get_started/quick_start.md
index 8b127c9..449c1db 100644
--- a/docs/get_started/quick_start.md
+++ b/docs/get_started/quick_start.md
@@ -72,6 +72,7 @@ tensor and expose that function as TVM FFI compatible function. The key file str
examples/quick_start/
├── src/
│ ├── add_one_cpu.cc # CPU implementation
+│   ├── add_one_c.c            # A low-level C-based implementation
│ ├── add_one_cuda.cu # CUDA implementation
│ └── run_example.cc # C++ usage example
├── run_example.py # Python usage example
@@ -201,16 +202,81 @@ shows how to run the example exported function in C++.
#include <tvm/ffi/container/tensor.h>
#include <tvm/ffi/extra/module.h>
-void CallAddOne(DLTensor* x, DLTensor *y) {
- namespace ffi = tvm::ffi;
+namespace ffi = tvm::ffi;
+
+void CallAddOne(ffi::Tensor x, ffi::Tensor y) {
ffi::Module mod = ffi::Module::LoadFromFile("build/add_one_cpu.so");
ffi::Function add_one_cpu = mod->GetFunction("add_one_cpu").value();
add_one_cpu(x, y);
}
```
+## Advanced: Minimal C ABI demonstration
+
+For those who need to understand the low-level C ABI or are implementing
+compiler codegen, we also provide a C-only example:
+
+```c
+#include <tvm/ffi/c_api.h>
+#include <tvm/ffi/extra/c_env_api.h>
+
+// Helper to extract DLTensor from TVMFFIAny
+int ReadDLTensorPtr(const TVMFFIAny *value, DLTensor** out) {
+ if (value->type_index == kTVMFFIDLTensorPtr) {
+ *out = (DLTensor*)(value->v_ptr);
+ return 0;
+ }
+ if (value->type_index != kTVMFFITensor) {
+ TVMFFIErrorSetRaisedFromCStr("ValueError", "Expects a Tensor input");
+ return -1;
+ }
+ *out = (DLTensor*)((char*)(value->v_obj) + sizeof(TVMFFIObject));
+ return 0;
+}
+
+// Raw C FFI function
+int __tvm_ffi_add_one_c(
+ void* handle, const TVMFFIAny* args, int32_t num_args, TVMFFIAny* result
+) {
+ DLTensor *x, *y;
+
+ // Extract tensor arguments
+ if (ReadDLTensorPtr(&args[0], &x) == -1) return -1;
+ if (ReadDLTensorPtr(&args[1], &y) == -1) return -1;
+
+ // Get current stream for device synchronization (e.g., CUDA)
+  // not needed for CPU; kept here for demonstration purposes
+  void* stream = TVMFFIEnvGetStream(x->device.device_type, x->device.device_id);
+
+ // Perform computation
+ for (int i = 0; i < x->shape[0]; ++i) {
+ ((float*)(y->data))[i] = ((float*)(x->data))[i] + 1;
+ }
+ return 0; // Success
+}
+```
+To compile this code, add the paths returned by {py:func}`tvm_ffi.libinfo.find_include_paths`
+to your include path and link against the shared library located via
+{py:func}`tvm_ffi.libinfo.find_libtvm_ffi`.
+We also provide the `tvm-ffi-config` command line tool to query these flags,
+so you can compile with the following command:
+
+```bash
+gcc -shared -fPIC `tvm-ffi-config --cflags` \
+ src/add_one_c.c -o build/add_one_c.so \
+ `tvm-ffi-config --ldflags` `tvm-ffi-config --libs`
+```
+
+The main takeaway points are:
+- Function symbols follow the naming convention `int __tvm_ffi_<name>`
+- The function follows the signature of `TVMFFISafeCallType`
+- Use `TVMFFIAny` to handle dynamic argument types
+- Return `0` for success, `-1` for error (set via `TVMFFIErrorSetRaisedFromCStr`)
+- This function can be compiled with a plain C compiler and loaded in the same way as
+  the other libraries in this example (see the loading sketch below).
+
## Summary Key Concepts
- **TVM_FFI_DLL_EXPORT_TYPED_FUNC** exposes a C++ function into the tvm-ffi C ABI
-- **DLTensor** is a universal tensor structure that enables zero-copy exchange of array data
+- **ffi::Tensor** is a universal tensor structure that enables zero-copy exchange of array data
- **Module loading** is provided by tvm ffi APIs in multiple languages.
+- **C ABI** is provided for easy low-level integration
+
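As the takeaway list above notes, the compiled library loads like any other tvm-ffi
module. A minimal Python sketch, condensed from the `run_example.py` change later in
this commit:

```python
import numpy
import tvm_ffi

# load the C-ABI library exactly like the C++ ones
mod = tvm_ffi.load_module("build/add_one_c.so")

x = numpy.array([1, 2, 3, 4, 5], dtype=numpy.float32)
y = numpy.empty_like(x)
mod.add_one_c(x, y)  # dispatches to the __tvm_ffi_add_one_c symbol
print(y)             # [2. 3. 4. 5. 6.]
```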
diff --git a/docs/guides/compiler_integration.md b/docs/guides/compiler_integration.md
index 0eaf1ff..a1355af 100644
--- a/docs/guides/compiler_integration.md
+++ b/docs/guides/compiler_integration.md
@@ -35,43 +35,49 @@ following options:
use {c:macro}`TVM_FFI_DLL_EXPORT_TYPED_FUNC` to expose the symbol.
The following code snippet shows C code that corresponds to a
-function performing `add_one` under the ABI. It is reasonably straightforward for
+function performing `add_one_c` under the ABI. It is reasonably straightforward for
low-level code generators to replicate this C logic.
+You can run this code as part of the [quick start example](https://github.com/apache/tvm-ffi/tree/dev/examples/quick_start).
```c
#include <tvm/ffi/c_api.h>
#include <tvm/ffi/extra/c_env_api.h>
// Helper function to extract DLTensor from TVMFFIAny (can be inlined into generated code)
-int ReadDLTensorPtr(const TVMFFIAny *value, DLTensor* out) {
+int ReadDLTensorPtr(const TVMFFIAny *value, DLTensor** out) {
if (value->type_index == kTVMFFIDLTensorPtr) {
- *out = static_cast<DLTensor*>(value->v_ptr);
+ *out = (DLTensor*)(value->v_ptr);
return 0;
}
- if (value->type_index == kTVMFFITensor) {
+ if (value->type_index != kTVMFFITensor) {
+ // Use TVMFFIErrorSetRaisedFromCStr to set an error which will
+ // be propagated to the caller
TVMFFIErrorSetRaisedFromCStr("ValueError", "Expects a Tensor input");
return -1;
}
- *out = reinterpret_cast<DLTensor*>(
- reinterpret_cast<char*>(value->v_obj) + sizeof(TVMFFIObject));
+ *out = (DLTensor*)((char*)(value->v_obj) + sizeof(TVMFFIObject));
return 0;
}
// FFI function implementing add_one operation
-int __tvm_ffi_add_one(
+int __tvm_ffi_add_one_c(
void* handle, const TVMFFIAny* args, int32_t num_args, TVMFFIAny* result
) {
- DLTensor *a, *b, *c;
+ DLTensor *x, *y;
// Extract tensor arguments
- if (ReadDLTensorPtr(&args[0], &a) == -1) return -1;
- if (ReadDLTensorPtr(&args[1], &b) == -1) return -1;
- if (ReadDLTensorPtr(&args[2], &c) == -1) return -1;
+  // return -1 on error; the error is set through TVMFFIErrorSetRaisedFromCStr
+ if (ReadDLTensorPtr(&args[0], &x) == -1) return -1;
+ if (ReadDLTensorPtr(&args[1], &y) == -1) return -1;
// Get current stream for device synchronization (e.g., CUDA)
-  void* stream = TVMFFIEnvGetStream(a->device.device_type, a->device.device_id);
+  // not needed for CPU; kept here for demonstration purposes
+  void* stream = TVMFFIEnvGetStream(x->device.device_type, x->device.device_id);
-  // Generated computation code would follow here to perform the actual operation
- // on tensors a, b, c and store result in c
+ // perform the actual operation
+ for (int i = 0; i < x->shape[0]; ++i) {
+ ((float*)(y->data))[i] = ((float*)(x->data))[i] + 1;
+ }
+  // return 0 on success
return 0;
}
```
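One detail generated code typically adds on top of this snippet is argument-count
validation, using the same error protocol. A hedged sketch (the symbol name, the
`TypeError` kind, and the two-argument check are illustrative assumptions, not part
of this commit):

```c
#include <tvm/ffi/c_api.h>

int __tvm_ffi_add_one_checked(
    void* handle, const TVMFFIAny* args, int32_t num_args, TVMFFIAny* result
) {
  if (num_args != 2) {
    // record the error, then signal it to the caller with a non-zero return
    TVMFFIErrorSetRaisedFromCStr("TypeError", "expected exactly 2 arguments");
    return -1;
  }
  // ... tensor extraction and computation as in the example above ...
  return 0;
}
```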
diff --git a/examples/quick_start/CMakeLists.txt b/examples/quick_start/CMakeLists.txt
index 0553098..0f6ea11 100644
--- a/examples/quick_start/CMakeLists.txt
+++ b/examples/quick_start/CMakeLists.txt
@@ -31,14 +31,21 @@ find_package(tvm_ffi CONFIG REQUIRED)
# use the projects as usual
add_library(add_one_cpu SHARED src/add_one_cpu.cc)
+add_library(add_one_c SHARED src/add_one_c.c)
target_link_libraries(add_one_cpu tvm_ffi_header)
target_link_libraries(add_one_cpu tvm_ffi_shared)
+target_link_libraries(add_one_c tvm_ffi_shared)
# show as add_one_cpu.so
set_target_properties(
add_one_cpu PROPERTIES
PREFIX ""
SUFFIX ".so"
)
+set_target_properties(
+ add_one_c PROPERTIES
+ PREFIX ""
+ SUFFIX ".so"
+)
# Check if CUDA is available
if(NOT WIN32)
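For context, `run_example.py` loads the resulting libraries from `build/`, so a
typical out-of-source build of this example directory (assumed invocation, not part
of this commit) would be:

```bash
cd examples/quick_start
cmake -B build
cmake --build build
python run_example.py
```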
diff --git a/examples/quick_start/README.md b/examples/quick_start/README.md
index 002d437..d4d130e 100644
--- a/examples/quick_start/README.md
+++ b/examples/quick_start/README.md
@@ -52,7 +52,7 @@ You can also compile the modules directly using
flags provided by the `tvm-ffi-config` tool.
```bash
-g++ -shared -fPIC `tvm-ffi-config --cxxflags` \
- src/add_one_cpu.cc -o build/add_one_cpu.so \
+gcc -shared -fPIC `tvm-ffi-config --cflags` \
+ src/add_one_c.c -o build/add_one_c.so \
`tvm-ffi-config --ldflags` `tvm-ffi-config --libs`
```
diff --git a/examples/quick_start/run_example.py b/examples/quick_start/run_example.py
index c7a2fcb..e126af1 100644
--- a/examples/quick_start/run_example.py
+++ b/examples/quick_start/run_example.py
@@ -52,6 +52,26 @@ def run_add_one_cpu():
print(y)
+def run_add_one_c():
+ """Load the add_one_c module and call the add_one_c function."""
+ mod = tvm_ffi.load_module("build/add_one_c.so")
+
+ x = numpy.array([1, 2, 3, 4, 5], dtype=numpy.float32)
+ y = numpy.empty_like(x)
+ mod.add_one_c(x, y)
+ print("numpy.result after add_one_c(x, y)")
+    print(y)
+
+ if torch is None:
+ return
+
+ x = torch.tensor([1, 2, 3, 4, 5], dtype=torch.float32)
+ y = torch.empty_like(x)
+ mod.add_one_c(x, y)
+ print("torch.result after add_one_c(x, y)")
+ print(y)
+
+
def run_add_one_cuda():
"""Load the add_one_cuda module and call the add_one_cuda function."""
if torch is None or not torch.cuda.is_available():
@@ -76,6 +96,7 @@ def run_add_one_cuda():
def main():
"""Main function to run the example."""
run_add_one_cpu()
+ run_add_one_c()
run_add_one_cuda()
diff --git a/examples/quick_start/src/add_one_c.c b/examples/quick_start/src/add_one_c.c
new file mode 100644
index 0000000..a12987e
--- /dev/null
+++ b/examples/quick_start/src/add_one_c.c
@@ -0,0 +1,72 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+#include <tvm/ffi/c_api.h>
+#include <tvm/ffi/extra/c_env_api.h>
+
+// This is a raw C variant of the add_one_cpu function.
+// It demonstrates how the low-level machinery works
+// to construct a tvm ffi compatible function.
+//
+// This function can also serve as a reference for how to implement
+// a compiler codegen to target tvm ffi.
+//
+// If you are looking for a higher-level way to construct a tvm ffi
+// compatible function, please refer to add_one_cpu.cc instead.
+/*!
+ * \brief Helper code to read DLTensor from TVMFFIAny, can be inlined into generated code
+ * \param value The TVMFFIAny to read from
+ * \param out The DLTensor to read into
+ * \return 0 on success, -1 on error
+ */
+int ReadDLTensorPtr(const TVMFFIAny* value, DLTensor** out) {
+ if (value->type_index == kTVMFFIDLTensorPtr) {
+ *out = (DLTensor*)(value->v_ptr);
+ return 0;
+ }
+ if (value->type_index != kTVMFFITensor) {
+ // Use TVMFFIErrorSetRaisedFromCStr to set an error which will
+ // be propagated to the caller
+ TVMFFIErrorSetRaisedFromCStr("ValueError", "Expects a Tensor input");
+ return -1;
+ }
+ *out = (DLTensor*)((char*)(value->v_obj) + sizeof(TVMFFIObject));
+ return 0;
+}
+
+// FFI function implementing add_one operation
+int __tvm_ffi_add_one_c(  //
+    void* handle, const TVMFFIAny* args, int32_t num_args, TVMFFIAny* result  //
+) {
+ DLTensor *x, *y;
+ // Extract tensor arguments
+  // return -1 on error; the error is set through TVMFFIErrorSetRaisedFromCStr
+ if (ReadDLTensorPtr(&args[0], &x) == -1) return -1;
+ if (ReadDLTensorPtr(&args[1], &y) == -1) return -1;
+
+ // Get current stream for device synchronization (e.g., CUDA)
+  // not needed for CPU; kept here for demonstration purposes
+  void* stream = TVMFFIEnvGetStream(x->device.device_type, x->device.device_id);
+
+ // perform the actual operation
+ for (int i = 0; i < x->shape[0]; ++i) {
+ ((float*)(y->data))[i] = ((float*)(x->data))[i] + 1;
+ }
+  // return 0 on success
+ return 0;
+}
diff --git a/include/tvm/ffi/c_api.h b/include/tvm/ffi/c_api.h
index f13f820..3dcdf4f 100644
--- a/include/tvm/ffi/c_api.h
+++ b/include/tvm/ffi/c_api.h
@@ -27,21 +27,6 @@
#include <dlpack/dlpack.h>
#include <stdint.h>
-/*
- * \brief C-style Allocator that allocates memory for a DLPack tensor.
- * \param prototype The prototype DLTensor to offer details about device and shape.
- * \param out The output DLManagedTensorVersioned.
- * \param error_ctx The context to set the error.
- * \param SetError The function to set the error.
- * \return 0 on success, -1 on failure.
- *         call SetError(error_ctx, kind, message) to set the error kind and message.
- * \note Error propagation via SetError.
- */
-typedef int (*DLPackTensorAllocator)(  //
-    DLTensor* prototype, DLManagedTensorVersioned** out, void* error_ctx,  //
-    void (*SetError)(void* error_ctx, const char* kind, const char* message)  //
-);
-
// Macros to do weak linking
#ifdef _MSC_VER
#define TVM_FFI_WEAK __declspec(selectany)
@@ -75,12 +60,29 @@ typedef int (*DLPackTensorAllocator)(  //
extern "C" {
#endif
+// TODO(tqchen): remove this once dlpack.h is updated
+typedef struct DLManagedTensorVersioned DLManagedTensorVersioned;
+
+/*
+ * \brief C-style Allocator that allocates memory for a DLPack tensor.
+ * \param prototype The prototype DLTensor to offer details about device and shape.
+ * \param out The output DLManagedTensorVersioned.
+ * \param error_ctx The context to set the error.
+ * \param SetError The function to set the error.
+ * \return 0 on success, -1 on failure.
+ *         call SetError(error_ctx, kind, message) to set the error kind and message.
+ * \note Error propagation via SetError.
+ */
+typedef int (*DLPackTensorAllocator)(  //
+    DLTensor* prototype, DLManagedTensorVersioned** out, void* error_ctx,  //
+    void (*SetError)(void* error_ctx, const char* kind, const char* message)  //
+);
+
#ifdef __cplusplus
enum TVMFFITypeIndex : int32_t {
#else
typedef enum {
#endif
-
/*
* \brief The root type of all FFI objects.
*
@@ -279,7 +281,6 @@ typedef struct {
DLDataType v_dtype; // data type
DLDevice v_device; // device
char v_bytes[8]; // small string
- char32_t v_char32[2]; // small UCS4 string and Unicode
uint64_t v_uint64; // uint64 repr mainly used for hashing
};
} TVMFFIAny;
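To make the error-propagation contract above concrete, here is a minimal sketch of
an allocator matching this typedef. It is not part of this commit: it assumes a
dlpack.h that already defines `DLManagedTensorVersioned` (see the TODO above) and
handles only compact CPU tensors.

```c
#include <stdlib.h>
#include <string.h>
#include <dlpack/dlpack.h>

static void CpuTensorDeleter(DLManagedTensorVersioned* self) {
  free(self->dl_tensor.data);
  free(self->dl_tensor.shape);
  free(self);
}

// Matches the DLPackTensorAllocator typedef above.
static int CpuTensorAllocator(
    DLTensor* prototype, DLManagedTensorVersioned** out, void* error_ctx,
    void (*SetError)(void* error_ctx, const char* kind, const char* message)
) {
  if (prototype->device.device_type != kDLCPU) {
    SetError(error_ctx, "RuntimeError", "only CPU allocation is supported");
    return -1;
  }
  int64_t numel = 1;
  for (int i = 0; i < prototype->ndim; ++i) numel *= prototype->shape[i];
  size_t nbytes =
      (size_t)numel * ((prototype->dtype.bits * prototype->dtype.lanes + 7) / 8);

  DLManagedTensorVersioned* ret = (DLManagedTensorVersioned*)calloc(1, sizeof(*ret));
  int64_t* shape = (int64_t*)malloc(sizeof(int64_t) * prototype->ndim);
  void* data = malloc(nbytes);
  if (ret == NULL || shape == NULL || data == NULL) {
    free(ret); free(shape); free(data);
    SetError(error_ctx, "RuntimeError", "out of memory");
    return -1;
  }
  memcpy(shape, prototype->shape, sizeof(int64_t) * prototype->ndim);
  ret->version.major = DLPACK_MAJOR_VERSION;  // provided by dlpack >= 1.0
  ret->version.minor = DLPACK_MINOR_VERSION;
  ret->deleter = CpuTensorDeleter;
  ret->dl_tensor = *prototype;    // copies device, dtype, ndim
  ret->dl_tensor.shape = shape;   // own a copy of the shape
  ret->dl_tensor.strides = NULL;  // compact row-major layout
  ret->dl_tensor.byte_offset = 0;
  ret->dl_tensor.data = data;
  *out = ret;
  return 0;
}
```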
diff --git a/pyproject.toml b/pyproject.toml
index bfe7b42..6c0c490 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -17,7 +17,7 @@
[project]
name = "apache-tvm-ffi"
-version = "0.1.0b1"
+version = "0.1.0b2"
description = "tvm ffi"
authors = [{ name = "TVM FFI team" }]
diff --git a/python/tvm_ffi/config.py b/python/tvm_ffi/config.py
index dcd85c2..4e87caa 100644
--- a/python/tvm_ffi/config.py
+++ b/python/tvm_ffi/config.py
@@ -48,6 +48,7 @@ def __main__():
parser.add_argument("--libs", action="store_true", help="Libraries to be
linked")
parser.add_argument("--cython-lib-path", action="store_true", help="Print
cython path")
parser.add_argument("--cxxflags", action="store_true", help="Print cxx
flags")
+ parser.add_argument("--cflags", action="store_true", help="Print c flags")
parser.add_argument("--ldflags", action="store_true", help="Print ld
flags")
args = parser.parse_args()
@@ -78,12 +79,15 @@ def __main__():
include_dir = libinfo.find_include_path()
dlpack_include_dir = libinfo.find_dlpack_include_path()
print(f"-I{include_dir} -I{dlpack_include_dir} -std=c++17")
+ if args.cflags:
+ include_dir = libinfo.find_include_path()
+ dlpack_include_dir = libinfo.find_dlpack_include_path()
+ print(f"-I{include_dir} -I{dlpack_include_dir}")
if args.libs:
if sys.platform.startswith("win32"):
print(find_windows_implib())
else:
print("-ltvm_ffi")
-
if args.ldflags:
if not sys.platform.startswith("win32"):
print(f"-L{os.path.dirname(libinfo.find_libtvm_ffi())}")