This is an automated email from the ASF dual-hosted git repository.
zhic pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-tvm.git
The following commit(s) were added to refs/heads/master by this push:
new a2429c1 [Relay][External Codegen] Support data types for
CSourceModuleCodegen args and output (#4934)
a2429c1 is described below
commit a2429c1fa61cf54d1890e887572c8fa93c467d7a
Author: Jon Soifer <[email protected]>
AuthorDate: Mon Feb 24 20:53:24 2020 -0800
[Relay][External Codegen] Support data types for CSourceModuleCodegen args
and output (#4934)
* Support int args and no extra buffers
* Fixes
* remove testing code
* fix style
* more style
* use const args
* style
Co-authored-by: Jon Soifer <[email protected]>
---
src/relay/backend/contrib/codegen_c/codegen.cc | 57 +++++++++------
src/relay/backend/contrib/codegen_c/codegen_c.h | 94 +++++++++++++++++++------
src/relay/backend/contrib/dnnl/codegen.cc | 23 +++---
tests/python/relay/test_external_codegen.py | 18 +++++
4 files changed, 139 insertions(+), 53 deletions(-)
diff --git a/src/relay/backend/contrib/codegen_c/codegen.cc
b/src/relay/backend/contrib/codegen_c/codegen.cc
index 5584447..126d1d5 100644
--- a/src/relay/backend/contrib/codegen_c/codegen.cc
+++ b/src/relay/backend/contrib/codegen_c/codegen.cc
@@ -41,9 +41,11 @@ class CodegenC : public ExprVisitor, public CodegenCBase {
explicit CodegenC(const std::string& id) { this->ext_func_id_ = id; }
void VisitExpr_(const VarNode* node) {
- ext_func_args_.push_back(node->name_hint());
+ ext_func_args_.push_back(GetRef<Var>(node));
out_.clear();
- out_.push_back({node->name_hint(), 0});
+ Output output;
+ output.name = node->name_hint();
+ out_.push_back(output);
}
void VisitExpr_(const CallNode* call) final {
@@ -70,6 +72,12 @@ class CodegenC : public ExprVisitor, public CodegenCBase {
for (size_t i = 0; i < in_shape.size(); ++i) {
macro_stream << ", " << in_shape[i];
}
+
+ const auto* type_node = call->checked_type().as<TensorTypeNode>();
+ CHECK(type_node);
+ const auto& dtype = GetDtypeString(type_node);
+ macro_stream << ", " << dtype;
+
macro_stream << ");";
func_decl_.push_back(macro_stream.str());
@@ -83,20 +91,18 @@ class CodegenC : public ExprVisitor, public CodegenCBase {
decl_stream << ", ";
}
first = false;
- decl_stream << out.first;
+ decl_stream << out.name;
}
}
- auto type_node = call->checked_type().as<TensorTypeNode>();
- CHECK(type_node != nullptr && runtime::TypeMatch(type_node->dtype,
kDLFloat, 32))
- << "Only support single output tensor with float type";
std::string out = "buf_" + std::to_string(buf_idx_++);
auto out_shape = GetShape(call->checked_type());
int out_size = 1;
for (size_t i = 0; i < out_shape.size(); ++i) {
out_size *= out_shape[i];
}
- buf_stream << "float* " << out << " = (float*)std::malloc(4 * " <<
out_size << ");";
+ buf_stream << dtype << "* " << out <<
+ " = (" << dtype << "*)std::malloc(4 * " << out_size << ");";
buf_decl_.push_back(buf_stream.str());
decl_stream << ", " << out << ");";
@@ -104,7 +110,12 @@ class CodegenC : public ExprVisitor, public CodegenCBase {
// Update output buffer
out_.clear();
- out_.push_back({out, out_size});
+ Output output;
+ output.name = out;
+ output.dtype = dtype;
+ output.need_copy = true;
+ output.size = out_size;
+ out_.push_back(output);
}
/*!
@@ -128,7 +139,7 @@ class CodegenC : public ExprVisitor, public CodegenCBase {
/*! \brief The index of allocated buffers. */
int buf_idx_ = 0;
/*! \brief The arguments of a C compiler compatible function. */
- std::vector<std::string> ext_func_args_;
+ Array<Var> ext_func_args_;
/*! \brief The statements of a C compiler compatible function. */
std::vector<std::string> ext_func_body;
/*! \brief The declaration statements of a C compiler compatible function. */
@@ -136,7 +147,7 @@ class CodegenC : public ExprVisitor, public CodegenCBase {
/*! \brief The declaration statements of buffers. */
std::vector<std::string> buf_decl_;
/*! \brief The name and index pairs for output. */
- std::vector<std::pair<std::string, int>> out_;
+ std::vector<Output> out_;
};
class CSourceCodegen : public CSourceModuleCodegenBase {
@@ -161,21 +172,21 @@ class CSourceCodegen : public CSourceModuleCodegenBase {
// Append some common macro for operator definition.
const char* operator_macro = R"op_macro(
- #define CSOURCE_BINARY_OP_1D(p_ID_, p_OP_, p_DIM1_) \
- extern "C" void p_ID_(float* a, float* b, float* out) { \
- for (int64_t i = 0; i < p_DIM1_; ++i) { \
- out[i] = a[i] p_OP_ b[i]; \
- } \
+ #define CSOURCE_BINARY_OP_1D(p_ID_, p_OP_, p_DIM1_, p_DTYPE) \
+ extern "C" void p_ID_(p_DTYPE* a, p_DTYPE* b, p_DTYPE* out) { \
+ for (int64_t i = 0; i < p_DIM1_; ++i) { \
+ out[i] = a[i] p_OP_ b[i]; \
+ } \
}
- #define CSOURCE_BINARY_OP_2D(p_ID_, p_OP_, p_DIM1_, p_DIM2_) \
- extern "C" void p_ID_(float* a, float* b, float* out) { \
- for (int64_t i = 0; i < p_DIM1_; ++i) { \
- for (int64_t j = 0; j < p_DIM2_; ++j) { \
- int64_t k = i * p_DIM2_ + j; \
- out[k] = a[k] p_OP_ b[k]; \
- } \
- } \
+ #define CSOURCE_BINARY_OP_2D(p_ID_, p_OP_, p_DIM1_, p_DIM2_, p_DTYPE) \
+ extern "C" void p_ID_(p_DTYPE* a, p_DTYPE* b, p_DTYPE* out) { \
+ for (int64_t i = 0; i < p_DIM1_; ++i) { \
+ for (int64_t j = 0; j < p_DIM2_; ++j) { \
+ int64_t k = i * p_DIM2_ + j; \
+ out[k] = a[k] p_OP_ b[k]; \
+ } \
+ } \
}
)op_macro";
diff --git a/src/relay/backend/contrib/codegen_c/codegen_c.h
b/src/relay/backend/contrib/codegen_c/codegen_c.h
index f473c93..2a88d4b 100644
--- a/src/relay/backend/contrib/codegen_c/codegen_c.h
+++ b/src/relay/backend/contrib/codegen_c/codegen_c.h
@@ -35,6 +35,13 @@ namespace tvm {
namespace relay {
namespace contrib {
+struct Output {
+ std::string name;
+ std::string dtype;
+ int size;
+ bool need_copy;
+};
+
class CSourceModuleCodegenBase {
public:
CSourceModuleCodegenBase() = default;
@@ -98,7 +105,7 @@ class CodegenCBase {
* \brief Generate C code for the external function.
*
* \param func_name The name of the external function.
- * \param arg_cnt The expected number of arguments.
+ * \param args arguments to the external function.
*
* \code
*
@@ -116,16 +123,18 @@ class CodegenCBase {
*
* \endcode
*/
- void GenerateBackendCFunc(const std::string& func_name, int arg_cnt) {
+ void GenerateBackendCFunc(const std::string& func_name,
+ const Array<Var>& args,
+ const Output& out) {
// Print signature
code_stream_ << "\n";
code_stream_ << "extern \"C\" int " << func_name << "_wrapper_(";
- for (int i = 0; i < arg_cnt - 1; i++) {
+ for (size_t i = 0; i < args.size(); i++) {
code_stream_ << "DLTensor* arg" << i << ",\n";
code_stream_ << "\t";
}
- if (arg_cnt > 0) {
- code_stream_ << "DLTensor* arg" << arg_cnt - 1 << ") {\n";
+ if (args.size() > 0) {
+ code_stream_ << "DLTensor* arg" << args.size() << ") {\n";
}
EnterScope();
@@ -133,12 +142,13 @@ class CodegenCBase {
// Generate the internal call.
PrintIndents();
code_stream_ << func_name << "_(";
- for (int i = 0; i < arg_cnt - 1; i++) {
- code_stream_ << "static_cast<float*>(arg" << i << "->data),\n";
+ for (size_t i = 0; i < args.size(); i++) {
+ const auto& dtype_str = GetDtypeString(args[i]);
+ code_stream_ << "static_cast<" << dtype_str << "*>(arg" << i <<
"->data),\n";
PrintIndents();
}
- if (arg_cnt > 0) {
- code_stream_ << "static_cast<float*>(arg" << arg_cnt - 1 << "->data)";
+ if (args.size() > 0) {
+ code_stream_ << "static_cast<" << out.dtype << "*>(arg" << args.size()
<< "->data)";
}
code_stream_ << ");\n";
PrintIndents();
@@ -207,17 +217,21 @@ class CodegenCBase {
*
* \return The emitted code string.
*/
- std::string JitImpl(std::string ext_func_id, std::vector<std::string> args,
- std::vector<std::string> buf_decl,
std::vector<std::string> body,
- std::vector<std::pair<std::string, int>> out) {
+ std::string JitImpl(std::string ext_func_id, const Array<Var>& args,
+ const std::vector<std::string>& buf_decl,
+ const std::vector<std::string>& body,
+ const std::vector<Output>& out) {
// Create the signature. For example, it could be:
// extern "C" void dnnl_0_(float* input0, float* input1, float* out, int
M, int N) {}
code_stream_ << "extern \"C\" void " << ext_func_id << "_(";
+ CHECK_EQ(out.size(), 1U) << "Internal error: only single output is
support.";
+
for (const auto& arg : args) {
- code_stream_ << "float* " << arg << ", ";
+ const auto& dtype_str = GetDtypeString(arg);
+ code_stream_ << dtype_str << "* " << arg->name_hint() << ", ";
}
- code_stream_ << "float* out) {\n";
+ code_stream_ << out[0].dtype << "* out) {\n";
this->EnterScope();
// Function body
@@ -232,24 +246,60 @@ class CodegenCBase {
}
// Copy output
- CHECK_EQ(out.size(), 1U) << "Internal error: only single output is
support.";
- this->PrintIndents();
- code_stream_ << "std::memcpy(out, " << out[0].first << ", 4 * " <<
out[0].second << ");\n";
-
- // Free buffers
- for (size_t i = 0; i < buf_decl.size(); i++) {
+ if (out[0].need_copy) {
this->PrintIndents();
- code_stream_ << "std::free(buf_" << i << ");\n";
+ code_stream_ << "std::memcpy(out, " << out[0].name << ", 4 * " <<
out[0].size << ");\n";
+
+ // Free buffers
+ for (size_t i = 0; i < buf_decl.size(); i++) {
+ this->PrintIndents();
+ code_stream_ << "std::free(buf_" << i << ");\n";
+ }
}
this->ExitScope();
code_stream_ << "}\n";
// Create the wrapper to call the ext_func
- this->GenerateBackendCFunc(ext_func_id, args.size() + 1 /* output */);
+ this->GenerateBackendCFunc(ext_func_id, args, out[0]);
return code_stream_.str();
}
+ /*!
+ * \brief Returns dtype string
+ *
+ * \param var Var to get the dtype of
+ *
+ * \return The dtype string.
+ */
+ std::string GetDtypeString(const Var& var) {
+ auto ttype = var->checked_type().as<TensorTypeNode>();
+ CHECK(ttype) << "Expect TensorTypeNode";
+ return GetDtypeString(ttype);
+ }
+
+ /*!
+ * \brief Returns dtype string
+ *
+ * \param ttype TensorTypeNode* to get the dtype of
+ *
+ * \return The dtype string.
+ */
+ std::string GetDtypeString(const TensorTypeNode* ttype) {
+ std::string dtype;
+ if (runtime::TypeMatch(ttype->dtype, kDLFloat, 32)) {
+ dtype = "float";
+ } else if (runtime::TypeMatch(ttype->dtype, kDLInt, 32)) {
+ dtype = "int";
+ } else if (runtime::TypeMatch(ttype->dtype, kDLInt, 64)) {
+ dtype = "int64_t";
+ } else {
+ LOG(FATAL) << "Unsupported dtype " << ttype->dtype;
+ }
+
+ return dtype;
+ }
+
/*! \brief The external function source code stream. */
std::ostringstream code_stream_;
diff --git a/src/relay/backend/contrib/dnnl/codegen.cc
b/src/relay/backend/contrib/dnnl/codegen.cc
index 6206173..7371174 100644
--- a/src/relay/backend/contrib/dnnl/codegen.cc
+++ b/src/relay/backend/contrib/dnnl/codegen.cc
@@ -45,9 +45,11 @@ class CodegenDNNL : public ExprVisitor, public CodegenCBase {
explicit CodegenDNNL(const std::string& id) { this->ext_func_id_ = id; }
void VisitExpr_(const VarNode* node) final {
- ext_func_args_.push_back(node->name_hint());
+ ext_func_args_.push_back(GetRef<Var>(node));
out_.clear();
- out_.push_back({node->name_hint(), 0});
+ Output output;
+ output.name = node->name_hint();
+ out_.push_back(output);
}
void VisitExpr_(const TupleGetItemNode* op) final {
@@ -90,14 +92,14 @@ class CodegenDNNL : public ExprVisitor, public CodegenCBase
{
decl_stream << ", ";
}
first = false;
- decl_stream << out.first;
+ decl_stream << out.name;
}
}
// Analyze the output buffer
auto type_node = call->checked_type().as<TensorTypeNode>();
- CHECK(type_node != nullptr && runtime::TypeMatch(type_node->dtype,
kDLFloat, 32))
- << "Only support single output tensor with float type";
+ CHECK(type_node);
+ const auto& dtype = GetDtypeString(type_node);
std::string out = "buf_" + std::to_string(buf_idx_++);
auto out_shape = GetShape(call->checked_type());
int out_size = 1;
@@ -118,7 +120,12 @@ class CodegenDNNL : public ExprVisitor, public
CodegenCBase {
// Update output buffer
out_.clear();
- out_.push_back({out, out_size});
+ Output output;
+ output.name = out;
+ output.dtype = dtype;
+ output.need_copy = true;
+ output.size = out_size;
+ out_.push_back(output);
}
std::string JIT(void) {
@@ -213,13 +220,13 @@ class CodegenDNNL : public ExprVisitor, public
CodegenCBase {
*/
int buf_idx_{0};
/*! \brief The arguments used by a wrapped function that calls DNNL kernels.
*/
- std::vector<std::string> ext_func_args_;
+ Array<Var> ext_func_args_;
/*! \brief statement of the function that will be compiled using DNNL
kernels. */
std::vector<std::string> ext_func_body;
/*! \brief The declaration of intermeidate buffers. */
std::vector<std::string> buf_decl_;
/*! \brief The name of the outputs. */
- std::vector<std::pair<std::string, int>> out_;
+ std::vector<Output> out_;
};
/*!
diff --git a/tests/python/relay/test_external_codegen.py
b/tests/python/relay/test_external_codegen.py
index 608bc2a..b086df0 100644
--- a/tests/python/relay/test_external_codegen.py
+++ b/tests/python/relay/test_external_codegen.py
@@ -161,6 +161,23 @@ def test_extern_gcc_single_op():
check_result(mod, {"x": x_data, "y": y_data}, (8, 8), x_data + y_data)
+def test_extern_gcc_single_op_int():
+ x = relay.var('x', shape=(8, 8), dtype="int32")
+ y = relay.var('y', shape=(8, 8), dtype="int32")
+
+ x0 = relay.var('x0', shape=(8, 8), dtype="int32")
+ y0 = relay.var('y0', shape=(8, 8), dtype="int32")
+ z = x0 + y0
+ f = relay.Function([x0, y0], z)
+ f = set_external_func_attr(f, "ccompiler", "ccompiler_0")
+ call = relay.Call(f, [x, y])
+ mod = tvm.IRModule.from_expr(call)
+ x_data = np.random.rand(8, 8).astype('int32')
+ y_data = np.random.rand(8, 8).astype('int32')
+
+ check_result(mod, {"x": x_data, "y": y_data}, (8, 8), x_data + y_data)
+
+
def test_extern_gcc():
x = relay.var('x', shape=(2, 2))
y = relay.var('y', shape=(2, 2))
@@ -242,5 +259,6 @@ def test_extern_dnnl():
if __name__ == "__main__":
test_multi_node_subgraph()
test_extern_gcc_single_op()
+ test_extern_gcc_single_op_int()
test_extern_gcc()
test_extern_dnnl()