This is an automated email from the ASF dual-hosted git repository. zhic pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/incubator-tvm.git
The following commit(s) were added to refs/heads/master by this push: new a2429c1 [Relay][External Codegen] Support data types for CSourceModuleCodegen args and output (#4934) a2429c1 is described below commit a2429c1fa61cf54d1890e887572c8fa93c467d7a Author: Jon Soifer <soif...@gmail.com> AuthorDate: Mon Feb 24 20:53:24 2020 -0800 [Relay][External Codegen] Support data types for CSourceModuleCodegen args and output (#4934) * Support int args and no extra buffers * Fixes * remove testing code * fix style * more style * use const args * style Co-authored-by: Jon Soifer <jo...@microsoft.com> --- src/relay/backend/contrib/codegen_c/codegen.cc | 57 +++++++++------ src/relay/backend/contrib/codegen_c/codegen_c.h | 94 +++++++++++++++++++------ src/relay/backend/contrib/dnnl/codegen.cc | 23 +++--- tests/python/relay/test_external_codegen.py | 18 +++++ 4 files changed, 139 insertions(+), 53 deletions(-) diff --git a/src/relay/backend/contrib/codegen_c/codegen.cc b/src/relay/backend/contrib/codegen_c/codegen.cc index 5584447..126d1d5 100644 --- a/src/relay/backend/contrib/codegen_c/codegen.cc +++ b/src/relay/backend/contrib/codegen_c/codegen.cc @@ -41,9 +41,11 @@ class CodegenC : public ExprVisitor, public CodegenCBase { explicit CodegenC(const std::string& id) { this->ext_func_id_ = id; } void VisitExpr_(const VarNode* node) { - ext_func_args_.push_back(node->name_hint()); + ext_func_args_.push_back(GetRef<Var>(node)); out_.clear(); - out_.push_back({node->name_hint(), 0}); + Output output; + output.name = node->name_hint(); + out_.push_back(output); } void VisitExpr_(const CallNode* call) final { @@ -70,6 +72,12 @@ class CodegenC : public ExprVisitor, public CodegenCBase { for (size_t i = 0; i < in_shape.size(); ++i) { macro_stream << ", " << in_shape[i]; } + + const auto* type_node = call->checked_type().as<TensorTypeNode>(); + CHECK(type_node); + const auto& dtype = GetDtypeString(type_node); + macro_stream << ", " << dtype; + macro_stream << ");"; func_decl_.push_back(macro_stream.str()); @@ -83,20 +91,18 @@ class CodegenC : public ExprVisitor, public CodegenCBase { decl_stream << ", "; } first = false; - decl_stream << out.first; + decl_stream << out.name; } } - auto type_node = call->checked_type().as<TensorTypeNode>(); - CHECK(type_node != nullptr && runtime::TypeMatch(type_node->dtype, kDLFloat, 32)) - << "Only support single output tensor with float type"; std::string out = "buf_" + std::to_string(buf_idx_++); auto out_shape = GetShape(call->checked_type()); int out_size = 1; for (size_t i = 0; i < out_shape.size(); ++i) { out_size *= out_shape[i]; } - buf_stream << "float* " << out << " = (float*)std::malloc(4 * " << out_size << ");"; + buf_stream << dtype << "* " << out << + " = (" << dtype << "*)std::malloc(4 * " << out_size << ");"; buf_decl_.push_back(buf_stream.str()); decl_stream << ", " << out << ");"; @@ -104,7 +110,12 @@ class CodegenC : public ExprVisitor, public CodegenCBase { // Update output buffer out_.clear(); - out_.push_back({out, out_size}); + Output output; + output.name = out; + output.dtype = dtype; + output.need_copy = true; + output.size = out_size; + out_.push_back(output); } /*! @@ -128,7 +139,7 @@ class CodegenC : public ExprVisitor, public CodegenCBase { /*! \brief The index of allocated buffers. */ int buf_idx_ = 0; /*! \brief The arguments of a C compiler compatible function. */ - std::vector<std::string> ext_func_args_; + Array<Var> ext_func_args_; /*! \brief The statements of a C compiler compatible function. */ std::vector<std::string> ext_func_body; /*! \brief The declaration statements of a C compiler compatible function. */ @@ -136,7 +147,7 @@ class CodegenC : public ExprVisitor, public CodegenCBase { /*! \brief The declaration statements of buffers. */ std::vector<std::string> buf_decl_; /*! \brief The name and index pairs for output. */ - std::vector<std::pair<std::string, int>> out_; + std::vector<Output> out_; }; class CSourceCodegen : public CSourceModuleCodegenBase { @@ -161,21 +172,21 @@ class CSourceCodegen : public CSourceModuleCodegenBase { // Append some common macro for operator definition. const char* operator_macro = R"op_macro( - #define CSOURCE_BINARY_OP_1D(p_ID_, p_OP_, p_DIM1_) \ - extern "C" void p_ID_(float* a, float* b, float* out) { \ - for (int64_t i = 0; i < p_DIM1_; ++i) { \ - out[i] = a[i] p_OP_ b[i]; \ - } \ + #define CSOURCE_BINARY_OP_1D(p_ID_, p_OP_, p_DIM1_, p_DTYPE) \ + extern "C" void p_ID_(p_DTYPE* a, p_DTYPE* b, p_DTYPE* out) { \ + for (int64_t i = 0; i < p_DIM1_; ++i) { \ + out[i] = a[i] p_OP_ b[i]; \ + } \ } - #define CSOURCE_BINARY_OP_2D(p_ID_, p_OP_, p_DIM1_, p_DIM2_) \ - extern "C" void p_ID_(float* a, float* b, float* out) { \ - for (int64_t i = 0; i < p_DIM1_; ++i) { \ - for (int64_t j = 0; j < p_DIM2_; ++j) { \ - int64_t k = i * p_DIM2_ + j; \ - out[k] = a[k] p_OP_ b[k]; \ - } \ - } \ + #define CSOURCE_BINARY_OP_2D(p_ID_, p_OP_, p_DIM1_, p_DIM2_, p_DTYPE) \ + extern "C" void p_ID_(p_DTYPE* a, p_DTYPE* b, p_DTYPE* out) { \ + for (int64_t i = 0; i < p_DIM1_; ++i) { \ + for (int64_t j = 0; j < p_DIM2_; ++j) { \ + int64_t k = i * p_DIM2_ + j; \ + out[k] = a[k] p_OP_ b[k]; \ + } \ + } \ } )op_macro"; diff --git a/src/relay/backend/contrib/codegen_c/codegen_c.h b/src/relay/backend/contrib/codegen_c/codegen_c.h index f473c93..2a88d4b 100644 --- a/src/relay/backend/contrib/codegen_c/codegen_c.h +++ b/src/relay/backend/contrib/codegen_c/codegen_c.h @@ -35,6 +35,13 @@ namespace tvm { namespace relay { namespace contrib { +struct Output { + std::string name; + std::string dtype; + int size; + bool need_copy; +}; + class CSourceModuleCodegenBase { public: CSourceModuleCodegenBase() = default; @@ -98,7 +105,7 @@ class CodegenCBase { * \brief Gerenate C code for the external function. * * \param func_name The name of the external function. - * \param arg_cnt The expected number of arguments. + * \param args arguments to the external function. * * \code * @@ -116,16 +123,18 @@ class CodegenCBase { * * \endcode */ - void GenerateBackendCFunc(const std::string& func_name, int arg_cnt) { + void GenerateBackendCFunc(const std::string& func_name, + const Array<Var>& args, + const Output& out) { // Print signature code_stream_ << "\n"; code_stream_ << "extern \"C\" int " << func_name << "_wrapper_("; - for (int i = 0; i < arg_cnt - 1; i++) { + for (size_t i = 0; i < args.size(); i++) { code_stream_ << "DLTensor* arg" << i << ",\n"; code_stream_ << "\t"; } - if (arg_cnt > 0) { - code_stream_ << "DLTensor* arg" << arg_cnt - 1 << ") {\n"; + if (args.size() > 0) { + code_stream_ << "DLTensor* arg" << args.size() << ") {\n"; } EnterScope(); @@ -133,12 +142,13 @@ class CodegenCBase { // Generate the internal call. PrintIndents(); code_stream_ << func_name << "_("; - for (int i = 0; i < arg_cnt - 1; i++) { - code_stream_ << "static_cast<float*>(arg" << i << "->data),\n"; + for (size_t i = 0; i < args.size(); i++) { + const auto& dtype_str = GetDtypeString(args[i]); + code_stream_ << "static_cast<" << dtype_str << "*>(arg" << i << "->data),\n"; PrintIndents(); } - if (arg_cnt > 0) { - code_stream_ << "static_cast<float*>(arg" << arg_cnt - 1 << "->data)"; + if (args.size() > 0) { + code_stream_ << "static_cast<" << out.dtype << "*>(arg" << args.size() << "->data)"; } code_stream_ << ");\n"; PrintIndents(); @@ -207,17 +217,21 @@ class CodegenCBase { * * \return The emitted code string. */ - std::string JitImpl(std::string ext_func_id, std::vector<std::string> args, - std::vector<std::string> buf_decl, std::vector<std::string> body, - std::vector<std::pair<std::string, int>> out) { + std::string JitImpl(std::string ext_func_id, const Array<Var>& args, + const std::vector<std::string>& buf_decl, + const std::vector<std::string>& body, + const std::vector<Output>& out) { // Create the signature. For example, it could be: // extern "C" void dnnl_0_(float* input0, float* input1, float* out, int M, int N) {} code_stream_ << "extern \"C\" void " << ext_func_id << "_("; + CHECK_EQ(out.size(), 1U) << "Internal error: only single output is support."; + for (const auto& arg : args) { - code_stream_ << "float* " << arg << ", "; + const auto& dtype_str = GetDtypeString(arg); + code_stream_ << dtype_str << "* " << arg->name_hint() << ", "; } - code_stream_ << "float* out) {\n"; + code_stream_ << out[0].dtype << "* out) {\n"; this->EnterScope(); // Function body @@ -232,24 +246,60 @@ class CodegenCBase { } // Copy output - CHECK_EQ(out.size(), 1U) << "Internal error: only single output is support."; - this->PrintIndents(); - code_stream_ << "std::memcpy(out, " << out[0].first << ", 4 * " << out[0].second << ");\n"; - - // Free buffers - for (size_t i = 0; i < buf_decl.size(); i++) { + if (out[0].need_copy) { this->PrintIndents(); - code_stream_ << "std::free(buf_" << i << ");\n"; + code_stream_ << "std::memcpy(out, " << out[0].name << ", 4 * " << out[0].size << ");\n"; + + // Free buffers + for (size_t i = 0; i < buf_decl.size(); i++) { + this->PrintIndents(); + code_stream_ << "std::free(buf_" << i << ");\n"; + } } this->ExitScope(); code_stream_ << "}\n"; // Create the wrapper to call the ext_func - this->GenerateBackendCFunc(ext_func_id, args.size() + 1 /* output */); + this->GenerateBackendCFunc(ext_func_id, args, out[0]); return code_stream_.str(); } + /*! + * \brief Returns dtype string + * + * \param var Var to get the dtype of + * + * \return The dtype string. + */ + std::string GetDtypeString(const Var& var) { + auto ttype = var->checked_type().as<TensorTypeNode>(); + CHECK(ttype) << "Expect TensorTypeNode"; + return GetDtypeString(ttype); + } + + /*! + * \brief Returns dtype string + * + * \param ttype TensorTypeNode* to get the dtype of + * + * \return The dtype string. + */ + std::string GetDtypeString(const TensorTypeNode* ttype) { + std::string dtype; + if (runtime::TypeMatch(ttype->dtype, kDLFloat, 32)) { + dtype = "float"; + } else if (runtime::TypeMatch(ttype->dtype, kDLInt, 32)) { + dtype = "int"; + } else if (runtime::TypeMatch(ttype->dtype, kDLInt, 64)) { + dtype = "int64_t"; + } else { + LOG(FATAL) << "Unsupported dtype " << ttype->dtype; + } + + return dtype; + } + /*! \brief The external function source code stream. */ std::ostringstream code_stream_; diff --git a/src/relay/backend/contrib/dnnl/codegen.cc b/src/relay/backend/contrib/dnnl/codegen.cc index 6206173..7371174 100644 --- a/src/relay/backend/contrib/dnnl/codegen.cc +++ b/src/relay/backend/contrib/dnnl/codegen.cc @@ -45,9 +45,11 @@ class CodegenDNNL : public ExprVisitor, public CodegenCBase { explicit CodegenDNNL(const std::string& id) { this->ext_func_id_ = id; } void VisitExpr_(const VarNode* node) final { - ext_func_args_.push_back(node->name_hint()); + ext_func_args_.push_back(GetRef<Var>(node)); out_.clear(); - out_.push_back({node->name_hint(), 0}); + Output output; + output.name = node->name_hint(); + out_.push_back(output); } void VisitExpr_(const TupleGetItemNode* op) final { @@ -90,14 +92,14 @@ class CodegenDNNL : public ExprVisitor, public CodegenCBase { decl_stream << ", "; } first = false; - decl_stream << out.first; + decl_stream << out.name; } } // Analyze the output buffer auto type_node = call->checked_type().as<TensorTypeNode>(); - CHECK(type_node != nullptr && runtime::TypeMatch(type_node->dtype, kDLFloat, 32)) - << "Only support single output tensor with float type"; + CHECK(type_node); + const auto& dtype = GetDtypeString(type_node); std::string out = "buf_" + std::to_string(buf_idx_++); auto out_shape = GetShape(call->checked_type()); int out_size = 1; @@ -118,7 +120,12 @@ class CodegenDNNL : public ExprVisitor, public CodegenCBase { // Update output buffer out_.clear(); - out_.push_back({out, out_size}); + Output output; + output.name = out; + output.dtype = dtype; + output.need_copy = true; + output.size = out_size; + out_.push_back(output); } std::string JIT(void) { @@ -213,13 +220,13 @@ class CodegenDNNL : public ExprVisitor, public CodegenCBase { */ int buf_idx_{0}; /*! \brief The arguments used by a wrapped function that calls DNNL kernels. */ - std::vector<std::string> ext_func_args_; + Array<Var> ext_func_args_; /*! \brief statement of the function that will be compiled using DNNL kernels. */ std::vector<std::string> ext_func_body; /*! \brief The declaration of intermeidate buffers. */ std::vector<std::string> buf_decl_; /*! \brief The name of the the outputs. */ - std::vector<std::pair<std::string, int>> out_; + std::vector<Output> out_; }; /*! diff --git a/tests/python/relay/test_external_codegen.py b/tests/python/relay/test_external_codegen.py index 608bc2a..b086df0 100644 --- a/tests/python/relay/test_external_codegen.py +++ b/tests/python/relay/test_external_codegen.py @@ -161,6 +161,23 @@ def test_extern_gcc_single_op(): check_result(mod, {"x": x_data, "y": y_data}, (8, 8), x_data + y_data) +def test_extern_gcc_single_op_int(): + x = relay.var('x', shape=(8, 8), dtype="int32") + y = relay.var('y', shape=(8, 8), dtype="int32") + + x0 = relay.var('x0', shape=(8, 8), dtype="int32") + y0 = relay.var('y0', shape=(8, 8), dtype="int32") + z = x0 + y0 + f = relay.Function([x0, y0], z) + f = set_external_func_attr(f, "ccompiler", "ccompiler_0") + call = relay.Call(f, [x, y]) + mod = tvm.IRModule.from_expr(call) + x_data = np.random.rand(8, 8).astype('int32') + y_data = np.random.rand(8, 8).astype('int32') + + check_result(mod, {"x": x_data, "y": y_data}, (8, 8), x_data + y_data) + + def test_extern_gcc(): x = relay.var('x', shape=(2, 2)) y = relay.var('y', shape=(2, 2)) @@ -242,5 +259,6 @@ def test_extern_dnnl(): if __name__ == "__main__": test_multi_node_subgraph() test_extern_gcc_single_op() + test_extern_gcc_single_op_int() test_extern_gcc() test_extern_dnnl()