echuraev commented on code in PR #13837:
URL: https://github.com/apache/tvm/pull/13837#discussion_r1094096747
##########
apps/cpp_clml/clml_runner.h:
##########
@@ -0,0 +1,262 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file clml_runner.h
+ * \brief CLML model runner.
+ */
+#ifndef CLML_APPS_CPP_RCLML_RUNNER_H_
+#define CLML_APPS_CPP_RCLML_RUNNER_H_
+
+#include <csignal>
+#include <cstdio>
+#include <cstdlib>
+#include <iostream>
+#include <string>
+#if defined(__linux__) || defined(__ANDROID__)
+#include <unistd.h>
+#endif
+
+#include <CL/cl_qcom_ml_ops.h>
+#include <cnpy.h>
+#include <dmlc/io.h>
+
+#include "CL/cl.h"
+
+#define CLML_SDK_TEST_AND_EXIT(expression)                                                      \
+  {                                                                                             \
+    {                                                                                           \
+      int _n_ = !(expression);                                                                  \
+      if (_n_) {                                                                                \
+        fprintf(stderr, "Error on line %d of %s\nFailing expression: %s\n", __LINE__, __FILE__, \
+                #expression);                                                                   \
+        exit(1);                                                                                \
+      }                                                                                         \
+    }                                                                                           \
+  }
+
+#define CAT_I(a, b) a##b
+#define CAT(a, b) CAT_I(a, b)
+#define GET_ML_INTERFACE CAT(CAT(clGetMLInterfaceV, CL_QCOM_ML_OPS_H_MAJOR_VERSION), QCOM)
+#define GET_ML_API_INTERFACE CAT(CAT(CLMLInterfaceV, CL_QCOM_ML_OPS_H_MAJOR_VERSION), QCOM)
+
+namespace tvm {
+namespace runtime {
+
+/**
+ * \brief Tensor dimensions: batch, channel, height, width
+ */
+struct tensor_dims_t {
+  uint32_t n, c, h, w;
+};
+
+/*!
+ * \brief Tool Arguments.
+ * \arg input Numpy file for the model input
+ * \arg output Numpy file name to dump the model output as numpy
+ * \arg params Numpy file holding the params for models
+ */
+struct ToolArgs {
+  std::string input;
+  std::string output;
+  std::string params;
+  bool dump_meta = false;
+};
+
+/*!
+ * \brief Encapsulates the CLML runner functionality for the sub graph.
+ */
+class CLMLRunner {
+ public:
+  /*! \brief Constructor */
+  CLMLRunner(std::string name, ToolArgs& args, cl_platform_id arg_platform_id,
+             cl_context arg_context, cl_device_id arg_device_id, cl_command_queue arg_queue);
+
+  /*! \brief Returns the name of this sub graph */
+  std::string GetModName(void) { return r_name; }
+  /*! \brief Executes one cycle of all CLML ops */
+  int Run(void);
+  /*! \brief Sets meta information */
+  void SetMetaInfo(std::string minfo);
+  /*! \brief Print function to show all meta information */
+  void PrintMetaInfo(void);
+  /*! \brief Initializes the unusedTensor */
+  void MakeUnusedTensor(void);
+  /*! \brief Copies the given bytestream of data to the tensor */
+  void CopyDataToCLMLTensor(std::shared_ptr<cl_ml_tensor_memory_desc_qcom> tensor, void* data,
+                            cl_ml_tensor_layout_qcom layout = CL_TENSOR_LAYOUT_NCHW_QCOM);
+  /*! \brief Copies tensor data to data in the expected layout format */
+  void CopyDataFromCLMLTensor(std::shared_ptr<cl_ml_tensor_memory_desc_qcom> tensor, void* data,
+                              cl_ml_tensor_layout_qcom layout = CL_TENSOR_LAYOUT_NCHW_QCOM);
+  /*! \brief Allocates memory for the tensor descriptor */
+  cl_int AllocateTensorMemory(std::shared_ptr<cl_ml_tensor_memory_desc_qcom> pTensorMemDesc);
+  /*!
+   * \brief Allocates memory for all tensor descriptors in the storage map.
+   * Also initializes the parameter nodes and inputs from the given numpy dumps, if provided.
+   */
+  void AllocateMemAndPopulateParams(void);
+  /*! \brief Creates a tensor descriptor given its shape, dtype and layout */
+  std::shared_ptr<cl_ml_tensor_memory_desc_qcom> MakeCLMLTensor(
+      std::vector<size_t> shape, std::string dtype = "float32",
+      cl_ml_tensor_layout_qcom layout = CL_TENSOR_LAYOUT_OPTIMAL_QCOM);
+  /*! \brief Conv2D layer implementation */
+  void MakeConv2D(std::shared_ptr<cl_ml_tensor_memory_desc_qcom> input_desc,
+                  std::shared_ptr<cl_ml_tensor_memory_desc_qcom> weight_desc,
+                  std::shared_ptr<cl_ml_tensor_memory_desc_qcom> bias_desc,
+                  std::shared_ptr<cl_ml_tensor_memory_desc_qcom> output_desc,
+                  std::vector<cl_uint> padding, std::vector<cl_uint> dilation,
+                  std::vector<cl_uint> strides, int groups, cl_convolution_mode_qcom mode,
+                  cl_activation_function_qcom activation, bool has_bias, bool has_act,
+                  std::string dtype);
+
+  /*! \brief Conv2D with fused BatchNorm layer implementation */
+  void MakeConv2DWithBN(std::shared_ptr<cl_ml_tensor_memory_desc_qcom> input_desc,
+                        std::shared_ptr<cl_ml_tensor_memory_desc_qcom> weight_desc,
+                        std::shared_ptr<cl_ml_tensor_memory_desc_qcom> bias_desc,
+                        std::shared_ptr<cl_ml_tensor_memory_desc_qcom> output_desc,
+                        std::shared_ptr<cl_ml_tensor_memory_desc_qcom> bn_scale,
+                        std::shared_ptr<cl_ml_tensor_memory_desc_qcom> bn_bias,
+                        std::shared_ptr<cl_ml_tensor_memory_desc_qcom> bn_mean,
+                        std::shared_ptr<cl_ml_tensor_memory_desc_qcom> bn_var,
+                        std::vector<float> bn_attrs, std::vector<cl_uint> padding,
+                        std::vector<cl_uint> dilation, std::vector<cl_uint> strides, int groups,
+                        cl_convolution_mode_qcom mode, cl_activation_function_qcom activation,
+                        bool has_bias, bool has_act, std::string dtype);
+
+  /*! \brief ReLU layer implementation */
+  void MakeRelu(std::shared_ptr<cl_ml_tensor_memory_desc_qcom> input_desc,
+                std::shared_ptr<cl_ml_tensor_memory_desc_qcom> output_desc,
+                cl_activation_function_qcom relu_type, std::string dtype);
+
+  /*! \brief Batch Normalization layer implementation */
+  void MakeBatchNorm(std::shared_ptr<cl_ml_tensor_memory_desc_qcom> input_desc,
+                     std::shared_ptr<cl_ml_tensor_memory_desc_qcom> output_desc,
+                     std::shared_ptr<cl_ml_tensor_memory_desc_qcom> bn_scale,
+                     std::shared_ptr<cl_ml_tensor_memory_desc_qcom> bn_bias,
+                     std::shared_ptr<cl_ml_tensor_memory_desc_qcom> bn_mean,
+                     std::shared_ptr<cl_ml_tensor_memory_desc_qcom> bn_var,
+                     std::vector<float> bn_attrs, std::string dtype);
+
+  /*! \brief Pool2D (with all variants) layer implementation */
+  void MakePool2D(std::shared_ptr<cl_ml_tensor_memory_desc_qcom> input_desc,
+                  std::shared_ptr<cl_ml_tensor_memory_desc_qcom> output_desc,
+                  std::vector<cl_uint> pool_size, std::vector<cl_uint> strides,
+                  std::vector<cl_uint> padding, std::string pool_type, std::string dtype);
+
+  /*! \brief GlobalPool2D (with all variants) layer implementation */
+  void MakeGlobalPool2D(std::shared_ptr<cl_ml_tensor_memory_desc_qcom> input_desc,
+                        std::shared_ptr<cl_ml_tensor_memory_desc_qcom> output_desc,
+                        std::vector<cl_uint> in_shape, std::string pool_type, std::string dtype);
+
+  /*! \brief Reshape layer implementation */
+  void MakeReshape(std::shared_ptr<cl_ml_tensor_memory_desc_qcom> input_desc,
+                   std::shared_ptr<cl_ml_tensor_memory_desc_qcom> output_desc, std::string dtype);
+
+  /*! \brief Concatenate layer implementation */
+  void MakeConcatenate(std::vector<std::shared_ptr<cl_ml_tensor_memory_desc_qcom>> in_list,
+                       std::shared_ptr<cl_ml_tensor_memory_desc_qcom> output_desc, int axis,
+                       std::string dtype);
+
+  /*! \brief Dense layer implementation */
+  void MakeDense(std::shared_ptr<cl_ml_tensor_memory_desc_qcom> input_desc,
+                 std::shared_ptr<cl_ml_tensor_memory_desc_qcom> weight_desc,
+                 std::shared_ptr<cl_ml_tensor_memory_desc_qcom> output_desc,
+                 std::shared_ptr<cl_ml_tensor_memory_desc_qcom> bias_desc, std::string dtype);
+
+  /*! \brief SoftMax layer implementation */
+  void MakeSoftMax(std::shared_ptr<cl_ml_tensor_memory_desc_qcom> input_desc,
+                   std::shared_ptr<cl_ml_tensor_memory_desc_qcom> output_desc, std::string dtype);
+
+  /*! \brief Pad layer implementation */
+  void MakePad(std::shared_ptr<cl_ml_tensor_memory_desc_qcom> input_desc,
+               std::shared_ptr<cl_ml_tensor_memory_desc_qcom> output_desc, std::string pad_mode,
+               std::vector<cl_uint> padding, std::string dtype);
+
+  /*! \brief Batch Flatten layer implementation */
+  void MakeBatchFlatten(std::shared_ptr<cl_ml_tensor_memory_desc_qcom> input_desc,
+                        std::shared_ptr<cl_ml_tensor_memory_desc_qcom> output_desc,
+                        std::string dtype);
+
+  /*! \brief Clip layer implementation */
+  void MakeClip(std::shared_ptr<cl_ml_tensor_memory_desc_qcom> input_desc,
+                std::shared_ptr<cl_ml_tensor_memory_desc_qcom> output_desc, float a_max,
+                float a_min, std::string dtype);
+
+  /*! \brief Binary Operator (with all types) layer implementation */
+  void MakeBinaryOp(std::shared_ptr<cl_ml_tensor_memory_desc_qcom> input_a,
+                    std::shared_ptr<cl_ml_tensor_memory_desc_qcom> input_b,
+                    std::shared_ptr<cl_ml_tensor_memory_desc_qcom> output_desc,
+                    std::string op_name, std::string dtype);
+
+  /*! \brief Vector of created operators */
+  std::vector<cl_ml_op_qcom> function;
+  /*! \brief Vector of the graph's input tensor descriptors */
+  std::vector<std::shared_ptr<cl_ml_tensor_memory_desc_qcom>> inputs;
+  /*! \brief Map of the graph's output tensor descriptors with names */
+  std::map<std::string, std::shared_ptr<cl_ml_tensor_memory_desc_qcom>> outputs;
+  /*! \brief Map of the graph's output tensor names and dtypes */
+  std::map<std::string, std::string> outputs_dtypes;
+  /*! \brief Map of the graph's output tensor names and shapes */
+  std::map<std::string, std::vector<size_t>> outputs_shapes;
+  /*! \brief Overall storage map for all tensor descriptors involved */
+  std::map<std::string, std::shared_ptr<cl_ml_tensor_memory_desc_qcom>> storage_map;
+  /*! \brief List of const tensors of the graph */
+  std::vector<std::string> consts;
+  /*! \brief List of all memory descriptors in the graph */
+  std::vector<cl_ml_tensor_memory_desc_qcom> tensorMemDescs;
+  /*! \brief Tensor memory descriptor set */
+  cl_ml_tensor_mem_desc_set_qcom descriptorSet;
+  /*! \brief Unused tensor used across various ops */
+  std::shared_ptr<cl_ml_tensor_memory_desc_qcom> unusedTensor;
+
+  /*! \brief ML API interface */
+  GET_ML_API_INTERFACE* h_ClmlIntf = NULL;

Review Comment:
   Probably better to use `nullptr` here and below. I think that some compilers will generate warning messages on `NULL`.
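   As a minimal standalone illustration of the warning in question (a sketch, not code from this PR; `-Wzero-as-null-pointer-constant` is one GCC/Clang flag that triggers it):

```cpp
// Compile with: g++ -std=c++17 -Wzero-as-null-pointer-constant demo.cc
#include <cstddef>  // for NULL

int main() {
  void* legacy = NULL;     // may warn: zero used as null pointer constant
  void* modern = nullptr;  // std::nullptr_t literal, no warning
  return (legacy == modern) ? 0 : 1;  // both are null pointers, so returns 0
}
```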
##########
apps/cpp_clml/main.cc:
##########
@@ -0,0 +1,243 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file main.cc
+ * \brief CLML Model execution application.
+ */
+
+#include "clml_runner.h"
+
+using namespace tvm::runtime;
+
+/*!
+ * \brief Auto generated model file (clml_models.cc) entry function definition.
+ * \param args The tool arguments to forward
+ * \param arg_platform OpenCL platform
+ * \param arg_context OpenCL context
+ * \param arg_device_id OpenCL device id
+ * \param queue OpenCL queue
+ * \return List of CLMLRunner objects corresponding to all sub graphs of a TVM module.
+ */
+std::vector<CLMLRunner> BuildModules(ToolArgs& args, cl_platform_id arg_platform,
+                                     cl_context arg_context, cl_device_id arg_device_id,
+                                     cl_command_queue queue);
+
+static const std::string kUsage =
+    "Command line usage\n"
+    "--input     - Numpy file for the model input (optional; a random input is used if not given)\n"
+    "--output    - Numpy file name to dump the model output as numpy\n"
+    "--params    - Numpy file with params\n"
+    "--dump-meta - Dump model meta information\n"
+    "\n"
+    "  Example\n"
+    "  ./clml_run --dump-meta\n"
+    "  ./clml_run --params=clmlparams.npz\n"
+    "  ./clml_run --input=input.npz --output=output.npz --params=clml_params.npz\n"
+    "\n";
+
+/*!
+ * \brief PrintArgs prints the contents of ToolArgs.
+ * \param args ToolArgs structure
+ */
+void PrintArgs(const ToolArgs& args) {
+  LOG(INFO) << "Input    = " << args.input;
+  LOG(INFO) << "Output   = " << args.output;
+  LOG(INFO) << "Params   = " << args.params;
+  LOG(INFO) << "DumpMeta = " << args.dump_meta;
+}
+
+#if defined(__linux__) || defined(__ANDROID__)
+/*!
+ * \brief CtrlCHandler, exits if Ctrl+C is pressed.
+ * \param s signal
+ */
+void CtrlCHandler(int s) {
+  LOG(INFO) << "User pressed Ctrl+C, Exiting";
+  exit(1);
+}
+
+/*!
+ * \brief HandleCtrlC registers for handling the Ctrl+C event.
+ */
+void HandleCtrlC() {
+  // Ctrl+C handler
+  struct sigaction sigIntHandler;
+  sigIntHandler.sa_handler = CtrlCHandler;
+  sigemptyset(&sigIntHandler.sa_mask);
+  sigIntHandler.sa_flags = 0;
+  sigaction(SIGINT, &sigIntHandler, nullptr);
+}
+#endif
+
+/*!
+ * \brief GetCmdOption parses and finds the command option.
+ * \param argc arg counter
+ * \param argv arg values
+ * \param option command line option to search for.
+ * \param key whether the option itself is the key
+ * \return value corresponding to the option.
+ */
+std::string GetCmdOption(int argc, char* argv[], std::string option, bool key = false) {
+  std::string cmd;
+  for (int i = 1; i < argc; ++i) {
+    std::string arg = argv[i];
+    if (arg.find(option) == 0) {
+      if (key) {
+        cmd = argv[i];
+        return cmd;
+      }
+      // We assume "=" is the end of the option.
+      // ICHECK_EQ(*option.rbegin(), '=');
+      cmd = arg.substr(arg.find('=') + 1);
+      return cmd;
+    }
+  }
+  return cmd;
+}
+
+/*!
+ * \brief ParseCmdArgs parses the command line arguments.
+ * \param argc arg counter
+ * \param argv arg values
+ * \param args the output structure which holds the parsed values
+ */
+void ParseCmdArgs(int argc, char* argv[], struct ToolArgs& args) {
+  const std::string input = GetCmdOption(argc, argv, "--input=");
+  if (!input.empty()) {
+    args.input = input;
+  }
+
+  const std::string output = GetCmdOption(argc, argv, "--output=");
+  if (!output.empty()) {
+    args.output = output;
+  }
+
+  const std::string params = GetCmdOption(argc, argv, "--params=");
+  if (!params.empty()) {
+    args.params = params;
+  }
+
+  const std::string pmeta = GetCmdOption(argc, argv, "--dump-meta", true);
+  if (!pmeta.empty()) {
+    args.dump_meta = true;
+  }
+}
+
+/*!
+ * \brief Checks CLML extension availability on the CL device.
+ * \param platform_id OpenCL platform
+ * \param device_id OpenCL device id
+ * \return true if the extension is present, else false.
+ */
+bool ExtensionStringPresent(cl_platform_id platform_id, cl_device_id device_id) {
+  cl_int result = 0;
+  size_t reqd_size = 0;
+  result = clGetDeviceInfo(device_id, CL_DEVICE_EXTENSIONS, 0, NULL, &reqd_size);
+  CLML_SDK_TEST_AND_EXIT(reqd_size > 0u && result == CL_SUCCESS);
+
+  std::vector<char> buf(reqd_size);
+  result = clGetDeviceInfo(device_id, CL_DEVICE_EXTENSIONS, reqd_size, buf.data(), NULL);
+  CLML_SDK_TEST_AND_EXIT(result == CL_SUCCESS);
+
+  std::string extensions(buf.data());
+  LOG(WARNING) << "OpenCL Extensions:" << extensions;
+  return (extensions.find("cl_qcom_ml_ops") != std::string::npos);
+}
+
+/*!
+ * \brief Loads and executes the model on the given target.
+ * \param args tool arguments
+ * \return result of the operation.
+ */
+int ExecuteModel(ToolArgs& args) {
+#if defined(__linux__) || defined(__ANDROID__)
+  // Ctrl+C handler
+  HandleCtrlC();
+#endif
+
+  // Init OpenCL Environment
+  cl_int result;
+  cl_event readEvent = NULL;

Review Comment:
   Please use `nullptr` instead of `NULL` here as well.

##########
apps/cpp_clml/CMakeLists.txt:
##########
@@ -0,0 +1,59 @@
+cmake_minimum_required(VERSION 3.13)
+
+project(clml_run VERSION 2.0)
+
+if(NOT DEFINED CMAKE_TOOLCHAIN_FILE)
+  message( FATAL_ERROR "CMAKE_TOOLCHAIN_FILE Not set, forcing exit. Suggested value: {ANDROID_NDK_PATH}/build/cmake/android.toolchain.cmake." )
+endif(NOT DEFINED CMAKE_TOOLCHAIN_FILE)
+
+if(NOT DEFINED ANDROID_ABI)
+  message( FATAL_ERROR "ANDROID_ABI Not set, forcing exit. Suggested value(s): arm64-v8a (64), armeabi-v7a (32)" )
+endif(NOT DEFINED ANDROID_ABI)
+
+if(NOT DEFINED CLML_SDK)
+  message( FATAL_ERROR "CLML_SDK Not set, forcing exit." )
+endif(NOT DEFINED CLML_SDK)
+
+if (CMAKE_FIND_ROOT_PATH_MODE_LIBRARY STREQUAL "ONLY")
+  set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY BOTH)
+endif()
+
+find_library(CLML_LIBRARIES NAMES libOpenCL.so NO_DEFAULT_PATH PATHS ${CLML_SDK}/lib ${CLML_SDK}/lib64)
+
+# CMake/Android variables
+set( ANDROID_STL c++_static CACHE STRING "Target Android STL")  # default
+
+# Source variables
+set( OPENCL_INCLUDE_DIRS ${CLML_SDK} CACHE PATH "filepath to OpenCL headers")
+
+set(CMAKE_CXX_STANDARD 17)
+set(CMAKE_CXX_STANDARD_REQUIRED True)
+
+# we do not want to pass -fno-exceptions
+if(${CMAKE_CXX_FLAGS} MATCHES "-fno-exceptions")
+  string(REGEX REPLACE "-fno-exceptions" "" CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS})

Review Comment:
   Probably we should add a warning message to notify the user that we have implicitly modified their CMake options?
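   For example, a sketch of what that could look like (the message wording is illustrative, not part of this PR):

```cmake
# Hypothetical sketch: make the flag rewrite visible at configure time.
if(${CMAKE_CXX_FLAGS} MATCHES "-fno-exceptions")
  message(WARNING "Removing -fno-exceptions from CMAKE_CXX_FLAGS: this application requires C++ exceptions.")
  string(REGEX REPLACE "-fno-exceptions" "" CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS})
endif()
```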
##########
apps/cpp_clml/CMakeLists.txt:
##########
@@ -0,0 +1,59 @@
+cmake_minimum_required(VERSION 3.13)
+
+project(clml_run VERSION 2.0)
+
+if(NOT DEFINED CMAKE_TOOLCHAIN_FILE)
+  message( FATAL_ERROR "CMAKE_TOOLCHAIN_FILE Not set, forcing exit. Suggested value: {ANDROID_NDK_PATH}/build/cmake/android.toolchain.cmake." )
+endif(NOT DEFINED CMAKE_TOOLCHAIN_FILE)
+
+if(NOT DEFINED ANDROID_ABI)
+  message( FATAL_ERROR "ANDROID_ABI Not set, forcing exit. Suggested value(s): arm64-v8a (64), armeabi-v7a (32)" )
+endif(NOT DEFINED ANDROID_ABI)
+
+if(NOT DEFINED CLML_SDK)
+  message( FATAL_ERROR "CLML_SDK Not set, forcing exit." )
+endif(NOT DEFINED CLML_SDK)
+
+if (CMAKE_FIND_ROOT_PATH_MODE_LIBRARY STREQUAL "ONLY")
+  set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY BOTH)
+endif()
+
+find_library(CLML_LIBRARIES NAMES libOpenCL.so NO_DEFAULT_PATH PATHS ${CLML_SDK}/lib ${CLML_SDK}/lib64)

Review Comment:
   What if the library wasn't found? `find_library` would leave `CLML_LIBRARIES` set to `CLML_LIBRARIES-NOTFOUND`, and you would likely only see the failure later, as undefined-reference errors at link time. Just an idea: can we use `find_package(OpenCL REQUIRED NO_DEFAULT_PATH PATHS ${CLML_SDK})` for CLML?
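   One way to surface the problem at configure time instead (a sketch; the error message is illustrative):

```cmake
# Hypothetical sketch: a NOTFOUND result is falsey in if(), so we can fail fast.
find_library(CLML_LIBRARIES NAMES libOpenCL.so NO_DEFAULT_PATH
             PATHS ${CLML_SDK}/lib ${CLML_SDK}/lib64)
if(NOT CLML_LIBRARIES)
  message(FATAL_ERROR "libOpenCL.so not found in ${CLML_SDK}/lib or ${CLML_SDK}/lib64; check CLML_SDK.")
endif()
```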
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]