This is an automated email from the ASF dual-hosted git repository.
tqchen pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tvm.git
The following commit(s) were added to refs/heads/main by this push:
new 37bca85c95 [OPENCL][ADRENO] Introduce Qualcomm extension support
(#17519)
37bca85c95 is described below
commit 37bca85c95092abbca0389627c9b2181ef6c6544
Author: Siva <[email protected]>
AuthorDate: Thu Nov 14 19:42:49 2024 +0530
[OPENCL][ADRENO] Introduce Qualcomm extension support (#17519)
Introduce Qualcomm extension support.
Support for the "cl_qcom_perf_hint" and "cl_qcom_priority_hint" extensions is
added through the workspace interface.
The OpenCL version is picked up from the SDK headers. Includes CI fixes for
builds without the Adreno OpenCL SDK.
Extensions are activated based on their availability in the SDK.
A new workspace API, "SetNativePtr", is defined; it releases the existing cl_mem
and creates a new mem object backed by the given host ptr.
Works for cl_qcom_ion_host_ptr,
cl_qcom_android_ahardwarebuffer_host_ptr,
cl_qcom_android_native_buffer_host_ptr and
cl_qcom_dmabuf_host_ptr.
The application is responsible for preparing the host_ptr
objects.
Sometimes the application needs the device id to query device-related
information. Use the snippet below to get the cl_device_id from the workspace:
OpenCLWorkspace* workspace = OpenCLWorkspace::Global();
cl_device_id device_id = workspace->GetCLDeviceID(0);
---
cmake/config.cmake | 3 ++
cmake/modules/OpenCL.cmake | 13 ++++++-
cmake/modules/contrib/CLML.cmake | 2 +-
cmake/utils/FindOpenCL.cmake | 2 +-
src/runtime/opencl/opencl_common.h | 14 ++++++-
src/runtime/opencl/opencl_device_api.cc | 50 +++++++++++++++++++++++--
src/support/libinfo.cc | 5 +++
tests/cpp-runtime/opencl/aa_opencl_qcom_extn.cc | 50 +++++++++++++++++++++++++
tests/scripts/task_build_adreno_bins.sh | 2 +
9 files changed, 133 insertions(+), 8 deletions(-)
diff --git a/cmake/config.cmake b/cmake/config.cmake
index 26d50630f7..0d912c0c75 100644
--- a/cmake/config.cmake
+++ b/cmake/config.cmake
@@ -483,3 +483,6 @@ SET(CMAKE_VS_PLATFORM_NAME_DEFAULT "x64")
# Set Windows Visual Studio default host (equivalent to -Thost=x64)
SET(CMAKE_VS_PLATFORM_TOOLSET_HOST_ARCHITECTURE "x64")
+
+# Enable Qualcomm OpenCL extension support
+set(USE_OPENCL_EXTN_QCOM OFF)
diff --git a/cmake/modules/OpenCL.cmake b/cmake/modules/OpenCL.cmake
index ddcd1e4190..67d739bb63 100644
--- a/cmake/modules/OpenCL.cmake
+++ b/cmake/modules/OpenCL.cmake
@@ -84,7 +84,7 @@ if(USE_OPENCL)
"tests/cpp-runtime/opencl/*.cc"
)
add_executable(opencl-cpptest ${OPENCL_TEST_SRCS})
- target_link_libraries(opencl-cpptest PRIVATE gtest_main tvm_runtime)
+ target_link_libraries(opencl-cpptest PRIVATE gtest_main tvm_runtime
${OpenCL_LIBRARIES})
else()
message(STATUS "Couldn't build OpenCL-Gtests")
endif()
@@ -93,6 +93,17 @@ if(USE_OPENCL)
if(USE_OPENCL_ENABLE_HOST_PTR)
add_definitions(-DOPENCL_ENABLE_HOST_PTR)
endif(USE_OPENCL_ENABLE_HOST_PTR)
+ if(USE_OPENCL_EXTN_QCOM)
+ add_definitions(-DUSE_OPENCL_EXTN_QCOM)
+ find_path(ocl_header cl.h HINTS ${OpenCL_INCLUDE_DIRS} PATH_SUFFIXES CL)
+ set(OCL_VERSION_HEADER "${ocl_header}/cl.h")
+ if(EXISTS ${OCL_VERSION_HEADER})
+ file(READ ${OCL_VERSION_HEADER} ver)
+ string(REGEX MATCH "CL_TARGET_OPENCL_VERSION ([0-9]*)" _ ${ver})
+ add_definitions(-DCL_TARGET_OPENCL_VERSION=${CMAKE_MATCH_1})
+ message(STATUS "Set OpenCL Target version to " ${CMAKE_MATCH_1})
+ endif()
+ endif(USE_OPENCL_EXTN_QCOM)
else()
list(APPEND COMPILER_SRCS src/target/opt/build_opencl_off.cc)
endif(USE_OPENCL)
diff --git a/cmake/modules/contrib/CLML.cmake b/cmake/modules/contrib/CLML.cmake
index e658f15865..118091696a 100644
--- a/cmake/modules/contrib/CLML.cmake
+++ b/cmake/modules/contrib/CLML.cmake
@@ -77,7 +77,7 @@ if(USE_CLML_GRAPH_EXECUTOR)
message(STATUS "Enable OpenCL as fallback to CLML")
file(GLOB RUNTIME_OPENCL_SRCS src/runtime/opencl/*.cc)
list(APPEND RUNTIME_SRCS ${RUNTIME_OPENCL_SRCS})
- set(USE_OPENCL ON)
+ set(USE_OPENCL ${CLML_PATH})
if(USE_OPENCL_ENABLE_HOST_PTR)
add_definitions(-DOPENCL_ENABLE_HOST_PTR)
endif(USE_OPENCL_ENABLE_HOST_PTR)
diff --git a/cmake/utils/FindOpenCL.cmake b/cmake/utils/FindOpenCL.cmake
index 8eb35ab399..13ffa71593 100644
--- a/cmake/utils/FindOpenCL.cmake
+++ b/cmake/utils/FindOpenCL.cmake
@@ -46,7 +46,7 @@ macro(find_opencl use_opencl)
endif()
if(__opencl_sdk)
- set(OpenCL_INCLUDE_DIRS ${__opencl_sdk}/include)
+ set(OpenCL_INCLUDE_DIRS ${__opencl_sdk}/include ${__opencl_sdk})
if (CMAKE_FIND_ROOT_PATH_MODE_LIBRARY STREQUAL "ONLY")
set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY BOTH)
endif()
diff --git a/src/runtime/opencl/opencl_common.h
b/src/runtime/opencl/opencl_common.h
index f752a487ea..e0abd1841b 100644
--- a/src/runtime/opencl/opencl_common.h
+++ b/src/runtime/opencl/opencl_common.h
@@ -50,12 +50,17 @@
* files. This also allows us to expose the OpenCL version through
* tvm.runtime.Device.
*/
+#if !defined(CL_TARGET_OPENCL_VERSION)
#define CL_TARGET_OPENCL_VERSION 120
+#endif
#ifdef __APPLE__
#include <OpenCL/opencl.h>
#else
#include <CL/opencl.h>
+#ifdef USE_OPENCL_EXTN_QCOM
+#include <CL/cl_ext_qcom.h>
+#endif
#endif
#include <memory>
@@ -254,8 +259,13 @@ class OpenCLWorkspace : public DeviceAPI {
}
// Initialize the device.
void Init(const std::string& type_key, const std::string& device_type,
- const std::string& platform_name = "");
+ const std::string& platform_name = "", cl_context_properties
properties[] = nullptr);
virtual void Init() { Init(this->type_key, "gpu"); }
+ virtual bool Init(cl_context_properties ctx_props[]) {
+ if (!contexts.empty()) return false;
+ Init(this->type_key, "gpu", "", ctx_props);
+ return true;
+ }
// Check whether the context is OpenCL or not.
virtual bool IsOpenCLDevice(Device dev) { return dev.device_type ==
kDLOpenCL; }
// get the queue of the device
@@ -314,6 +324,8 @@ class OpenCLWorkspace : public DeviceAPI {
void* AllocDataSpace(Device dev, int ndim, const int64_t* shape, DLDataType
dtype,
Optional<String> mem_scope = NullOpt) final;
void* GetNativePtr(const tvm::runtime::NDArray& narr);
+ void SetNativePtr(const tvm::runtime::NDArray& narr, void* host_ptr, size_t
buf_size);
+ void SetPerfHint(Device dev, cl_uint perf_hint);
void FreeDataSpace(Device dev, void* ptr) final;
void StreamSync(Device dev, TVMStreamHandle stream) final;
void* AllocWorkspace(Device dev, size_t size, DLDataType type_hint) final;
diff --git a/src/runtime/opencl/opencl_device_api.cc
b/src/runtime/opencl/opencl_device_api.cc
index 0057d0a101..7b161e8932 100644
--- a/src/runtime/opencl/opencl_device_api.cc
+++ b/src/runtime/opencl/opencl_device_api.cc
@@ -277,6 +277,47 @@ void* OpenCLWorkspace::GetNativePtr(const
tvm::runtime::NDArray& narr) {
return desc->host_ptr;
}
+void OpenCLWorkspace::SetNativePtr(const tvm::runtime::NDArray& narr, void*
host_ptr,
+ size_t buf_size) {
+ cl::BufferDescriptor* desc =
static_cast<cl::BufferDescriptor*>(narr.operator->()->data);
+
+ this->Init();
+ if (desc->layout == cl::BufferDescriptor::MemoryLayout::kBuffer1D) {
+#ifdef USE_OPENCL_EXTN_QCOM
+ Device dev = narr.operator->()->device;
+ cl_device_id device_id = GetCLDeviceID(dev.device_id);
+ auto platform = device_info[device_id].platform_id;
+
+ OPENCL_CALL(clFinish(this->GetQueue(dev)));
+ if (desc->host_ptr) {
+ OPENCL_CALL(clEnqueueUnmapMemObject(this->GetQueue(dev), desc->buffer,
+
reinterpret_cast<void*>(desc->host_ptr), 0, nullptr,
+ nullptr));
+ desc->host_ptr = nullptr;
+ }
+ OPENCL_CALL(clReleaseMemObject(desc->buffer));
+
+ cl_int err_code;
+ desc->buffer =
+ clCreateBuffer(this->contexts[platform],
+ CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR |
CL_MEM_EXT_HOST_PTR_QCOM, buf_size,
+ host_ptr, &err_code);
+ desc->layout = cl::BufferDescriptor::MemoryLayout::kBuffer1D;
+ OPENCL_CHECK_ERROR(err_code);
+#endif
+ } else {
+ LOG(FATAL) << "Native Ptr not enabled over image objects";
+ }
+}
+
+void OpenCLWorkspace::SetPerfHint(Device dev, cl_uint perf_hint) {
+#ifdef CL_CONTEXT_PERF_HINT_QCOM
+ cl_device_id device_id = GetCLDeviceID(dev.device_id);
+ auto platform = device_info[device_id].platform_id;
+ OPENCL_CALL(clSetPerfHintQCOM(this->contexts[platform], perf_hint));
+#endif
+}
+
void OpenCLWorkspace::FreeDataSpace(Device dev, void* ptr) {
// We have to make sure that the memory object is not in the command queue
// for some OpenCL platforms.
@@ -284,8 +325,9 @@ void OpenCLWorkspace::FreeDataSpace(Device dev, void* ptr) {
cl::BufferDescriptor* desc = static_cast<cl::BufferDescriptor*>(ptr);
if (desc->host_ptr) {
- clEnqueueUnmapMemObject(this->GetQueue(dev), desc->buffer,
- reinterpret_cast<void*>(desc->host_ptr), 0,
nullptr, nullptr);
+ OPENCL_CALL(clEnqueueUnmapMemObject(this->GetQueue(dev), desc->buffer,
+
reinterpret_cast<void*>(desc->host_ptr), 0, nullptr,
+ nullptr));
}
OPENCL_CALL(clReleaseMemObject(desc->buffer));
delete desc;
@@ -473,7 +515,7 @@ bool MatchPlatformInfo(cl_platform_id pid, cl_platform_info
param_name, std::str
}
void OpenCLWorkspace::Init(const std::string& type_key, const std::string&
device_type,
- const std::string& platform_name) {
+ const std::string& platform_name,
cl_context_properties ctx_props[]) {
if (initialized_) return;
std::lock_guard<std::mutex> lock(this->mu);
if (initialized_) return;
@@ -539,7 +581,7 @@ void OpenCLWorkspace::Init(const std::string& type_key,
const std::string& devic
for (auto& [platform, devices] : device_map) {
this->platform_ids.push_back(platform);
this->contexts[platform] =
- clCreateContext(nullptr, devices.size(), &(devices[0]), nullptr,
nullptr, &err_code);
+ clCreateContext(ctx_props, devices.size(), &(devices[0]), nullptr,
nullptr, &err_code);
this->devices.insert(this->devices.end(), devices.begin(), devices.end());
for (size_t i = 0; i < devices.size(); ++i) {
cl_device_id did = devices[i];
diff --git a/src/support/libinfo.cc b/src/support/libinfo.cc
index 2d1c33cbf2..f1768dfd77 100644
--- a/src/support/libinfo.cc
+++ b/src/support/libinfo.cc
@@ -63,6 +63,10 @@
#define TVM_INFO_USE_OPENCL_ENABLE_HOST_PTR "NOT-FOUND"
#endif
+#ifndef TVM_INFO_USE_OPENCL_EXTN_QCOM
+#define TVM_INFO_USE_OPENCL_EXTN_QCOM "NOT-FOUND"
+#endif
+
#ifndef TVM_INFO_USE_OPENCL_GTEST
#define TVM_INFO_USE_OPENCL_GTEST "NOT-FOUND"
#endif
@@ -362,6 +366,7 @@ TVM_DLL Map<String, String> GetLibInfo() {
{"USE_NNPACK", TVM_INFO_USE_NNPACK},
{"USE_OPENCL", TVM_INFO_USE_OPENCL},
{"USE_OPENCL_ENABLE_HOST_PTR", TVM_INFO_USE_OPENCL_ENABLE_HOST_PTR},
+ {"USE_OPENCL_EXTN_QCOM", TVM_INFO_USE_OPENCL_EXTN_QCOM},
{"USE_OPENCL_GTEST", TVM_INFO_USE_OPENCL_GTEST},
{"USE_OPENMP", TVM_INFO_USE_OPENMP},
{"USE_PAPI", TVM_INFO_USE_PAPI},
diff --git a/tests/cpp-runtime/opencl/aa_opencl_qcom_extn.cc
b/tests/cpp-runtime/opencl/aa_opencl_qcom_extn.cc
new file mode 100644
index 0000000000..1f3dc2057a
--- /dev/null
+++ b/tests/cpp-runtime/opencl/aa_opencl_qcom_extn.cc
@@ -0,0 +1,50 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+// Note: these should be the first tests to be executed,
+// hence the filename is crafted accordingly.
+
+#include <gtest/gtest.h>
+#include <tvm/runtime/container/optional.h>
+
+#include "../src/runtime/opencl/opencl_common.h"
+
+using namespace tvm::runtime;
+using namespace tvm::runtime::cl;
+
+#ifdef USE_OPENCL_EXTN_QCOM
+#pragma message("Qualcomm OpenCL Extn GTests: enabled")
+TEST(QCOMExtn, ContextPriorityHint) {
+ OpenCLWorkspace* workspace = OpenCLWorkspace::Global();
+ cl_context_properties properties[] = {CL_CONTEXT_PRIORITY_HINT_QCOM,
CL_PRIORITY_HINT_LOW_QCOM,
+ 0};
+ // Only allow one time
+ ASSERT_EQ(workspace->Init(properties), true);
+ // Subsequent calls will be failure
+ ASSERT_EQ(workspace->Init(properties), false);
+}
+
+TEST(QCOMExtn, ContextPerfHint) {
+ OpenCLWorkspace* workspace = OpenCLWorkspace::Global();
+ auto dev = DLDevice{kDLOpenCL, 0};
+ workspace->SetPerfHint(dev, CL_PERF_HINT_HIGH_QCOM);
+}
+#else
+#pragma message("Qualcomm OpenCL Extn GTests: disabled")
+#endif
diff --git a/tests/scripts/task_build_adreno_bins.sh
b/tests/scripts/task_build_adreno_bins.sh
index 38eefd93a6..412af49281 100755
--- a/tests/scripts/task_build_adreno_bins.sh
+++ b/tests/scripts/task_build_adreno_bins.sh
@@ -50,6 +50,8 @@ echo set\(MACHINE_NAME aarch64-linux-gnu\) >> config.cmake
echo set\(USE_OPENCL_GTEST ON\) >> config.cmake
+echo set\(USE_OPENCL_EXTN_QCOM ON\) >> config.cmake
+
cmake
-DCMAKE_TOOLCHAIN_FILE="${ANDROID_NDK_HOME}/build/cmake/android.toolchain.cmake"
\
-DANDROID_ABI=arm64-v8a \
-DANDROID_PLATFORM=android-28 \