http://git-wip-us.apache.org/repos/asf/mahout/blob/f7c1f802/native-viennaCL/src/main/cpp/libviennacl/src/init_matrix.hpp ---------------------------------------------------------------------- diff --git a/native-viennaCL/src/main/cpp/libviennacl/src/init_matrix.hpp b/native-viennaCL/src/main/cpp/libviennacl/src/init_matrix.hpp new file mode 100644 index 0000000..e463e88 --- /dev/null +++ b/native-viennaCL/src/main/cpp/libviennacl/src/init_matrix.hpp @@ -0,0 +1,101 @@ +/* ========================================================================= + Copyright (c) 2010-2014, Institute for Microelectronics, + Institute for Analysis and Scientific Computing, + TU Wien. + Portions of this software are copyright by UChicago Argonne, LLC. + + ----------------- + ViennaCL - The Vienna Computing Library + ----------------- + + Project Head: Karl Rupp [email protected] + + (A list of authors and contributors can be found in the PDF manual) + + License: MIT (X11), see file LICENSE in the base directory +============================================================================= */ + +#include "viennacl.hpp" +#include "viennacl/backend/mem_handle.hpp" + + + +static ViennaCLStatus init_cuda_matrix(viennacl::backend::mem_handle & h, ViennaCLMatrix A) +{ +#ifdef VIENNACL_WITH_CUDA + h.switch_active_handle_id(viennacl::CUDA_MEMORY); + h.cuda_handle().reset(A->cuda_mem); + h.cuda_handle().inc(); + if (A->precision == ViennaCLFloat) + h.raw_size(static_cast<viennacl::vcl_size_t>(A->internal_size1) * static_cast<viennacl::vcl_size_t>(A->internal_size2) * sizeof(float)); // not necessary, but still set for conciseness + else if (A->precision == ViennaCLDouble) + h.raw_size(static_cast<viennacl::vcl_size_t>(A->internal_size1) * static_cast<viennacl::vcl_size_t>(A->internal_size2) * sizeof(double)); // not necessary, but still set for conciseness + else + return ViennaCLGenericFailure; + + return ViennaCLSuccess; +#else + (void)h; + (void)A; + return ViennaCLGenericFailure; +#endif +} + +static ViennaCLStatus init_opencl_matrix(viennacl::backend::mem_handle & h, ViennaCLMatrix A) +{ +#ifdef VIENNACL_WITH_OPENCL + h.switch_active_handle_id(viennacl::OPENCL_MEMORY); + h.opencl_handle() = A->opencl_mem; + h.opencl_handle().inc(); + if (A->precision == ViennaCLFloat) + h.raw_size(static_cast<viennacl::vcl_size_t>(A->internal_size1) * static_cast<viennacl::vcl_size_t>(A->internal_size2) * sizeof(float)); // not necessary, but still set for conciseness + else if (A->precision == ViennaCLDouble) + h.raw_size(static_cast<viennacl::vcl_size_t>(A->internal_size1) * static_cast<viennacl::vcl_size_t>(A->internal_size2) * sizeof(double)); // not necessary, but still set for conciseness + else + return ViennaCLGenericFailure; + + return ViennaCLSuccess; +#else + (void)h; + (void)A; + return ViennaCLGenericFailure; +#endif +} + + +static ViennaCLStatus init_host_matrix(viennacl::backend::mem_handle & h, ViennaCLMatrix A) +{ + h.switch_active_handle_id(viennacl::MAIN_MEMORY); + h.ram_handle().reset(A->host_mem); + h.ram_handle().inc(); + if (A->precision == ViennaCLFloat) + h.raw_size(static_cast<viennacl::vcl_size_t>(A->internal_size1) * static_cast<viennacl::vcl_size_t>(A->internal_size2) * sizeof(float)); // not necessary, but still set for conciseness + else if (A->precision == ViennaCLDouble) + h.raw_size(static_cast<viennacl::vcl_size_t>(A->internal_size1) * static_cast<viennacl::vcl_size_t>(A->internal_size2) * sizeof(double)); // not necessary, but still set for conciseness + else + return ViennaCLGenericFailure; + + return ViennaCLSuccess; +} + + +static ViennaCLStatus init_matrix(viennacl::backend::mem_handle & h, ViennaCLMatrix A) +{ + switch (A->backend->backend_type) + { + case ViennaCLCUDA: + return init_cuda_matrix(h, A); + + case ViennaCLOpenCL: + return init_opencl_matrix(h, A); + + case ViennaCLHost: + return init_host_matrix(h, A); + + default: + return ViennaCLGenericFailure; + } +} + + +
http://git-wip-us.apache.org/repos/asf/mahout/blob/f7c1f802/native-viennaCL/src/main/cpp/libviennacl/src/init_vector.hpp ---------------------------------------------------------------------- diff --git a/native-viennaCL/src/main/cpp/libviennacl/src/init_vector.hpp b/native-viennaCL/src/main/cpp/libviennacl/src/init_vector.hpp new file mode 100644 index 0000000..8be00d7 --- /dev/null +++ b/native-viennaCL/src/main/cpp/libviennacl/src/init_vector.hpp @@ -0,0 +1,101 @@ +/* ========================================================================= + Copyright (c) 2010-2014, Institute for Microelectronics, + Institute for Analysis and Scientific Computing, + TU Wien. + Portions of this software are copyright by UChicago Argonne, LLC. + + ----------------- + ViennaCL - The Vienna Computing Library + ----------------- + + Project Head: Karl Rupp [email protected] + + (A list of authors and contributors can be found in the PDF manual) + + License: MIT (X11), see file LICENSE in the base directory +============================================================================= */ + +#include "viennacl.hpp" +#include "viennacl/backend/mem_handle.hpp" + + + +static ViennaCLStatus init_cuda_vector(viennacl::backend::mem_handle & h, ViennaCLVector x) +{ +#ifdef VIENNACL_WITH_CUDA + h.switch_active_handle_id(viennacl::CUDA_MEMORY); + h.cuda_handle().reset(x->cuda_mem); + h.cuda_handle().inc(); + if (x->precision == ViennaCLFloat) + h.raw_size(static_cast<viennacl::vcl_size_t>(x->inc) * x->size * sizeof(float)); // not necessary, but still set for conciseness + else if (x->precision == ViennaCLDouble) + h.raw_size(static_cast<viennacl::vcl_size_t>(x->inc) * x->size * sizeof(double)); // not necessary, but still set for conciseness + else + return ViennaCLGenericFailure; + + return ViennaCLSuccess; +#else + (void)h; + (void)x; + return ViennaCLGenericFailure; +#endif +} + +static ViennaCLStatus init_opencl_vector(viennacl::backend::mem_handle & h, ViennaCLVector x) +{ +#ifdef VIENNACL_WITH_OPENCL + h.switch_active_handle_id(viennacl::OPENCL_MEMORY); + h.opencl_handle() = x->opencl_mem; + h.opencl_handle().inc(); + if (x->precision == ViennaCLFloat) + h.raw_size(static_cast<viennacl::vcl_size_t>(x->inc) * static_cast<viennacl::vcl_size_t>(x->size) * sizeof(float)); // not necessary, but still set for conciseness + else if (x->precision == ViennaCLDouble) + h.raw_size(static_cast<viennacl::vcl_size_t>(x->inc) * static_cast<viennacl::vcl_size_t>(x->size) * sizeof(double)); // not necessary, but still set for conciseness + else + return ViennaCLGenericFailure; + + return ViennaCLSuccess; +#else + (void)h; + (void)x; + return ViennaCLGenericFailure; +#endif +} + + +static ViennaCLStatus init_host_vector(viennacl::backend::mem_handle & h, ViennaCLVector x) +{ + h.switch_active_handle_id(viennacl::MAIN_MEMORY); + h.ram_handle().reset(x->host_mem); + h.ram_handle().inc(); + if (x->precision == ViennaCLFloat) + h.raw_size(static_cast<viennacl::vcl_size_t>(x->inc) * static_cast<viennacl::vcl_size_t>(x->size) * sizeof(float)); // not necessary, but still set for conciseness + else if (x->precision == ViennaCLDouble) + h.raw_size(static_cast<viennacl::vcl_size_t>(x->inc) * static_cast<viennacl::vcl_size_t>(x->size) * sizeof(double)); // not necessary, but still set for conciseness + else + return ViennaCLGenericFailure; + + return ViennaCLSuccess; +} + + +static ViennaCLStatus init_vector(viennacl::backend::mem_handle & h, ViennaCLVector x) +{ + switch (x->backend->backend_type) + { + case ViennaCLCUDA: + return init_cuda_vector(h, x); + + case ViennaCLOpenCL: + return init_opencl_vector(h, x); + + case ViennaCLHost: + return init_host_vector(h, x); + + default: + return ViennaCLGenericFailure; + } +} + + + http://git-wip-us.apache.org/repos/asf/mahout/blob/f7c1f802/native-viennaCL/src/main/cpp/libviennacl/src/viennacl_private.hpp ---------------------------------------------------------------------- diff --git a/native-viennaCL/src/main/cpp/libviennacl/src/viennacl_private.hpp b/native-viennaCL/src/main/cpp/libviennacl/src/viennacl_private.hpp new file mode 100644 index 0000000..c66c848 --- /dev/null +++ b/native-viennaCL/src/main/cpp/libviennacl/src/viennacl_private.hpp @@ -0,0 +1,141 @@ +#ifndef VIENNACL_VIENNACL_PRIVATE_HPP +#define VIENNACL_VIENNACL_PRIVATE_HPP + + +/* ========================================================================= + Copyright (c) 2010-2014, Institute for Microelectronics, + Institute for Analysis and Scientific Computing, + TU Wien. + Portions of this software are copyright by UChicago Argonne, LLC. + + ----------------- + ViennaCL - The Vienna Computing Library + ----------------- + + Project Head: Karl Rupp [email protected] + + (A list of authors and contributors can be found in the PDF manual) + + License: MIT (X11), see file LICENSE in the base directory +============================================================================= */ + +#include <stdlib.h> + +#ifdef VIENNACL_WITH_OPENCL +#ifdef __APPLE__ +#include <OpenCL/cl.h> +#else +#include <CL/cl.h> +#endif +#endif + +#include "viennacl.hpp" + + +/************* Backend Management ******************/ + +struct ViennaCLCUDABackend_impl +{ + //TODO: Add stream and/or device descriptors here +}; + +struct ViennaCLOpenCLBackend_impl +{ + ViennaCLInt context_id; +}; + +struct ViennaCLHostBackend_impl +{ + // Nothing to specify *at the moment* +}; + + +/** @brief Generic backend for CUDA, OpenCL, host-based stuff */ +struct ViennaCLBackend_impl +{ + ViennaCLBackendTypes backend_type; + + ViennaCLCUDABackend_impl cuda_backend; + ViennaCLOpenCLBackend_impl opencl_backend; + ViennaCLHostBackend_impl host_backend; +}; + + + +/******** User Types **********/ + +struct ViennaCLHostScalar_impl +{ + ViennaCLPrecision precision; + + union { + float value_float; + double value_double; + }; +}; + +struct ViennaCLScalar_impl +{ + ViennaCLBackend backend; + ViennaCLPrecision precision; + + // buffer: +#ifdef VIENNACL_WITH_CUDA + char * cuda_mem; +#endif +#ifdef VIENNACL_WITH_OPENCL + cl_mem opencl_mem; +#endif + char * host_mem; + + ViennaCLInt offset; +}; + +struct ViennaCLVector_impl +{ + ViennaCLBackend backend; + ViennaCLPrecision precision; + + // buffer: +#ifdef VIENNACL_WITH_CUDA + char * cuda_mem; +#endif +#ifdef VIENNACL_WITH_OPENCL + cl_mem opencl_mem; +#endif + char * host_mem; + + ViennaCLInt offset; + ViennaCLInt inc; + ViennaCLInt size; +}; + +struct ViennaCLMatrix_impl +{ + ViennaCLBackend backend; + ViennaCLPrecision precision; + ViennaCLOrder order; + ViennaCLTranspose trans; + + // buffer: +#ifdef VIENNACL_WITH_CUDA + char * cuda_mem; +#endif +#ifdef VIENNACL_WITH_OPENCL + cl_mem opencl_mem; +#endif + char * host_mem; + + ViennaCLInt size1; + ViennaCLInt start1; + ViennaCLInt stride1; + ViennaCLInt internal_size1; + + ViennaCLInt size2; + ViennaCLInt start2; + ViennaCLInt stride2; + ViennaCLInt internal_size2; +}; + + +#endif http://git-wip-us.apache.org/repos/asf/mahout/blob/f7c1f802/native-viennaCL/src/main/cpp/viennacl/backend/cpu_ram.hpp ---------------------------------------------------------------------- diff --git a/native-viennaCL/src/main/cpp/viennacl/backend/cpu_ram.hpp b/native-viennaCL/src/main/cpp/viennacl/backend/cpu_ram.hpp new file mode 100644 index 0000000..ccfd035 --- /dev/null +++ b/native-viennaCL/src/main/cpp/viennacl/backend/cpu_ram.hpp @@ -0,0 +1,171 @@ +#ifndef VIENNACL_BACKEND_CPU_RAM_HPP_ +#define VIENNACL_BACKEND_CPU_RAM_HPP_ + +/* ========================================================================= + Copyright (c) 2010-2016, Institute for Microelectronics, + Institute for Analysis and Scientific Computing, + TU Wien. + Portions of this software are copyright by UChicago Argonne, LLC. + + ----------------- + ViennaCL - The Vienna Computing Library + ----------------- + + Project Head: Karl Rupp [email protected] + + (A list of authors and contributors can be found in the manual) + + License: MIT (X11), see file LICENSE in the base directory +============================================================================= */ + +/** @file viennacl/backend/cpu_ram.hpp + @brief Implementations for the OpenCL backend functionality +*/ + +#include <cassert> +#include <vector> +#ifdef VIENNACL_WITH_AVX2 +#include <stdlib.h> +#endif + +#include "viennacl/forwards.h" +#include "viennacl/tools/shared_ptr.hpp" + +namespace viennacl +{ +namespace backend +{ +namespace cpu_ram +{ +typedef viennacl::tools::shared_ptr<char> handle_type; +// Requirements for backend: + +// * memory_create(size, host_ptr) +// * memory_copy(src, dest, offset_src, offset_dest, size) +// * memory_write_from_main_memory(src, offset, size, +// dest, offset, size) +// * memory_read_to_main_memory(src, offset, size +// dest, offset, size) +// * +// + +namespace detail +{ + /** @brief Helper struct for deleting an pointer to an array */ + template<class U> + struct array_deleter + { +#ifdef VIENNACL_WITH_AVX2 + void operator()(U* p) const { free(p); } +#else + void operator()(U* p) const { delete[] p; } +#endif + }; + +} + +/** @brief Creates an array of the specified size in main RAM. If the second argument is provided, the buffer is initialized with data from that pointer. + * + * @param size_in_bytes Number of bytes to allocate + * @param host_ptr Pointer to data which will be copied to the new array. Must point to at least 'size_in_bytes' bytes of data. + * + */ +inline handle_type memory_create(vcl_size_t size_in_bytes, const void * host_ptr = NULL) +{ +#ifdef VIENNACL_WITH_AVX2 + // Note: aligned_alloc not available on all compilers. Consider platform-specific alternatives such as posix_memalign() + if (!host_ptr) + return handle_type(reinterpret_cast<char*>(aligned_alloc(32, size_in_bytes)), detail::array_deleter<char>()); + + handle_type new_handle(reinterpret_cast<char*>(aligned_alloc(32, size_in_bytes)), detail::array_deleter<char>()); +#else + if (!host_ptr) + return handle_type(new char[size_in_bytes], detail::array_deleter<char>()); + + handle_type new_handle(new char[size_in_bytes], detail::array_deleter<char>()); +#endif + + // copy data: + char * raw_ptr = new_handle.get(); + const char * data_ptr = static_cast<const char *>(host_ptr); +#ifdef VIENNACL_WITH_OPENMP + #pragma omp parallel for +#endif + for (long i=0; i<long(size_in_bytes); ++i) + raw_ptr[i] = data_ptr[i]; + + return new_handle; +} + +/** @brief Copies 'bytes_to_copy' bytes from address 'src_buffer + src_offset' to memory starting at address 'dst_buffer + dst_offset'. + * + * @param src_buffer A smart pointer to the begin of an allocated buffer + * @param dst_buffer A smart pointer to the end of an allocated buffer + * @param src_offset Offset of the first byte to be written from the address given by 'src_buffer' (in bytes) + * @param dst_offset Offset of the first byte to be written to the address given by 'dst_buffer' (in bytes) + * @param bytes_to_copy Number of bytes to be copied + */ +inline void memory_copy(handle_type const & src_buffer, + handle_type & dst_buffer, + vcl_size_t src_offset, + vcl_size_t dst_offset, + vcl_size_t bytes_to_copy) +{ + assert( (dst_buffer.get() != NULL) && bool("Memory not initialized!")); + assert( (src_buffer.get() != NULL) && bool("Memory not initialized!")); + +#ifdef VIENNACL_WITH_OPENMP + #pragma omp parallel for +#endif + for (long i=0; i<long(bytes_to_copy); ++i) + dst_buffer.get()[vcl_size_t(i)+dst_offset] = src_buffer.get()[vcl_size_t(i) + src_offset]; +} + +/** @brief Writes data from main RAM identified by 'ptr' to the buffer identified by 'dst_buffer' + * + * @param dst_buffer A smart pointer to the beginning of an allocated buffer + * @param dst_offset Offset of the first written byte from the beginning of 'dst_buffer' (in bytes) + * @param bytes_to_copy Number of bytes to be copied + * @param ptr Pointer to the first byte to be written + */ +inline void memory_write(handle_type & dst_buffer, + vcl_size_t dst_offset, + vcl_size_t bytes_to_copy, + const void * ptr, + bool /*async*/) +{ + assert( (dst_buffer.get() != NULL) && bool("Memory not initialized!")); + +#ifdef VIENNACL_WITH_OPENMP + #pragma omp parallel for +#endif + for (long i=0; i<long(bytes_to_copy); ++i) + dst_buffer.get()[vcl_size_t(i)+dst_offset] = static_cast<const char *>(ptr)[i]; +} + +/** @brief Reads data from a buffer back to main RAM. + * + * @param src_buffer A smart pointer to the beginning of an allocated source buffer + * @param src_offset Offset of the first byte to be read from the beginning of src_buffer (in bytes_ + * @param bytes_to_copy Number of bytes to be read + * @param ptr Location in main RAM where to read data should be written to + */ +inline void memory_read(handle_type const & src_buffer, + vcl_size_t src_offset, + vcl_size_t bytes_to_copy, + void * ptr, + bool /*async*/) +{ + assert( (src_buffer.get() != NULL) && bool("Memory not initialized!")); + +#ifdef VIENNACL_WITH_OPENMP + #pragma omp parallel for +#endif + for (long i=0; i<long(bytes_to_copy); ++i) + static_cast<char *>(ptr)[i] = src_buffer.get()[vcl_size_t(i)+src_offset]; +} + +} +} //backend +} //viennacl +#endif http://git-wip-us.apache.org/repos/asf/mahout/blob/f7c1f802/native-viennaCL/src/main/cpp/viennacl/backend/cuda.hpp ---------------------------------------------------------------------- diff --git a/native-viennaCL/src/main/cpp/viennacl/backend/cuda.hpp b/native-viennaCL/src/main/cpp/viennacl/backend/cuda.hpp new file mode 100644 index 0000000..641bfea --- /dev/null +++ b/native-viennaCL/src/main/cpp/viennacl/backend/cuda.hpp @@ -0,0 +1,206 @@ +#ifndef VIENNACL_BACKEND_CUDA_HPP_ +#define VIENNACL_BACKEND_CUDA_HPP_ + +/* ========================================================================= + Copyright (c) 2010-2016, Institute for Microelectronics, + Institute for Analysis and Scientific Computing, + TU Wien. + Portions of this software are copyright by UChicago Argonne, LLC. + + ----------------- + ViennaCL - The Vienna Computing Library + ----------------- + + Project Head: Karl Rupp [email protected] + + (A list of authors and contributors can be found in the manual) + + License: MIT (X11), see file LICENSE in the base directory +============================================================================= */ + +/** @file viennacl/backend/cuda.hpp + @brief Implementations for the CUDA backend functionality +*/ + + +#include <iostream> +#include <vector> +#include <cassert> +#include <stdexcept> +#include <sstream> + +#include "viennacl/forwards.h" +#include "viennacl/tools/shared_ptr.hpp" + +// includes CUDA +#include <cuda_runtime.h> + +#define VIENNACL_CUDA_ERROR_CHECK(err) detail::cuda_error_check (err, __FILE__, __LINE__) + +namespace viennacl +{ +namespace backend +{ +namespace cuda +{ + +typedef viennacl::tools::shared_ptr<char> handle_type; +// Requirements for backend: + +// * memory_create(size, host_ptr) +// * memory_copy(src, dest, offset_src, offset_dest, size) +// * memory_write_from_main_memory(src, offset, size, +// dest, offset, size) +// * memory_read_to_main_memory(src, offset, size +// dest, offset, size) +// * +// + +class cuda_exception : public std::runtime_error +{ +public: + cuda_exception(std::string const & what_arg, cudaError_t err_code) : std::runtime_error(what_arg), error_code_(err_code) {} + + cudaError_t error_code() const { return error_code_; } + +private: + cudaError_t error_code_; +}; + +namespace detail +{ + + inline void cuda_error_check(cudaError error_code, const char *file, const int line ) + { + if (cudaSuccess != error_code) + { + std::stringstream ss; + ss << file << "(" << line << "): " << ": CUDA Runtime API error " << error_code << ": " << cudaGetErrorString( error_code ) << std::endl; + throw viennacl::backend::cuda::cuda_exception(ss.str(), error_code); + } + } + + + /** @brief Functor for deleting a CUDA handle. Used within the smart pointer class. */ + template<typename U> + struct cuda_deleter + { + void operator()(U * p) const + { + //std::cout << "Freeing handle " << reinterpret_cast<void *>(p) << std::endl; + cudaFree(p); + } + }; + +} + +/** @brief Creates an array of the specified size on the CUDA device. If the second argument is provided, the buffer is initialized with data from that pointer. + * + * @param size_in_bytes Number of bytes to allocate + * @param host_ptr Pointer to data which will be copied to the new array. Must point to at least 'size_in_bytes' bytes of data. + * + */ +inline handle_type memory_create(vcl_size_t size_in_bytes, const void * host_ptr = NULL) +{ + void * dev_ptr = NULL; + VIENNACL_CUDA_ERROR_CHECK( cudaMalloc(&dev_ptr, size_in_bytes) ); + //std::cout << "Allocated new dev_ptr " << dev_ptr << " of size " << size_in_bytes << std::endl; + + if (!host_ptr) + return handle_type(reinterpret_cast<char *>(dev_ptr), detail::cuda_deleter<char>()); + + handle_type new_handle(reinterpret_cast<char*>(dev_ptr), detail::cuda_deleter<char>()); + + // copy data: + //std::cout << "Filling new handle from host_ptr " << host_ptr << std::endl; + cudaMemcpy(new_handle.get(), host_ptr, size_in_bytes, cudaMemcpyHostToDevice); + + return new_handle; +} + + +/** @brief Copies 'bytes_to_copy' bytes from address 'src_buffer + src_offset' on the CUDA device to memory starting at address 'dst_buffer + dst_offset' on the same CUDA device. + * + * @param src_buffer A smart pointer to the begin of an allocated CUDA buffer + * @param dst_buffer A smart pointer to the end of an allocated CUDA buffer + * @param src_offset Offset of the first byte to be written from the address given by 'src_buffer' (in bytes) + * @param dst_offset Offset of the first byte to be written to the address given by 'dst_buffer' (in bytes) + * @param bytes_to_copy Number of bytes to be copied + */ +inline void memory_copy(handle_type const & src_buffer, + handle_type & dst_buffer, + vcl_size_t src_offset, + vcl_size_t dst_offset, + vcl_size_t bytes_to_copy) +{ + assert( (dst_buffer.get() != NULL) && bool("Memory not initialized!")); + assert( (src_buffer.get() != NULL) && bool("Memory not initialized!")); + + cudaMemcpy(reinterpret_cast<void *>(dst_buffer.get() + dst_offset), + reinterpret_cast<void *>(src_buffer.get() + src_offset), + bytes_to_copy, + cudaMemcpyDeviceToDevice); +} + + +/** @brief Writes data from main RAM identified by 'ptr' to the CUDA buffer identified by 'dst_buffer' + * + * @param dst_buffer A smart pointer to the beginning of an allocated CUDA buffer + * @param dst_offset Offset of the first written byte from the beginning of 'dst_buffer' (in bytes) + * @param bytes_to_copy Number of bytes to be copied + * @param ptr Pointer to the first byte to be written + * @param async Whether the operation should be asynchronous + */ +inline void memory_write(handle_type & dst_buffer, + vcl_size_t dst_offset, + vcl_size_t bytes_to_copy, + const void * ptr, + bool async = false) +{ + assert( (dst_buffer.get() != NULL) && bool("Memory not initialized!")); + + if (async) + cudaMemcpyAsync(reinterpret_cast<char *>(dst_buffer.get()) + dst_offset, + reinterpret_cast<const char *>(ptr), + bytes_to_copy, + cudaMemcpyHostToDevice); + else + cudaMemcpy(reinterpret_cast<char *>(dst_buffer.get()) + dst_offset, + reinterpret_cast<const char *>(ptr), + bytes_to_copy, + cudaMemcpyHostToDevice); +} + + +/** @brief Reads data from a CUDA buffer back to main RAM. + * + * @param src_buffer A smart pointer to the beginning of an allocated CUDA source buffer + * @param src_offset Offset of the first byte to be read from the beginning of src_buffer (in bytes_ + * @param bytes_to_copy Number of bytes to be read + * @param ptr Location in main RAM where to read data should be written to + * @param async Whether the operation should be asynchronous + */ +inline void memory_read(handle_type const & src_buffer, + vcl_size_t src_offset, + vcl_size_t bytes_to_copy, + void * ptr, + bool async = false) +{ + assert( (src_buffer.get() != NULL) && bool("Memory not initialized!")); + + if (async) + cudaMemcpyAsync(reinterpret_cast<char *>(ptr), + reinterpret_cast<char *>(src_buffer.get()) + src_offset, + bytes_to_copy, + cudaMemcpyDeviceToHost); + else + cudaMemcpy(reinterpret_cast<char *>(ptr), + reinterpret_cast<char *>(src_buffer.get()) + src_offset, + bytes_to_copy, + cudaMemcpyDeviceToHost); +} + +} //cuda +} //backend +} //viennacl +#endif http://git-wip-us.apache.org/repos/asf/mahout/blob/f7c1f802/native-viennaCL/src/main/cpp/viennacl/backend/mem_handle.hpp ---------------------------------------------------------------------- diff --git a/native-viennaCL/src/main/cpp/viennacl/backend/mem_handle.hpp b/native-viennaCL/src/main/cpp/viennacl/backend/mem_handle.hpp new file mode 100644 index 0000000..37c680b --- /dev/null +++ b/native-viennaCL/src/main/cpp/viennacl/backend/mem_handle.hpp @@ -0,0 +1,250 @@ +#ifndef VIENNACL_BACKEND_MEM_HANDLE_HPP +#define VIENNACL_BACKEND_MEM_HANDLE_HPP + +/* ========================================================================= + Copyright (c) 2010-2016, Institute for Microelectronics, + Institute for Analysis and Scientific Computing, + TU Wien. + Portions of this software are copyright by UChicago Argonne, LLC. + + ----------------- + ViennaCL - The Vienna Computing Library + ----------------- + + Project Head: Karl Rupp [email protected] + + (A list of authors and contributors can be found in the manual) + + License: MIT (X11), see file LICENSE in the base directory +============================================================================= */ + +/** @file viennacl/backend/mem_handle.hpp + @brief Implements the multi-memory-domain handle +*/ + +#include <vector> +#include <cassert> +#include "viennacl/forwards.h" +#include "viennacl/tools/shared_ptr.hpp" +#include "viennacl/backend/cpu_ram.hpp" + +#ifdef VIENNACL_WITH_OPENCL +#include "viennacl/backend/opencl.hpp" +#endif + +#ifdef VIENNACL_WITH_CUDA +#include "viennacl/backend/cuda.hpp" +#endif + + +namespace viennacl +{ +namespace backend +{ + +namespace detail +{ + /** @brief Singleton for managing the default memory type. + * + * @param new_mem_type If NULL, returns the current memory type. Otherwise, sets the memory type to the provided value. + */ + inline memory_types get_set_default_memory_type(memory_types * new_mem_type) + { + // if a user compiles with CUDA, it is reasonable to expect that CUDA should be the default +#ifdef VIENNACL_WITH_CUDA + static memory_types mem_type = CUDA_MEMORY; +#elif defined(VIENNACL_WITH_OPENCL) + static memory_types mem_type = OPENCL_MEMORY; +#else + static memory_types mem_type = MAIN_MEMORY; +#endif + + if (new_mem_type) + mem_type = *new_mem_type; + + return mem_type; + } +} + +/** @brief Returns the default memory type for the given configuration. + * + * CUDA has precedence over OpenCL, which has precedence over main memory. Depends on which VIENNACL_WITH_{CUDA/OPENCL/OPENMP} macros are defined. + */ +inline memory_types default_memory_type() { return detail::get_set_default_memory_type(NULL); } + +/** @brief Sets the default memory type for the given configuration. + * + * Make sure the respective new memory type is enabled. + * For example, passing CUDA_MEMORY if no CUDA backend is selected will result in exceptions being thrown as soon as you try to allocate buffers. + */ +inline memory_types default_memory_type(memory_types new_memory_type) { return detail::get_set_default_memory_type(&new_memory_type); } + + +/** @brief Main abstraction class for multiple memory domains. Represents a buffer in either main RAM, an OpenCL context, or a CUDA device. + * + * The idea is to wrap all possible handle types inside this class so that higher-level code does not need to be cluttered with preprocessor switches. + * Instead, this class collects all the necessary conditional compilations. + * + */ +class mem_handle +{ +public: + typedef viennacl::tools::shared_ptr<char> ram_handle_type; + typedef viennacl::tools::shared_ptr<char> cuda_handle_type; + + /** @brief Default CTOR. No memory is allocated */ + mem_handle() : active_handle_(MEMORY_NOT_INITIALIZED), size_in_bytes_(0) {} + + /** @brief Returns the handle to a buffer in CPU RAM. NULL is returned if no such buffer has been allocated. */ + ram_handle_type & ram_handle() { return ram_handle_; } + /** @brief Returns the handle to a buffer in CPU RAM. NULL is returned if no such buffer has been allocated. */ + ram_handle_type const & ram_handle() const { return ram_handle_; } + +#ifdef VIENNACL_WITH_OPENCL + /** @brief Returns the handle to an OpenCL buffer. The handle contains NULL if no such buffer has been allocated. */ + viennacl::ocl::handle<cl_mem> & opencl_handle() { return opencl_handle_; } + /** @brief Returns the handle to an OpenCL buffer. The handle contains NULL if no such buffer has been allocated. */ + viennacl::ocl::handle<cl_mem> const & opencl_handle() const { return opencl_handle_; } +#endif + +#ifdef VIENNACL_WITH_CUDA + /** @brief Returns the handle to a CUDA buffer. The handle contains NULL if no such buffer has been allocated. */ + cuda_handle_type & cuda_handle() { return cuda_handle_; } + /** @brief Returns the handle to a CUDA buffer. The handle contains NULL if no such buffer has been allocated. */ + cuda_handle_type const & cuda_handle() const { return cuda_handle_; } +#endif + + /** @brief Returns an ID for the currently active memory buffer. Other memory buffers might contain old or no data. */ + memory_types get_active_handle_id() const { return active_handle_; } + + /** @brief Switches the currently active handle. If no support for that backend is provided, an exception is thrown. */ + void switch_active_handle_id(memory_types new_id) + { + if (new_id != active_handle_) + { + if (active_handle_ == MEMORY_NOT_INITIALIZED) + active_handle_ = new_id; + else if (active_handle_ == MAIN_MEMORY) + { + active_handle_ = new_id; + } + else if (active_handle_ == OPENCL_MEMORY) + { +#ifdef VIENNACL_WITH_OPENCL + active_handle_ = new_id; +#else + throw memory_exception("compiled without OpenCL suppport!"); +#endif + } + else if (active_handle_ == CUDA_MEMORY) + { +#ifdef VIENNACL_WITH_CUDA + active_handle_ = new_id; +#else + throw memory_exception("compiled without CUDA suppport!"); +#endif + } + else + throw memory_exception("invalid new memory region!"); + } + } + + /** @brief Compares the two handles and returns true if the active memory handles in the two mem_handles point to the same buffer. */ + bool operator==(mem_handle const & other) const + { + if (active_handle_ != other.active_handle_) + return false; + + switch (active_handle_) + { + case MAIN_MEMORY: + return ram_handle_.get() == other.ram_handle_.get(); +#ifdef VIENNACL_WITH_OPENCL + case OPENCL_MEMORY: + return opencl_handle_.get() == other.opencl_handle_.get(); +#endif +#ifdef VIENNACL_WITH_CUDA + case CUDA_MEMORY: + return cuda_handle_.get() == other.cuda_handle_.get(); +#endif + default: break; + } + + return false; + } + + /** @brief Compares the two handles and returns true if the active memory handles in the two mem_handles point a buffer with inferior address + * useful to store handles into a map, since they naturally have strong ordering + */ + bool operator<(mem_handle const & other) const + { + if (active_handle_ != other.active_handle_) + return false; + + switch (active_handle_) + { + case MAIN_MEMORY: + return ram_handle_.get() < other.ram_handle_.get(); +#ifdef VIENNACL_WITH_OPENCL + case OPENCL_MEMORY: + return opencl_handle_.get() < other.opencl_handle_.get(); +#endif +#ifdef VIENNACL_WITH_CUDA + case CUDA_MEMORY: + return cuda_handle_.get() < other.cuda_handle_.get(); +#endif + default: break; + } + + return false; + } + + + bool operator!=(mem_handle const & other) const { return !(*this == other); } + + /** @brief Implements a fast swapping method. No data is copied, only the handles are exchanged. */ + void swap(mem_handle & other) + { + // swap handle type: + memory_types active_handle_tmp = other.active_handle_; + other.active_handle_ = active_handle_; + active_handle_ = active_handle_tmp; + + // swap ram handle: + ram_handle_type ram_handle_tmp = other.ram_handle_; + other.ram_handle_ = ram_handle_; + ram_handle_ = ram_handle_tmp; + + // swap OpenCL handle: +#ifdef VIENNACL_WITH_OPENCL + opencl_handle_.swap(other.opencl_handle_); +#endif +#ifdef VIENNACL_WITH_CUDA + cuda_handle_type cuda_handle_tmp = other.cuda_handle_; + other.cuda_handle_ = cuda_handle_; + cuda_handle_ = cuda_handle_tmp; +#endif + } + + /** @brief Returns the number of bytes of the currently active buffer */ + vcl_size_t raw_size() const { return size_in_bytes_; } + + /** @brief Sets the size of the currently active buffer. Use with care! */ + void raw_size(vcl_size_t new_size) { size_in_bytes_ = new_size; } + +private: + memory_types active_handle_; + ram_handle_type ram_handle_; +#ifdef VIENNACL_WITH_OPENCL + viennacl::ocl::handle<cl_mem> opencl_handle_; +#endif +#ifdef VIENNACL_WITH_CUDA + cuda_handle_type cuda_handle_; +#endif + vcl_size_t size_in_bytes_; +}; + + +} //backend +} //viennacl +#endif http://git-wip-us.apache.org/repos/asf/mahout/blob/f7c1f802/native-viennaCL/src/main/cpp/viennacl/backend/memory.hpp ---------------------------------------------------------------------- diff --git a/native-viennaCL/src/main/cpp/viennacl/backend/memory.hpp b/native-viennaCL/src/main/cpp/viennacl/backend/memory.hpp new file mode 100644 index 0000000..d6f29a5 --- /dev/null +++ b/native-viennaCL/src/main/cpp/viennacl/backend/memory.hpp @@ -0,0 +1,628 @@ +#ifndef VIENNACL_BACKEND_MEMORY_HPP +#define VIENNACL_BACKEND_MEMORY_HPP + +/* ========================================================================= + Copyright (c) 2010-2016, Institute for Microelectronics, + Institute for Analysis and Scientific Computing, + TU Wien. + Portions of this software are copyright by UChicago Argonne, LLC. + + ----------------- + ViennaCL - The Vienna Computing Library + ----------------- + + Project Head: Karl Rupp [email protected] + + (A list of authors and contributors can be found in the manual) + + License: MIT (X11), see file LICENSE in the base directory +============================================================================= */ + +/** @file viennacl/backend/memory.hpp + @brief Main interface routines for memory management +*/ + +#include <vector> +#include <cassert> +#include "viennacl/forwards.h" +#include "viennacl/backend/mem_handle.hpp" +#include "viennacl/context.hpp" +#include "viennacl/traits/handle.hpp" +#include "viennacl/traits/context.hpp" +#include "viennacl/backend/util.hpp" + +#include "viennacl/backend/cpu_ram.hpp" + +#ifdef VIENNACL_WITH_OPENCL +#include "viennacl/backend/opencl.hpp" +#include "viennacl/ocl/backend.hpp" +#endif + +#ifdef VIENNACL_WITH_CUDA +#include "viennacl/backend/cuda.hpp" +#endif + + +namespace viennacl +{ +namespace backend +{ + + + // if a user compiles with CUDA, it is reasonable to expect that CUDA should be the default + /** @brief Synchronizes the execution. finish() will only return after all compute kernels (CUDA, OpenCL) have completed. */ + inline void finish() + { +#ifdef VIENNACL_WITH_CUDA + cudaDeviceSynchronize(); +#endif +#ifdef VIENNACL_WITH_OPENCL + viennacl::ocl::get_queue().finish(); +#endif + } + + + + + // Requirements for backend: + + // ---- Memory ---- + // + // * memory_create(size, host_ptr) + // * memory_copy(src, dest, offset_src, offset_dest, size) + // * memory_write(src, offset, size, ptr) + // * memory_read(src, offset, size, ptr) + // + + /** @brief Creates an array of the specified size. If the second argument is provided, the buffer is initialized with data from that pointer. + * + * This is the generic version for CPU RAM, CUDA, and OpenCL. Creates the memory in the currently active memory domain. + * + * @param handle The generic wrapper handle for multiple memory domains which will hold the new buffer. + * @param size_in_bytes Number of bytes to allocate + * @param ctx Optional context in which the matrix is created (one out of multiple OpenCL contexts, CUDA, host) + * @param host_ptr Pointer to data which will be copied to the new array. Must point to at least 'size_in_bytes' bytes of data. + * + */ + inline void memory_create(mem_handle & handle, vcl_size_t size_in_bytes, viennacl::context const & ctx, const void * host_ptr = NULL) + { + if (size_in_bytes > 0) + { + if (handle.get_active_handle_id() == MEMORY_NOT_INITIALIZED) + handle.switch_active_handle_id(ctx.memory_type()); + + switch (handle.get_active_handle_id()) + { + case MAIN_MEMORY: + handle.ram_handle() = cpu_ram::memory_create(size_in_bytes, host_ptr); + handle.raw_size(size_in_bytes); + break; +#ifdef VIENNACL_WITH_OPENCL + case OPENCL_MEMORY: + handle.opencl_handle().context(ctx.opencl_context()); + handle.opencl_handle() = opencl::memory_create(handle.opencl_handle().context(), size_in_bytes, host_ptr); + handle.raw_size(size_in_bytes); + break; +#endif +#ifdef VIENNACL_WITH_CUDA + case CUDA_MEMORY: + handle.cuda_handle() = cuda::memory_create(size_in_bytes, host_ptr); + handle.raw_size(size_in_bytes); + break; +#endif + case MEMORY_NOT_INITIALIZED: + throw memory_exception("not initialised!"); + default: + throw memory_exception("unknown memory handle!"); + } + } + } + + /* + inline void memory_create(mem_handle & handle, vcl_size_t size_in_bytes, const void * host_ptr = NULL) + { + viennacl::context ctx(default_memory_type()); + memory_create(handle, size_in_bytes, ctx, host_ptr); + }*/ + + + /** @brief Copies 'bytes_to_copy' bytes from address 'src_buffer + src_offset' to memory starting at address 'dst_buffer + dst_offset'. + * + * This is the generic version for CPU RAM, CUDA, and OpenCL. Copies the memory in the currently active memory domain. + * + * + * @param src_buffer A smart pointer to the begin of an allocated buffer + * @param dst_buffer A smart pointer to the end of an allocated buffer + * @param src_offset Offset of the first byte to be written from the address given by 'src_buffer' (in bytes) + * @param dst_offset Offset of the first byte to be written to the address given by 'dst_buffer' (in bytes) + * @param bytes_to_copy Number of bytes to be copied + */ + inline void memory_copy(mem_handle const & src_buffer, + mem_handle & dst_buffer, + vcl_size_t src_offset, + vcl_size_t dst_offset, + vcl_size_t bytes_to_copy) + { + assert( src_buffer.get_active_handle_id() == dst_buffer.get_active_handle_id() && bool("memory_copy() must be called on buffers from the same domain") ); + + if (bytes_to_copy > 0) + { + switch (src_buffer.get_active_handle_id()) + { + case MAIN_MEMORY: + cpu_ram::memory_copy(src_buffer.ram_handle(), dst_buffer.ram_handle(), src_offset, dst_offset, bytes_to_copy); + break; +#ifdef VIENNACL_WITH_OPENCL + case OPENCL_MEMORY: + opencl::memory_copy(src_buffer.opencl_handle(), dst_buffer.opencl_handle(), src_offset, dst_offset, bytes_to_copy); + break; +#endif +#ifdef VIENNACL_WITH_CUDA + case CUDA_MEMORY: + cuda::memory_copy(src_buffer.cuda_handle(), dst_buffer.cuda_handle(), src_offset, dst_offset, bytes_to_copy); + break; +#endif + case MEMORY_NOT_INITIALIZED: + throw memory_exception("not initialised!"); + default: + throw memory_exception("unknown memory handle!"); + } + } + } + + // TODO: Refine this concept. Maybe move to constructor? + /** @brief A 'shallow' copy operation from an initialized buffer to an uninitialized buffer. + * The uninitialized buffer just copies the raw handle. + */ + inline void memory_shallow_copy(mem_handle const & src_buffer, + mem_handle & dst_buffer) + { + assert( (dst_buffer.get_active_handle_id() == MEMORY_NOT_INITIALIZED) && bool("Shallow copy on already initialized memory not supported!")); + + switch (src_buffer.get_active_handle_id()) + { + case MAIN_MEMORY: + dst_buffer.switch_active_handle_id(src_buffer.get_active_handle_id()); + dst_buffer.ram_handle() = src_buffer.ram_handle(); + dst_buffer.raw_size(src_buffer.raw_size()); + break; +#ifdef VIENNACL_WITH_OPENCL + case OPENCL_MEMORY: + dst_buffer.switch_active_handle_id(src_buffer.get_active_handle_id()); + dst_buffer.opencl_handle() = src_buffer.opencl_handle(); + dst_buffer.raw_size(src_buffer.raw_size()); + break; +#endif +#ifdef VIENNACL_WITH_CUDA + case CUDA_MEMORY: + dst_buffer.switch_active_handle_id(src_buffer.get_active_handle_id()); + dst_buffer.cuda_handle() = src_buffer.cuda_handle(); + dst_buffer.raw_size(src_buffer.raw_size()); + break; +#endif + case MEMORY_NOT_INITIALIZED: + throw memory_exception("not initialised!"); + default: + throw memory_exception("unknown memory handle!"); + } + } + + /** @brief Writes data from main RAM identified by 'ptr' to the buffer identified by 'dst_buffer' + * + * This is the generic version for CPU RAM, CUDA, and OpenCL. Writes the memory in the currently active memory domain. + * + * @param dst_buffer A smart pointer to the beginning of an allocated buffer + * @param dst_offset Offset of the first written byte from the beginning of 'dst_buffer' (in bytes) + * @param bytes_to_write Number of bytes to be written + * @param ptr Pointer to the first byte to be written + * @param async Whether the operation should be asynchronous + */ + inline void memory_write(mem_handle & dst_buffer, + vcl_size_t dst_offset, + vcl_size_t bytes_to_write, + const void * ptr, + bool async = false) + { + if (bytes_to_write > 0) + { + switch (dst_buffer.get_active_handle_id()) + { + case MAIN_MEMORY: + cpu_ram::memory_write(dst_buffer.ram_handle(), dst_offset, bytes_to_write, ptr, async); + break; +#ifdef VIENNACL_WITH_OPENCL + case OPENCL_MEMORY: + opencl::memory_write(dst_buffer.opencl_handle(), dst_offset, bytes_to_write, ptr, async); + break; +#endif +#ifdef VIENNACL_WITH_CUDA + case CUDA_MEMORY: + cuda::memory_write(dst_buffer.cuda_handle(), dst_offset, bytes_to_write, ptr, async); + break; +#endif + case MEMORY_NOT_INITIALIZED: + throw memory_exception("not initialised!"); + default: + throw memory_exception("unknown memory handle!"); + } + } + } + + /** @brief Reads data from a buffer back to main RAM. + * + * This is the generic version for CPU RAM, CUDA, and OpenCL. Reads the memory from the currently active memory domain. + * + * @param src_buffer A smart pointer to the beginning of an allocated source buffer + * @param src_offset Offset of the first byte to be read from the beginning of src_buffer (in bytes_ + * @param bytes_to_read Number of bytes to be read + * @param ptr Location in main RAM where to read data should be written to + * @param async Whether the operation should be asynchronous + */ + inline void memory_read(mem_handle const & src_buffer, + vcl_size_t src_offset, + vcl_size_t bytes_to_read, + void * ptr, + bool async = false) + { + //finish(); //Fixes some issues with AMD APP SDK. However, might sacrifice a few percents of performance in some cases. + + if (bytes_to_read > 0) + { + switch (src_buffer.get_active_handle_id()) + { + case MAIN_MEMORY: + cpu_ram::memory_read(src_buffer.ram_handle(), src_offset, bytes_to_read, ptr, async); + break; +#ifdef VIENNACL_WITH_OPENCL + case OPENCL_MEMORY: + opencl::memory_read(src_buffer.opencl_handle(), src_offset, bytes_to_read, ptr, async); + break; +#endif +#ifdef VIENNACL_WITH_CUDA + case CUDA_MEMORY: + cuda::memory_read(src_buffer.cuda_handle(), src_offset, bytes_to_read, ptr, async); + break; +#endif + case MEMORY_NOT_INITIALIZED: + throw memory_exception("not initialised!"); + default: + throw memory_exception("unknown memory handle!"); + } + } + } + + + + namespace detail + { + template<typename T> + vcl_size_t element_size(memory_types /* mem_type */) + { + return sizeof(T); + } + + + template<> + inline vcl_size_t element_size<unsigned long>(memory_types + #ifdef VIENNACL_WITH_OPENCL + mem_type //in order to compile cleanly at -Wextra in GCC + #endif + ) + { +#ifdef VIENNACL_WITH_OPENCL + if (mem_type == OPENCL_MEMORY) + return sizeof(cl_ulong); +#endif + return sizeof(unsigned long); + } + + template<> + inline vcl_size_t element_size<long>(memory_types + #ifdef VIENNACL_WITH_OPENCL + mem_type //in order to compile cleanly at -Wextra in GCC + #endif + ) + { +#ifdef VIENNACL_WITH_OPENCL + if (mem_type == OPENCL_MEMORY) + return sizeof(cl_long); +#endif + return sizeof(long); + } + + + template<> + inline vcl_size_t element_size<unsigned int>(memory_types + #ifdef VIENNACL_WITH_OPENCL + mem_type //in order to compile cleanly at -Wextra in GCC + #endif + ) + { +#ifdef VIENNACL_WITH_OPENCL + if (mem_type == OPENCL_MEMORY) + return sizeof(cl_uint); +#endif + return sizeof(unsigned int); + } + + template<> + inline vcl_size_t element_size<int>(memory_types + #ifdef VIENNACL_WITH_OPENCL + mem_type //in order to compile cleanly at -Wextra in GCC + #endif + ) + { +#ifdef VIENNACL_WITH_OPENCL + if (mem_type == OPENCL_MEMORY) + return sizeof(cl_int); +#endif + return sizeof(int); + } + + + } + + + /** @brief Switches the active memory domain within a memory handle. Data is copied if the new active domain differs from the old one. Memory in the source handle is not free'd. */ + template<typename DataType> + void switch_memory_context(mem_handle & handle, viennacl::context new_ctx) + { + if (handle.get_active_handle_id() == new_ctx.memory_type()) + return; + + if (handle.get_active_handle_id() == viennacl::MEMORY_NOT_INITIALIZED || handle.raw_size() == 0) + { + handle.switch_active_handle_id(new_ctx.memory_type()); +#ifdef VIENNACL_WITH_OPENCL + if (new_ctx.memory_type() == OPENCL_MEMORY) + handle.opencl_handle().context(new_ctx.opencl_context()); +#endif + return; + } + + vcl_size_t size_dst = detail::element_size<DataType>(handle.get_active_handle_id()); + vcl_size_t size_src = detail::element_size<DataType>(new_ctx.memory_type()); + + if (size_dst != size_src) // OpenCL data element size not the same as host data element size + { + throw memory_exception("Heterogeneous data element sizes not yet supported!"); + } + else //no data conversion required + { + if (handle.get_active_handle_id() == MAIN_MEMORY) //we can access the existing data directly + { + switch (new_ctx.memory_type()) + { +#ifdef VIENNACL_WITH_OPENCL + case OPENCL_MEMORY: + handle.opencl_handle().context(new_ctx.opencl_context()); + handle.opencl_handle() = opencl::memory_create(handle.opencl_handle().context(), handle.raw_size(), handle.ram_handle().get()); + break; +#endif +#ifdef VIENNACL_WITH_CUDA + case CUDA_MEMORY: + handle.cuda_handle() = cuda::memory_create(handle.raw_size(), handle.ram_handle().get()); + break; +#endif + case MAIN_MEMORY: + default: + throw memory_exception("Invalid destination domain"); + } + } +#ifdef VIENNACL_WITH_OPENCL + else if (handle.get_active_handle_id() == OPENCL_MEMORY) // data can be dumped into destination directly + { + std::vector<DataType> buffer; + + switch (new_ctx.memory_type()) + { + case MAIN_MEMORY: + handle.ram_handle() = cpu_ram::memory_create(handle.raw_size()); + opencl::memory_read(handle.opencl_handle(), 0, handle.raw_size(), handle.ram_handle().get()); + break; +#ifdef VIENNACL_WITH_CUDA + case CUDA_MEMORY: + buffer.resize(handle.raw_size() / sizeof(DataType)); + opencl::memory_read(handle.opencl_handle(), 0, handle.raw_size(), &(buffer[0])); + cuda::memory_create(handle.cuda_handle(), handle.raw_size(), &(buffer[0])); + break; +#endif + default: + throw memory_exception("Invalid destination domain"); + } + } +#endif +#ifdef VIENNACL_WITH_CUDA + else //CUDA_MEMORY + { + std::vector<DataType> buffer; + + // write + switch (new_ctx.memory_type()) + { + case MAIN_MEMORY: + handle.ram_handle() = cpu_ram::memory_create(handle.raw_size()); + cuda::memory_read(handle.cuda_handle(), 0, handle.raw_size(), handle.ram_handle().get()); + break; +#ifdef VIENNACL_WITH_OPENCL + case OPENCL_MEMORY: + buffer.resize(handle.raw_size() / sizeof(DataType)); + cuda::memory_read(handle.cuda_handle(), 0, handle.raw_size(), &(buffer[0])); + handle.opencl_handle() = opencl::memory_create(handle.raw_size(), &(buffer[0])); + break; +#endif + default: + throw memory_exception("Unsupported source memory domain"); + } + } +#endif + + // everything succeeded so far, now switch to new domain: + handle.switch_active_handle_id(new_ctx.memory_type()); + + } // no data conversion + } + + + + /** @brief Copies data of the provided 'DataType' from 'handle_src' to 'handle_dst' and converts the data if the binary representation of 'DataType' among the memory domains differs. */ + template<typename DataType> + void typesafe_memory_copy(mem_handle const & handle_src, mem_handle & handle_dst) + { + if (handle_dst.get_active_handle_id() == MEMORY_NOT_INITIALIZED) + handle_dst.switch_active_handle_id(default_memory_type()); + + vcl_size_t element_size_src = detail::element_size<DataType>(handle_src.get_active_handle_id()); + vcl_size_t element_size_dst = detail::element_size<DataType>(handle_dst.get_active_handle_id()); + + if (element_size_src != element_size_dst) + { + // Data needs to be converted. + + typesafe_host_array<DataType> buffer_src(handle_src); + typesafe_host_array<DataType> buffer_dst(handle_dst, handle_src.raw_size() / element_size_src); + + // + // Step 1: Fill buffer_dst depending on where the data resides: + // + DataType const * src_data; + switch (handle_src.get_active_handle_id()) + { + case MAIN_MEMORY: + src_data = reinterpret_cast<DataType const *>(handle_src.ram_handle().get()); + for (vcl_size_t i=0; i<buffer_dst.size(); ++i) + buffer_dst.set(i, src_data[i]); + break; + +#ifdef VIENNACL_WITH_OPENCL + case OPENCL_MEMORY: + buffer_src.resize(handle_src, handle_src.raw_size() / element_size_src); + opencl::memory_read(handle_src.opencl_handle(), 0, buffer_src.raw_size(), buffer_src.get()); + for (vcl_size_t i=0; i<buffer_dst.size(); ++i) + buffer_dst.set(i, buffer_src[i]); + break; +#endif +#ifdef VIENNACL_WITH_CUDA + case CUDA_MEMORY: + buffer_src.resize(handle_src, handle_src.raw_size() / element_size_src); + cuda::memory_read(handle_src.cuda_handle(), 0, buffer_src.raw_size(), buffer_src.get()); + for (vcl_size_t i=0; i<buffer_dst.size(); ++i) + buffer_dst.set(i, buffer_src[i]); + break; +#endif + + default: + throw memory_exception("unsupported memory domain"); + } + + // + // Step 2: Write to destination + // + if (handle_dst.raw_size() == buffer_dst.raw_size()) + viennacl::backend::memory_write(handle_dst, 0, buffer_dst.raw_size(), buffer_dst.get()); + else + viennacl::backend::memory_create(handle_dst, buffer_dst.raw_size(), viennacl::traits::context(handle_dst), buffer_dst.get()); + + } + else + { + // No data conversion required. + typesafe_host_array<DataType> buffer(handle_src); + + switch (handle_src.get_active_handle_id()) + { + case MAIN_MEMORY: + switch (handle_dst.get_active_handle_id()) + { + case MAIN_MEMORY: + case OPENCL_MEMORY: + case CUDA_MEMORY: + if (handle_dst.raw_size() == handle_src.raw_size()) + viennacl::backend::memory_write(handle_dst, 0, handle_src.raw_size(), handle_src.ram_handle().get()); + else + viennacl::backend::memory_create(handle_dst, handle_src.raw_size(), viennacl::traits::context(handle_dst), handle_src.ram_handle().get()); + break; + + default: + throw memory_exception("unsupported destination memory domain"); + } + break; + + case OPENCL_MEMORY: + switch (handle_dst.get_active_handle_id()) + { + case MAIN_MEMORY: + if (handle_dst.raw_size() != handle_src.raw_size()) + viennacl::backend::memory_create(handle_dst, handle_src.raw_size(), viennacl::traits::context(handle_dst)); + viennacl::backend::memory_read(handle_src, 0, handle_src.raw_size(), handle_dst.ram_handle().get()); + break; + + case OPENCL_MEMORY: + if (handle_dst.raw_size() != handle_src.raw_size()) + viennacl::backend::memory_create(handle_dst, handle_src.raw_size(), viennacl::traits::context(handle_dst)); + viennacl::backend::memory_copy(handle_src, handle_dst, 0, 0, handle_src.raw_size()); + break; + + case CUDA_MEMORY: + if (handle_dst.raw_size() != handle_src.raw_size()) + viennacl::backend::memory_create(handle_dst, handle_src.raw_size(), viennacl::traits::context(handle_dst)); + buffer.resize(handle_src, handle_src.raw_size() / element_size_src); + viennacl::backend::memory_read(handle_src, 0, handle_src.raw_size(), buffer.get()); + viennacl::backend::memory_write(handle_dst, 0, handle_src.raw_size(), buffer.get()); + break; + + default: + throw memory_exception("unsupported destination memory domain"); + } + break; + + case CUDA_MEMORY: + switch (handle_dst.get_active_handle_id()) + { + case MAIN_MEMORY: + if (handle_dst.raw_size() != handle_src.raw_size()) + viennacl::backend::memory_create(handle_dst, handle_src.raw_size(), viennacl::traits::context(handle_dst)); + viennacl::backend::memory_read(handle_src, 0, handle_src.raw_size(), handle_dst.ram_handle().get()); + break; + + case OPENCL_MEMORY: + if (handle_dst.raw_size() != handle_src.raw_size()) + viennacl::backend::memory_create(handle_dst, handle_src.raw_size(), viennacl::traits::context(handle_dst)); + buffer.resize(handle_src, handle_src.raw_size() / element_size_src); + viennacl::backend::memory_read(handle_src, 0, handle_src.raw_size(), buffer.get()); + viennacl::backend::memory_write(handle_dst, 0, handle_src.raw_size(), buffer.get()); + break; + + case CUDA_MEMORY: + if (handle_dst.raw_size() != handle_src.raw_size()) + viennacl::backend::memory_create(handle_dst, handle_src.raw_size(), viennacl::traits::context(handle_dst)); + viennacl::backend::memory_copy(handle_src, handle_dst, 0, 0, handle_src.raw_size()); + break; + + default: + throw memory_exception("unsupported destination memory domain"); + } + break; + + default: + throw memory_exception("unsupported source memory domain"); + } + + } + } + + +} //backend + +// +// Convenience layer: +// +/** @brief Generic convenience routine for migrating data of an object to a new memory domain */ +template<typename T> +void switch_memory_context(T & obj, viennacl::context new_ctx) +{ + obj.switch_memory_context(new_ctx); +} + +} //viennacl +#endif http://git-wip-us.apache.org/repos/asf/mahout/blob/f7c1f802/native-viennaCL/src/main/cpp/viennacl/backend/opencl.hpp ---------------------------------------------------------------------- diff --git a/native-viennaCL/src/main/cpp/viennacl/backend/opencl.hpp b/native-viennaCL/src/main/cpp/viennacl/backend/opencl.hpp new file mode 100644 index 0000000..a8be55a --- /dev/null +++ b/native-viennaCL/src/main/cpp/viennacl/backend/opencl.hpp @@ -0,0 +1,151 @@ +#ifndef VIENNACL_BACKEND_OPENCL_HPP_ +#define VIENNACL_BACKEND_OPENCL_HPP_ + +/* ========================================================================= + Copyright (c) 2010-2016, Institute for Microelectronics, + Institute for Analysis and Scientific Computing, + TU Wien. + Portions of this software are copyright by UChicago Argonne, LLC. + + ----------------- + ViennaCL - The Vienna Computing Library + ----------------- + + Project Head: Karl Rupp [email protected] + + (A list of authors and contributors can be found in the manual) + + License: MIT (X11), see file LICENSE in the base directory +============================================================================= */ + +/** @file viennacl/backend/opencl.hpp + @brief Implementations for the OpenCL backend functionality +*/ + + +#include <vector> +#include "viennacl/ocl/handle.hpp" +#include "viennacl/ocl/backend.hpp" + +namespace viennacl +{ +namespace backend +{ +namespace opencl +{ + +// Requirements for backend: + +// * memory_create(size, host_ptr) +// * memory_copy(src, dest, offset_src, offset_dest, size) +// * memory_write_from_main_memory(src, offset, size, +// dest, offset, size) +// * memory_read_to_main_memory(src, offset, size +// dest, offset, size) +// * +// + +/** @brief Creates an array of the specified size in the current OpenCL context. If the second argument is provided, the buffer is initialized with data from that pointer. + * + * @param size_in_bytes Number of bytes to allocate + * @param host_ptr Pointer to data which will be copied to the new array. Must point to at least 'size_in_bytes' bytes of data. + * @param ctx Optional context in which the matrix is created (one out of multiple OpenCL contexts, CUDA, host) + * + */ +inline cl_mem memory_create(viennacl::ocl::context const & ctx, vcl_size_t size_in_bytes, const void * host_ptr = NULL) +{ + //std::cout << "Creating buffer (" << size_in_bytes << " bytes) host buffer " << host_ptr << " in context " << &ctx << std::endl; + return ctx.create_memory_without_smart_handle(CL_MEM_READ_WRITE, static_cast<unsigned int>(size_in_bytes), const_cast<void *>(host_ptr)); +} + +/** @brief Copies 'bytes_to_copy' bytes from address 'src_buffer + src_offset' in the OpenCL context to memory starting at address 'dst_buffer + dst_offset' in the same OpenCL context. + * + * @param src_buffer A smart pointer to the begin of an allocated OpenCL buffer + * @param dst_buffer A smart pointer to the end of an allocated OpenCL buffer + * @param src_offset Offset of the first byte to be written from the address given by 'src_buffer' (in bytes) + * @param dst_offset Offset of the first byte to be written to the address given by 'dst_buffer' (in bytes) + * @param bytes_to_copy Number of bytes to be copied + */ +inline void memory_copy(viennacl::ocl::handle<cl_mem> const & src_buffer, + viennacl::ocl::handle<cl_mem> & dst_buffer, + vcl_size_t src_offset, + vcl_size_t dst_offset, + vcl_size_t bytes_to_copy) +{ + assert( &src_buffer.context() == &dst_buffer.context() && bool("Transfer between memory buffers in different contexts not supported yet!")); + + viennacl::ocl::context & memory_context = const_cast<viennacl::ocl::context &>(src_buffer.context()); + cl_int err = clEnqueueCopyBuffer(memory_context.get_queue().handle().get(), + src_buffer.get(), + dst_buffer.get(), + src_offset, + dst_offset, + bytes_to_copy, + 0, NULL, NULL); //events + VIENNACL_ERR_CHECK(err); +} + + +/** @brief Writes data from main RAM identified by 'ptr' to the OpenCL buffer identified by 'dst_buffer' + * + * @param dst_buffer A smart pointer to the beginning of an allocated OpenCL buffer + * @param dst_offset Offset of the first written byte from the beginning of 'dst_buffer' (in bytes) + * @param bytes_to_copy Number of bytes to be copied + * @param ptr Pointer to the first byte to be written + * @param async Whether the operation should be asynchronous + */ +inline void memory_write(viennacl::ocl::handle<cl_mem> & dst_buffer, + vcl_size_t dst_offset, + vcl_size_t bytes_to_copy, + const void * ptr, + bool async = false) +{ + + viennacl::ocl::context & memory_context = const_cast<viennacl::ocl::context &>(dst_buffer.context()); + +#if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_DEVICE) + std::cout << "Writing data (" << bytes_to_copy << " bytes, offset " << dst_offset << ") to OpenCL buffer " << dst_buffer.get() << " with queue " << memory_context.get_queue().handle().get() << " from " << ptr << std::endl; +#endif + + cl_int err = clEnqueueWriteBuffer(memory_context.get_queue().handle().get(), + dst_buffer.get(), + async ? CL_FALSE : CL_TRUE, //blocking + dst_offset, + bytes_to_copy, + ptr, + 0, NULL, NULL); //events + VIENNACL_ERR_CHECK(err); +} + + +/** @brief Reads data from an OpenCL buffer back to main RAM. + * + * @param src_buffer A smart pointer to the beginning of an allocated OpenCL source buffer + * @param src_offset Offset of the first byte to be read from the beginning of src_buffer (in bytes_ + * @param bytes_to_copy Number of bytes to be read + * @param ptr Location in main RAM where to read data should be written to + * @param async Whether the operation should be asynchronous + */ +inline void memory_read(viennacl::ocl::handle<cl_mem> const & src_buffer, + vcl_size_t src_offset, + vcl_size_t bytes_to_copy, + void * ptr, + bool async = false) +{ + //std::cout << "Reading data (" << bytes_to_copy << " bytes, offset " << src_offset << ") from OpenCL buffer " << src_buffer.get() << " to " << ptr << std::endl; + viennacl::ocl::context & memory_context = const_cast<viennacl::ocl::context &>(src_buffer.context()); + cl_int err = clEnqueueReadBuffer(memory_context.get_queue().handle().get(), + src_buffer.get(), + async ? CL_FALSE : CL_TRUE, //blocking + src_offset, + bytes_to_copy, + ptr, + 0, NULL, NULL); //events + VIENNACL_ERR_CHECK(err); +} + + +} +} //backend +} //viennacl +#endif http://git-wip-us.apache.org/repos/asf/mahout/blob/f7c1f802/native-viennaCL/src/main/cpp/viennacl/backend/util.hpp ---------------------------------------------------------------------- diff --git a/native-viennaCL/src/main/cpp/viennacl/backend/util.hpp b/native-viennaCL/src/main/cpp/viennacl/backend/util.hpp new file mode 100644 index 0000000..9aaeb2e --- /dev/null +++ b/native-viennaCL/src/main/cpp/viennacl/backend/util.hpp @@ -0,0 +1,268 @@ +#ifndef VIENNACL_BACKEND_UTIL_HPP +#define VIENNACL_BACKEND_UTIL_HPP + +/* ========================================================================= + Copyright (c) 2010-2016, Institute for Microelectronics, + Institute for Analysis and Scientific Computing, + TU Wien. + Portions of this software are copyright by UChicago Argonne, LLC. + + ----------------- + ViennaCL - The Vienna Computing Library + ----------------- + + Project Head: Karl Rupp [email protected] + + (A list of authors and contributors can be found in the manual) + + License: MIT (X11), see file LICENSE in the base directory +============================================================================= */ + +/** @file viennacl/backend/util.hpp + @brief Helper functionality for working with different memory domains +*/ + +#include <vector> +#include <cassert> + +#include "viennacl/forwards.h" +#include "viennacl/backend/mem_handle.hpp" + +#ifdef VIENNACL_WITH_OPENCL +#include "viennacl/backend/opencl.hpp" +#endif + + +namespace viennacl +{ +namespace backend +{ +namespace detail +{ + + /** @brief Helper struct for converting a type to its OpenCL pendant. */ + template<typename T> + struct convert_to_opencl + { + typedef T type; + enum { special = 0 }; + }; + +#ifdef VIENNACL_WITH_OPENCL + template<> + struct convert_to_opencl<unsigned int> + { + typedef cl_uint type; + //enum { special = (sizeof(unsigned int) != sizeof(cl_uint)) }; + enum { special = 1 }; + }; + + template<> + struct convert_to_opencl<int> + { + typedef cl_int type; + //enum { special = (sizeof(int) != sizeof(cl_int)) }; + enum { special = 1 }; + }; + + + template<> + struct convert_to_opencl<unsigned long> + { + typedef cl_ulong type; + //enum { special = (sizeof(unsigned long) != sizeof(cl_ulong)) }; + enum { special = 1 }; + }; + + template<> + struct convert_to_opencl<long> + { + typedef cl_long type; + //enum { special = (sizeof(long) != sizeof(cl_long)) }; + enum { special = 1 }; + }; +#endif + + +} //namespace detail + + +/** @brief Helper class implementing an array on the host. Default case: No conversion necessary */ +template<typename T, bool special = detail::convert_to_opencl<T>::special> +class typesafe_host_array +{ + typedef T cpu_type; + typedef typename detail::convert_to_opencl<T>::type target_type; + +public: + explicit typesafe_host_array() : bytes_buffer_(NULL), buffer_size_(0) {} + + explicit typesafe_host_array(mem_handle const & handle, vcl_size_t num = 0) : bytes_buffer_(NULL), buffer_size_(sizeof(cpu_type) * num) + { + resize(handle, num); + } + + ~typesafe_host_array() { delete[] bytes_buffer_; } + + // + // Setter and Getter + // + void * get() { return reinterpret_cast<void *>(bytes_buffer_); } + vcl_size_t raw_size() const { return buffer_size_; } + vcl_size_t element_size() const { return sizeof(cpu_type); } + vcl_size_t size() const { return buffer_size_ / element_size(); } + template<typename U> + void set(vcl_size_t index, U value) + { + reinterpret_cast<cpu_type *>(bytes_buffer_)[index] = static_cast<cpu_type>(value); + } + + // + // Resize functionality + // + + /** @brief Resize without initializing the new memory */ + void raw_resize(mem_handle const & /*handle*/, vcl_size_t num) + { + buffer_size_ = sizeof(cpu_type) * num; + + if (num > 0) + { + delete[] bytes_buffer_; + + bytes_buffer_ = new char[buffer_size_]; + } + } + + /** @brief Resize including initialization of new memory (cf. std::vector<>) */ + void resize(mem_handle const & handle, vcl_size_t num) + { + raw_resize(handle, num); + + if (num > 0) + { + for (vcl_size_t i=0; i<buffer_size_; ++i) + bytes_buffer_[i] = 0; + } + } + + cpu_type operator[](vcl_size_t index) const + { + assert(index < size() && bool("index out of bounds")); + + return reinterpret_cast<cpu_type *>(bytes_buffer_)[index]; + } + +private: + char * bytes_buffer_; + vcl_size_t buffer_size_; +}; + + + + +/** @brief Special host array type for conversion between OpenCL types and pure CPU types */ +template<typename T> +class typesafe_host_array<T, true> +{ + typedef T cpu_type; + typedef typename detail::convert_to_opencl<T>::type target_type; + +public: + explicit typesafe_host_array() : convert_to_opencl_( (default_memory_type() == OPENCL_MEMORY) ? true : false), bytes_buffer_(NULL), buffer_size_(0) {} + + explicit typesafe_host_array(mem_handle const & handle, vcl_size_t num = 0) : convert_to_opencl_(false), bytes_buffer_(NULL), buffer_size_(sizeof(cpu_type) * num) + { + resize(handle, num); + } + + ~typesafe_host_array() { delete[] bytes_buffer_; } + + // + // Setter and Getter + // + + template<typename U> + void set(vcl_size_t index, U value) + { +#ifdef VIENNACL_WITH_OPENCL + if (convert_to_opencl_) + reinterpret_cast<target_type *>(bytes_buffer_)[index] = static_cast<target_type>(value); + else +#endif + reinterpret_cast<cpu_type *>(bytes_buffer_)[index] = static_cast<cpu_type>(value); + } + + void * get() { return reinterpret_cast<void *>(bytes_buffer_); } + cpu_type operator[](vcl_size_t index) const + { + assert(index < size() && bool("index out of bounds")); +#ifdef VIENNACL_WITH_OPENCL + if (convert_to_opencl_) + return static_cast<cpu_type>(reinterpret_cast<target_type *>(bytes_buffer_)[index]); +#endif + return reinterpret_cast<cpu_type *>(bytes_buffer_)[index]; + } + + vcl_size_t raw_size() const { return buffer_size_; } + vcl_size_t element_size() const + { +#ifdef VIENNACL_WITH_OPENCL + if (convert_to_opencl_) + return sizeof(target_type); +#endif + return sizeof(cpu_type); + } + vcl_size_t size() const { return buffer_size_ / element_size(); } + + // + // Resize functionality + // + + /** @brief Resize without initializing the new memory */ + void raw_resize(mem_handle const & handle, vcl_size_t num) + { + buffer_size_ = sizeof(cpu_type) * num; + (void)handle; //silence unused variable warning if compiled without OpenCL support + +#ifdef VIENNACL_WITH_OPENCL + memory_types mem_type = handle.get_active_handle_id(); + if (mem_type == MEMORY_NOT_INITIALIZED) + mem_type = default_memory_type(); + + if (mem_type == OPENCL_MEMORY) + { + convert_to_opencl_ = true; + buffer_size_ = sizeof(target_type) * num; + } +#endif + + if (num > 0) + { + delete[] bytes_buffer_; + + bytes_buffer_ = new char[buffer_size_]; + } + } + + /** @brief Resize including initialization of new memory (cf. std::vector<>) */ + void resize(mem_handle const & handle, vcl_size_t num) + { + raw_resize(handle, num); + + if (num > 0) + { + for (vcl_size_t i=0; i<buffer_size_; ++i) + bytes_buffer_[i] = 0; + } + } + +private: + bool convert_to_opencl_; + char * bytes_buffer_; + vcl_size_t buffer_size_; +}; + +} //backend +} //viennacl +#endif http://git-wip-us.apache.org/repos/asf/mahout/blob/f7c1f802/native-viennaCL/src/main/cpp/viennacl/circulant_matrix.hpp ---------------------------------------------------------------------- diff --git a/native-viennaCL/src/main/cpp/viennacl/circulant_matrix.hpp b/native-viennaCL/src/main/cpp/viennacl/circulant_matrix.hpp new file mode 100644 index 0000000..1ee13d5 --- /dev/null +++ b/native-viennaCL/src/main/cpp/viennacl/circulant_matrix.hpp @@ -0,0 +1,359 @@ +#ifndef VIENNACL_CIRCULANT_MATRIX_HPP +#define VIENNACL_CIRCULANT_MATRIX_HPP + +/* ========================================================================= + Copyright (c) 2010-2016, Institute for Microelectronics, + Institute for Analysis and Scientific Computing, + TU Wien. + Portions of this software are copyright by UChicago Argonne, LLC. + + ----------------- + ViennaCL - The Vienna Computing Library + ----------------- + + Project Head: Karl Rupp [email protected] + + (A list of authors and contributors can be found in the manual) + + License: MIT (X11), see file LICENSE in the base directory +============================================================================= */ + +/** @file circulant_matrix.hpp + @brief Implementation of the circulant_matrix class for efficient manipulation of circulant matrices. Experimental. +*/ + +#include "viennacl/forwards.h" +#include "viennacl/vector.hpp" +#include "viennacl/ocl/backend.hpp" + +#include "viennacl/linalg/circulant_matrix_operations.hpp" + +#include "viennacl/fft.hpp" + +namespace viennacl +{ +/** @brief A Circulant matrix class + * + * @tparam NumericT The underlying scalar type (either float or double) + * @tparam AlignmentV The internal memory size is given by (size()/AlignmentV + 1) * AlignmentV. AlignmentV must be a power of two. Best values or usually 4, 8 or 16, higher values are usually a waste of memory. + */ +template<class NumericT, unsigned int AlignmentV> +class circulant_matrix +{ +public: + typedef viennacl::backend::mem_handle handle_type; + typedef scalar<typename viennacl::tools::CHECK_SCALAR_TEMPLATE_ARGUMENT<NumericT>::ResultType> value_type; + + /** + * @brief The default constructor. Does not allocate any memory. + * + */ + explicit circulant_matrix() {} + + /** + * @brief Creates the matrix with the given size + * + * @param rows Number of rows of the matrix + * @param cols Number of columns of the matrix + */ + explicit circulant_matrix(vcl_size_t rows, vcl_size_t cols) : elements_(rows) + { + assert(rows == cols && bool("Circulant matrix must be square!")); + (void)cols; // avoid 'unused parameter' warning in optimized builds + } + + /** @brief Resizes the matrix. + * Existing entries can be preserved + * + * @param sz New size of matrix + * @param preserve If true, existing values are preserved. + */ + void resize(vcl_size_t sz, bool preserve = true) + { + elements_.resize(sz, preserve); + } + + /** @brief Returns the OpenCL handle + * + * @return OpenCL handle + */ + handle_type const & handle() const { return elements_.handle(); } + + /** + * @brief Returns an internal viennacl::vector, which represents a circulant matrix elements + * + */ + viennacl::vector<NumericT, AlignmentV> & elements() { return elements_; } + viennacl::vector<NumericT, AlignmentV> const & elements() const { return elements_; } + + /** + * @brief Returns the number of rows of the matrix + */ + vcl_size_t size1() const { return elements_.size(); } + + /** + * @brief Returns the number of columns of the matrix + */ + vcl_size_t size2() const { return elements_.size(); } + + /** @brief Returns the internal size of matrix representtion. + * Usually required for launching OpenCL kernels only + * + * @return Internal size of matrix representation + */ + vcl_size_t internal_size() const { return elements_.internal_size(); } + + /** + * @brief Read-write access to a single element of the matrix + * + * @param row_index Row index of accessed element + * @param col_index Column index of accessed element + * @return Proxy for matrix entry + */ + entry_proxy<NumericT> operator()(vcl_size_t row_index, vcl_size_t col_index) + { + long index = static_cast<long>(row_index) - static_cast<long>(col_index); + + assert(row_index < size1() && col_index < size2() && bool("Invalid access")); + + while (index < 0) + index += static_cast<long>(size1()); + return elements_[static_cast<vcl_size_t>(index)]; + } + + /** + * @brief += operation for circulant matrices + * + * @param that Matrix which will be added + * @return Result of addition + */ + circulant_matrix<NumericT, AlignmentV>& operator +=(circulant_matrix<NumericT, AlignmentV>& that) + { + elements_ += that.elements(); + return *this; + } + +private: + circulant_matrix(circulant_matrix const &) {} + circulant_matrix & operator=(circulant_matrix const & t); + + viennacl::vector<NumericT, AlignmentV> elements_; +}; + +/** @brief Copies a circulant matrix from the std::vector to the OpenCL device (either GPU or multi-core CPU) + * + * + * @param cpu_vec A std::vector on the host. + * @param gpu_mat A circulant_matrix from ViennaCL + */ +template<typename NumericT, unsigned int AlignmentV> +void copy(std::vector<NumericT>& cpu_vec, circulant_matrix<NumericT, AlignmentV>& gpu_mat) +{ + assert( (gpu_mat.size1() == 0 || cpu_vec.size() == gpu_mat.size1()) && bool("Size mismatch")); + copy(cpu_vec, gpu_mat.elements()); +} + +/** @brief Copies a circulant matrix from the OpenCL device (either GPU or multi-core CPU) to the std::vector + * + * + * @param gpu_mat A circulant_matrix from ViennaCL + * @param cpu_vec A std::vector on the host. + */ +template<typename NumericT, unsigned int AlignmentV> +void copy(circulant_matrix<NumericT, AlignmentV>& gpu_mat, std::vector<NumericT>& cpu_vec) +{ + assert(cpu_vec.size() == gpu_mat.size1() && bool("Size mismatch")); + copy(gpu_mat.elements(), cpu_vec); +} + +/** @brief Copies a circulant matrix from the OpenCL device (either GPU or multi-core CPU) to the matrix-like object + * + * + * @param circ_src A circulant_matrix from ViennaCL + * @param com_dst A matrix-like object + */ +template<typename NumericT, unsigned int AlignmentV, typename MatrixT> +void copy(circulant_matrix<NumericT, AlignmentV>& circ_src, MatrixT& com_dst) +{ + vcl_size_t size = circ_src.size1(); + assert(size == viennacl::traits::size1(com_dst) && bool("Size mismatch")); + assert(size == viennacl::traits::size2(com_dst) && bool("Size mismatch")); + std::vector<NumericT> tmp(size); + copy(circ_src, tmp); + + for (vcl_size_t i = 0; i < size; i++) + { + for (vcl_size_t j = 0; j < size; j++) + { + long index = static_cast<long>(i) - static_cast<long>(j); + if (index < 0) + index += static_cast<long>(size); + com_dst(i, j) = tmp[static_cast<vcl_size_t>(index)]; + } + } +} + +/** @brief Copies a the matrix-like object to the circulant matrix from the OpenCL device (either GPU or multi-core CPU) + * + * + * @param com_src A std::vector on the host + * @param circ_dst A circulant_matrix from ViennaCL + */ +template<typename NumericT, unsigned int AlignmentV, typename MatrixT> +void copy(MatrixT& com_src, circulant_matrix<NumericT, AlignmentV>& circ_dst) +{ + assert( (circ_dst.size1() == 0 || circ_dst.size1() == viennacl::traits::size1(com_src)) && bool("Size mismatch")); + assert( (circ_dst.size2() == 0 || circ_dst.size2() == viennacl::traits::size2(com_src)) && bool("Size mismatch")); + + vcl_size_t size = viennacl::traits::size1(com_src); + + std::vector<NumericT> tmp(size); + + for (vcl_size_t i = 0; i < size; i++) tmp[i] = com_src(i, 0); + + copy(tmp, circ_dst); +} + +/*namespace linalg + { + template<typename NumericT, unsigned int AlignmentV, unsigned int VECTOR_AlignmentV> + void prod_impl(circulant_matrix<NumericT, AlignmentV> const & mat, + vector<NumericT, VECTOR_AlignmentV> const & vec, + vector<NumericT, VECTOR_AlignmentV>& result) { + viennacl::vector<NumericT, VECTOR_AlignmentV> circ(mat.elements().size() * 2); + fft::real_to_complex(mat.elements(), circ, mat.elements().size()); + + viennacl::vector<NumericT, VECTOR_AlignmentV> tmp(vec.size() * 2); + viennacl::vector<NumericT, VECTOR_AlignmentV> tmp2(vec.size() * 2); + + fft::real_to_complex(vec, tmp, vec.size()); + fft::convolve(circ, tmp, tmp2); + fft::complex_to_real(tmp2, result, vec.size()); + } + }*/ + +/** @brief Prints the matrix. Output is compatible to boost::numeric::ublas + * + * @param s STL output stream + * @param gpu_matrix A ViennaCL circulant matrix + */ +template<class NumericT, unsigned int AlignmentV> +std::ostream & operator<<(std::ostream& s, circulant_matrix<NumericT, AlignmentV>& gpu_matrix) +{ + vcl_size_t size = gpu_matrix.size1(); + std::vector<NumericT> tmp(size); + copy(gpu_matrix, tmp); + s << "[" << size << "," << size << "]("; + + for (vcl_size_t i = 0; i < size; i++) + { + s << "("; + for (vcl_size_t j = 0; j < size; j++) + { + long index = static_cast<long>(i) - static_cast<long>(j); + if (index < 0) index = static_cast<long>(size) + index; + s << tmp[vcl_size_t(index)]; + //s << index; + if (j < (size - 1)) s << ","; + } + s << ")"; + } + s << ")"; + return s; +} + +// +// Specify available operations: +// + +/** \cond */ + +namespace linalg +{ +namespace detail +{ + // x = A * y + template<typename T, unsigned int A> + struct op_executor<vector_base<T>, op_assign, vector_expression<const circulant_matrix<T, A>, const vector_base<T>, op_prod> > + { + static void apply(vector_base<T> & lhs, vector_expression<const circulant_matrix<T, A>, const vector_base<T>, op_prod> const & rhs) + { + // check for the special case x = A * x + if (viennacl::traits::handle(lhs) == viennacl::traits::handle(rhs.rhs())) + { + viennacl::vector<T> temp(lhs); + viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), temp); + lhs = temp; + } + else + viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs); + } + }; + + template<typename T, unsigned int A> + struct op_executor<vector_base<T>, op_inplace_add, vector_expression<const circulant_matrix<T, A>, const vector_base<T>, op_prod> > + { + static void apply(vector_base<T> & lhs, vector_expression<const circulant_matrix<T, A>, const vector_base<T>, op_prod> const & rhs) + { + viennacl::vector<T> temp(lhs); + viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), temp); + lhs += temp; + } + }; + + template<typename T, unsigned int A> + struct op_executor<vector_base<T>, op_inplace_sub, vector_expression<const circulant_matrix<T, A>, const vector_base<T>, op_prod> > + { + static void apply(vector_base<T> & lhs, vector_expression<const circulant_matrix<T, A>, const vector_base<T>, op_prod> const & rhs) + { + viennacl::vector<T> temp(lhs); + viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), temp); + lhs -= temp; + } + }; + + + // x = A * vec_op + template<typename T, unsigned int A, typename LHS, typename RHS, typename OP> + struct op_executor<vector_base<T>, op_assign, vector_expression<const circulant_matrix<T, A>, const vector_expression<const LHS, const RHS, OP>, op_prod> > + { + static void apply(vector_base<T> & lhs, vector_expression<const circulant_matrix<T, A>, const vector_expression<const LHS, const RHS, OP>, op_prod> const & rhs) + { + viennacl::vector<T> temp(rhs.rhs()); + viennacl::linalg::prod_impl(rhs.lhs(), temp, lhs); + } + }; + + // x = A * vec_op + template<typename T, unsigned int A, typename LHS, typename RHS, typename OP> + struct op_executor<vector_base<T>, op_inplace_add, vector_expression<const circulant_matrix<T, A>, vector_expression<const LHS, const RHS, OP>, op_prod> > + { + static void apply(vector_base<T> & lhs, vector_expression<const circulant_matrix<T, A>, vector_expression<const LHS, const RHS, OP>, op_prod> const & rhs) + { + viennacl::vector<T> temp(rhs.rhs()); + viennacl::vector<T> temp_result(lhs); + viennacl::linalg::prod_impl(rhs.lhs(), temp, temp_result); + lhs += temp_result; + } + }; + + // x = A * vec_op + template<typename T, unsigned int A, typename LHS, typename RHS, typename OP> + struct op_executor<vector_base<T>, op_inplace_sub, vector_expression<const circulant_matrix<T, A>, const vector_expression<const LHS, const RHS, OP>, op_prod> > + { + static void apply(vector_base<T> & lhs, vector_expression<const circulant_matrix<T, A>, const vector_expression<const LHS, const RHS, OP>, op_prod> const & rhs) + { + viennacl::vector<T> temp(rhs.rhs()); + viennacl::vector<T> temp_result(lhs); + viennacl::linalg::prod_impl(rhs.lhs(), temp, temp_result); + lhs -= temp_result; + } + }; + +} // namespace detail +} // namespace linalg + +/** \endcond */ +} + +#endif // VIENNACL_CIRCULANT_MATRIX_HPP
