http://git-wip-us.apache.org/repos/asf/mahout/blob/7ae549fa/native-viennaCL/src/main/cpp/libviennacl/src/init_matrix.hpp ---------------------------------------------------------------------- diff --git a/native-viennaCL/src/main/cpp/libviennacl/src/init_matrix.hpp b/native-viennaCL/src/main/cpp/libviennacl/src/init_matrix.hpp deleted file mode 100644 index e463e88..0000000 --- a/native-viennaCL/src/main/cpp/libviennacl/src/init_matrix.hpp +++ /dev/null @@ -1,101 +0,0 @@ -/* ========================================================================= - Copyright (c) 2010-2014, Institute for Microelectronics, - Institute for Analysis and Scientific Computing, - TU Wien. - Portions of this software are copyright by UChicago Argonne, LLC. - - ----------------- - ViennaCL - The Vienna Computing Library - ----------------- - - Project Head: Karl Rupp [email protected] - - (A list of authors and contributors can be found in the PDF manual) - - License: MIT (X11), see file LICENSE in the base directory -============================================================================= */ - -#include "viennacl.hpp" -#include "viennacl/backend/mem_handle.hpp" - - - -static ViennaCLStatus init_cuda_matrix(viennacl::backend::mem_handle & h, ViennaCLMatrix A) -{ -#ifdef VIENNACL_WITH_CUDA - h.switch_active_handle_id(viennacl::CUDA_MEMORY); - h.cuda_handle().reset(A->cuda_mem); - h.cuda_handle().inc(); - if (A->precision == ViennaCLFloat) - h.raw_size(static_cast<viennacl::vcl_size_t>(A->internal_size1) * static_cast<viennacl::vcl_size_t>(A->internal_size2) * sizeof(float)); // not necessary, but still set for conciseness - else if (A->precision == ViennaCLDouble) - h.raw_size(static_cast<viennacl::vcl_size_t>(A->internal_size1) * static_cast<viennacl::vcl_size_t>(A->internal_size2) * sizeof(double)); // not necessary, but still set for conciseness - else - return ViennaCLGenericFailure; - - return ViennaCLSuccess; -#else - (void)h; - (void)A; - return ViennaCLGenericFailure; -#endif -} - -static ViennaCLStatus init_opencl_matrix(viennacl::backend::mem_handle & h, ViennaCLMatrix A) -{ -#ifdef VIENNACL_WITH_OPENCL - h.switch_active_handle_id(viennacl::OPENCL_MEMORY); - h.opencl_handle() = A->opencl_mem; - h.opencl_handle().inc(); - if (A->precision == ViennaCLFloat) - h.raw_size(static_cast<viennacl::vcl_size_t>(A->internal_size1) * static_cast<viennacl::vcl_size_t>(A->internal_size2) * sizeof(float)); // not necessary, but still set for conciseness - else if (A->precision == ViennaCLDouble) - h.raw_size(static_cast<viennacl::vcl_size_t>(A->internal_size1) * static_cast<viennacl::vcl_size_t>(A->internal_size2) * sizeof(double)); // not necessary, but still set for conciseness - else - return ViennaCLGenericFailure; - - return ViennaCLSuccess; -#else - (void)h; - (void)A; - return ViennaCLGenericFailure; -#endif -} - - -static ViennaCLStatus init_host_matrix(viennacl::backend::mem_handle & h, ViennaCLMatrix A) -{ - h.switch_active_handle_id(viennacl::MAIN_MEMORY); - h.ram_handle().reset(A->host_mem); - h.ram_handle().inc(); - if (A->precision == ViennaCLFloat) - h.raw_size(static_cast<viennacl::vcl_size_t>(A->internal_size1) * static_cast<viennacl::vcl_size_t>(A->internal_size2) * sizeof(float)); // not necessary, but still set for conciseness - else if (A->precision == ViennaCLDouble) - h.raw_size(static_cast<viennacl::vcl_size_t>(A->internal_size1) * static_cast<viennacl::vcl_size_t>(A->internal_size2) * sizeof(double)); // not necessary, but still set for conciseness - else - return ViennaCLGenericFailure; - - return ViennaCLSuccess; -} - - -static ViennaCLStatus init_matrix(viennacl::backend::mem_handle & h, ViennaCLMatrix A) -{ - switch (A->backend->backend_type) - { - case ViennaCLCUDA: - return init_cuda_matrix(h, A); - - case ViennaCLOpenCL: - return init_opencl_matrix(h, A); - - case ViennaCLHost: - return init_host_matrix(h, A); - - default: - return ViennaCLGenericFailure; - } -} - - -
http://git-wip-us.apache.org/repos/asf/mahout/blob/7ae549fa/native-viennaCL/src/main/cpp/libviennacl/src/init_vector.hpp ---------------------------------------------------------------------- diff --git a/native-viennaCL/src/main/cpp/libviennacl/src/init_vector.hpp b/native-viennaCL/src/main/cpp/libviennacl/src/init_vector.hpp deleted file mode 100644 index 8be00d7..0000000 --- a/native-viennaCL/src/main/cpp/libviennacl/src/init_vector.hpp +++ /dev/null @@ -1,101 +0,0 @@ -/* ========================================================================= - Copyright (c) 2010-2014, Institute for Microelectronics, - Institute for Analysis and Scientific Computing, - TU Wien. - Portions of this software are copyright by UChicago Argonne, LLC. - - ----------------- - ViennaCL - The Vienna Computing Library - ----------------- - - Project Head: Karl Rupp [email protected] - - (A list of authors and contributors can be found in the PDF manual) - - License: MIT (X11), see file LICENSE in the base directory -============================================================================= */ - -#include "viennacl.hpp" -#include "viennacl/backend/mem_handle.hpp" - - - -static ViennaCLStatus init_cuda_vector(viennacl::backend::mem_handle & h, ViennaCLVector x) -{ -#ifdef VIENNACL_WITH_CUDA - h.switch_active_handle_id(viennacl::CUDA_MEMORY); - h.cuda_handle().reset(x->cuda_mem); - h.cuda_handle().inc(); - if (x->precision == ViennaCLFloat) - h.raw_size(static_cast<viennacl::vcl_size_t>(x->inc) * x->size * sizeof(float)); // not necessary, but still set for conciseness - else if (x->precision == ViennaCLDouble) - h.raw_size(static_cast<viennacl::vcl_size_t>(x->inc) * x->size * sizeof(double)); // not necessary, but still set for conciseness - else - return ViennaCLGenericFailure; - - return ViennaCLSuccess; -#else - (void)h; - (void)x; - return ViennaCLGenericFailure; -#endif -} - -static ViennaCLStatus init_opencl_vector(viennacl::backend::mem_handle & h, ViennaCLVector x) -{ -#ifdef VIENNACL_WITH_OPENCL - h.switch_active_handle_id(viennacl::OPENCL_MEMORY); - h.opencl_handle() = x->opencl_mem; - h.opencl_handle().inc(); - if (x->precision == ViennaCLFloat) - h.raw_size(static_cast<viennacl::vcl_size_t>(x->inc) * static_cast<viennacl::vcl_size_t>(x->size) * sizeof(float)); // not necessary, but still set for conciseness - else if (x->precision == ViennaCLDouble) - h.raw_size(static_cast<viennacl::vcl_size_t>(x->inc) * static_cast<viennacl::vcl_size_t>(x->size) * sizeof(double)); // not necessary, but still set for conciseness - else - return ViennaCLGenericFailure; - - return ViennaCLSuccess; -#else - (void)h; - (void)x; - return ViennaCLGenericFailure; -#endif -} - - -static ViennaCLStatus init_host_vector(viennacl::backend::mem_handle & h, ViennaCLVector x) -{ - h.switch_active_handle_id(viennacl::MAIN_MEMORY); - h.ram_handle().reset(x->host_mem); - h.ram_handle().inc(); - if (x->precision == ViennaCLFloat) - h.raw_size(static_cast<viennacl::vcl_size_t>(x->inc) * static_cast<viennacl::vcl_size_t>(x->size) * sizeof(float)); // not necessary, but still set for conciseness - else if (x->precision == ViennaCLDouble) - h.raw_size(static_cast<viennacl::vcl_size_t>(x->inc) * static_cast<viennacl::vcl_size_t>(x->size) * sizeof(double)); // not necessary, but still set for conciseness - else - return ViennaCLGenericFailure; - - return ViennaCLSuccess; -} - - -static ViennaCLStatus init_vector(viennacl::backend::mem_handle & h, ViennaCLVector x) -{ - switch (x->backend->backend_type) - { - case ViennaCLCUDA: - return init_cuda_vector(h, x); - - case ViennaCLOpenCL: - return init_opencl_vector(h, x); - - case ViennaCLHost: - return init_host_vector(h, x); - - default: - return ViennaCLGenericFailure; - } -} - - - http://git-wip-us.apache.org/repos/asf/mahout/blob/7ae549fa/native-viennaCL/src/main/cpp/libviennacl/src/viennacl_private.hpp ---------------------------------------------------------------------- diff --git a/native-viennaCL/src/main/cpp/libviennacl/src/viennacl_private.hpp b/native-viennaCL/src/main/cpp/libviennacl/src/viennacl_private.hpp deleted file mode 100644 index c66c848..0000000 --- a/native-viennaCL/src/main/cpp/libviennacl/src/viennacl_private.hpp +++ /dev/null @@ -1,141 +0,0 @@ -#ifndef VIENNACL_VIENNACL_PRIVATE_HPP -#define VIENNACL_VIENNACL_PRIVATE_HPP - - -/* ========================================================================= - Copyright (c) 2010-2014, Institute for Microelectronics, - Institute for Analysis and Scientific Computing, - TU Wien. - Portions of this software are copyright by UChicago Argonne, LLC. - - ----------------- - ViennaCL - The Vienna Computing Library - ----------------- - - Project Head: Karl Rupp [email protected] - - (A list of authors and contributors can be found in the PDF manual) - - License: MIT (X11), see file LICENSE in the base directory -============================================================================= */ - -#include <stdlib.h> - -#ifdef VIENNACL_WITH_OPENCL -#ifdef __APPLE__ -#include <OpenCL/cl.h> -#else -#include <CL/cl.h> -#endif -#endif - -#include "viennacl.hpp" - - -/************* Backend Management ******************/ - -struct ViennaCLCUDABackend_impl -{ - //TODO: Add stream and/or device descriptors here -}; - -struct ViennaCLOpenCLBackend_impl -{ - ViennaCLInt context_id; -}; - -struct ViennaCLHostBackend_impl -{ - // Nothing to specify *at the moment* -}; - - -/** @brief Generic backend for CUDA, OpenCL, host-based stuff */ -struct ViennaCLBackend_impl -{ - ViennaCLBackendTypes backend_type; - - ViennaCLCUDABackend_impl cuda_backend; - ViennaCLOpenCLBackend_impl opencl_backend; - ViennaCLHostBackend_impl host_backend; -}; - - - -/******** User Types **********/ - -struct ViennaCLHostScalar_impl -{ - ViennaCLPrecision precision; - - union { - float value_float; - double value_double; - }; -}; - -struct ViennaCLScalar_impl -{ - ViennaCLBackend backend; - ViennaCLPrecision precision; - - // buffer: -#ifdef VIENNACL_WITH_CUDA - char * cuda_mem; -#endif -#ifdef VIENNACL_WITH_OPENCL - cl_mem opencl_mem; -#endif - char * host_mem; - - ViennaCLInt offset; -}; - -struct ViennaCLVector_impl -{ - ViennaCLBackend backend; - ViennaCLPrecision precision; - - // buffer: -#ifdef VIENNACL_WITH_CUDA - char * cuda_mem; -#endif -#ifdef VIENNACL_WITH_OPENCL - cl_mem opencl_mem; -#endif - char * host_mem; - - ViennaCLInt offset; - ViennaCLInt inc; - ViennaCLInt size; -}; - -struct ViennaCLMatrix_impl -{ - ViennaCLBackend backend; - ViennaCLPrecision precision; - ViennaCLOrder order; - ViennaCLTranspose trans; - - // buffer: -#ifdef VIENNACL_WITH_CUDA - char * cuda_mem; -#endif -#ifdef VIENNACL_WITH_OPENCL - cl_mem opencl_mem; -#endif - char * host_mem; - - ViennaCLInt size1; - ViennaCLInt start1; - ViennaCLInt stride1; - ViennaCLInt internal_size1; - - ViennaCLInt size2; - ViennaCLInt start2; - ViennaCLInt stride2; - ViennaCLInt internal_size2; -}; - - -#endif http://git-wip-us.apache.org/repos/asf/mahout/blob/7ae549fa/native-viennaCL/src/main/cpp/viennacl/backend/cpu_ram.hpp ---------------------------------------------------------------------- diff --git a/native-viennaCL/src/main/cpp/viennacl/backend/cpu_ram.hpp b/native-viennaCL/src/main/cpp/viennacl/backend/cpu_ram.hpp deleted file mode 100644 index ccfd035..0000000 --- a/native-viennaCL/src/main/cpp/viennacl/backend/cpu_ram.hpp +++ /dev/null @@ -1,171 +0,0 @@ -#ifndef VIENNACL_BACKEND_CPU_RAM_HPP_ -#define VIENNACL_BACKEND_CPU_RAM_HPP_ - -/* ========================================================================= - Copyright (c) 2010-2016, Institute for Microelectronics, - Institute for Analysis and Scientific Computing, - TU Wien. - Portions of this software are copyright by UChicago Argonne, LLC. - - ----------------- - ViennaCL - The Vienna Computing Library - ----------------- - - Project Head: Karl Rupp [email protected] - - (A list of authors and contributors can be found in the manual) - - License: MIT (X11), see file LICENSE in the base directory -============================================================================= */ - -/** @file viennacl/backend/cpu_ram.hpp - @brief Implementations for the OpenCL backend functionality -*/ - -#include <cassert> -#include <vector> -#ifdef VIENNACL_WITH_AVX2 -#include <stdlib.h> -#endif - -#include "viennacl/forwards.h" -#include "viennacl/tools/shared_ptr.hpp" - -namespace viennacl -{ -namespace backend -{ -namespace cpu_ram -{ -typedef viennacl::tools::shared_ptr<char> handle_type; -// Requirements for backend: - -// * memory_create(size, host_ptr) -// * memory_copy(src, dest, offset_src, offset_dest, size) -// * memory_write_from_main_memory(src, offset, size, -// dest, offset, size) -// * memory_read_to_main_memory(src, offset, size -// dest, offset, size) -// * -// - -namespace detail -{ - /** @brief Helper struct for deleting an pointer to an array */ - template<class U> - struct array_deleter - { -#ifdef VIENNACL_WITH_AVX2 - void operator()(U* p) const { free(p); } -#else - void operator()(U* p) const { delete[] p; } -#endif - }; - -} - -/** @brief Creates an array of the specified size in main RAM. If the second argument is provided, the buffer is initialized with data from that pointer. - * - * @param size_in_bytes Number of bytes to allocate - * @param host_ptr Pointer to data which will be copied to the new array. Must point to at least 'size_in_bytes' bytes of data. - * - */ -inline handle_type memory_create(vcl_size_t size_in_bytes, const void * host_ptr = NULL) -{ -#ifdef VIENNACL_WITH_AVX2 - // Note: aligned_alloc not available on all compilers. Consider platform-specific alternatives such as posix_memalign() - if (!host_ptr) - return handle_type(reinterpret_cast<char*>(aligned_alloc(32, size_in_bytes)), detail::array_deleter<char>()); - - handle_type new_handle(reinterpret_cast<char*>(aligned_alloc(32, size_in_bytes)), detail::array_deleter<char>()); -#else - if (!host_ptr) - return handle_type(new char[size_in_bytes], detail::array_deleter<char>()); - - handle_type new_handle(new char[size_in_bytes], detail::array_deleter<char>()); -#endif - - // copy data: - char * raw_ptr = new_handle.get(); - const char * data_ptr = static_cast<const char *>(host_ptr); -#ifdef VIENNACL_WITH_OPENMP - #pragma omp parallel for -#endif - for (long i=0; i<long(size_in_bytes); ++i) - raw_ptr[i] = data_ptr[i]; - - return new_handle; -} - -/** @brief Copies 'bytes_to_copy' bytes from address 'src_buffer + src_offset' to memory starting at address 'dst_buffer + dst_offset'. - * - * @param src_buffer A smart pointer to the begin of an allocated buffer - * @param dst_buffer A smart pointer to the end of an allocated buffer - * @param src_offset Offset of the first byte to be written from the address given by 'src_buffer' (in bytes) - * @param dst_offset Offset of the first byte to be written to the address given by 'dst_buffer' (in bytes) - * @param bytes_to_copy Number of bytes to be copied - */ -inline void memory_copy(handle_type const & src_buffer, - handle_type & dst_buffer, - vcl_size_t src_offset, - vcl_size_t dst_offset, - vcl_size_t bytes_to_copy) -{ - assert( (dst_buffer.get() != NULL) && bool("Memory not initialized!")); - assert( (src_buffer.get() != NULL) && bool("Memory not initialized!")); - -#ifdef VIENNACL_WITH_OPENMP - #pragma omp parallel for -#endif - for (long i=0; i<long(bytes_to_copy); ++i) - dst_buffer.get()[vcl_size_t(i)+dst_offset] = src_buffer.get()[vcl_size_t(i) + src_offset]; -} - -/** @brief Writes data from main RAM identified by 'ptr' to the buffer identified by 'dst_buffer' - * - * @param dst_buffer A smart pointer to the beginning of an allocated buffer - * @param dst_offset Offset of the first written byte from the beginning of 'dst_buffer' (in bytes) - * @param bytes_to_copy Number of bytes to be copied - * @param ptr Pointer to the first byte to be written - */ -inline void memory_write(handle_type & dst_buffer, - vcl_size_t dst_offset, - vcl_size_t bytes_to_copy, - const void * ptr, - bool /*async*/) -{ - assert( (dst_buffer.get() != NULL) && bool("Memory not initialized!")); - -#ifdef VIENNACL_WITH_OPENMP - #pragma omp parallel for -#endif - for (long i=0; i<long(bytes_to_copy); ++i) - dst_buffer.get()[vcl_size_t(i)+dst_offset] = static_cast<const char *>(ptr)[i]; -} - -/** @brief Reads data from a buffer back to main RAM. - * - * @param src_buffer A smart pointer to the beginning of an allocated source buffer - * @param src_offset Offset of the first byte to be read from the beginning of src_buffer (in bytes_ - * @param bytes_to_copy Number of bytes to be read - * @param ptr Location in main RAM where to read data should be written to - */ -inline void memory_read(handle_type const & src_buffer, - vcl_size_t src_offset, - vcl_size_t bytes_to_copy, - void * ptr, - bool /*async*/) -{ - assert( (src_buffer.get() != NULL) && bool("Memory not initialized!")); - -#ifdef VIENNACL_WITH_OPENMP - #pragma omp parallel for -#endif - for (long i=0; i<long(bytes_to_copy); ++i) - static_cast<char *>(ptr)[i] = src_buffer.get()[vcl_size_t(i)+src_offset]; -} - -} -} //backend -} //viennacl -#endif http://git-wip-us.apache.org/repos/asf/mahout/blob/7ae549fa/native-viennaCL/src/main/cpp/viennacl/backend/cuda.hpp ---------------------------------------------------------------------- diff --git a/native-viennaCL/src/main/cpp/viennacl/backend/cuda.hpp b/native-viennaCL/src/main/cpp/viennacl/backend/cuda.hpp deleted file mode 100644 index 641bfea..0000000 --- a/native-viennaCL/src/main/cpp/viennacl/backend/cuda.hpp +++ /dev/null @@ -1,206 +0,0 @@ -#ifndef VIENNACL_BACKEND_CUDA_HPP_ -#define VIENNACL_BACKEND_CUDA_HPP_ - -/* ========================================================================= - Copyright (c) 2010-2016, Institute for Microelectronics, - Institute for Analysis and Scientific Computing, - TU Wien. - Portions of this software are copyright by UChicago Argonne, LLC. - - ----------------- - ViennaCL - The Vienna Computing Library - ----------------- - - Project Head: Karl Rupp [email protected] - - (A list of authors and contributors can be found in the manual) - - License: MIT (X11), see file LICENSE in the base directory -============================================================================= */ - -/** @file viennacl/backend/cuda.hpp - @brief Implementations for the CUDA backend functionality -*/ - - -#include <iostream> -#include <vector> -#include <cassert> -#include <stdexcept> -#include <sstream> - -#include "viennacl/forwards.h" -#include "viennacl/tools/shared_ptr.hpp" - -// includes CUDA -#include <cuda_runtime.h> - -#define VIENNACL_CUDA_ERROR_CHECK(err) detail::cuda_error_check (err, __FILE__, __LINE__) - -namespace viennacl -{ -namespace backend -{ -namespace cuda -{ - -typedef viennacl::tools::shared_ptr<char> handle_type; -// Requirements for backend: - -// * memory_create(size, host_ptr) -// * memory_copy(src, dest, offset_src, offset_dest, size) -// * memory_write_from_main_memory(src, offset, size, -// dest, offset, size) -// * memory_read_to_main_memory(src, offset, size -// dest, offset, size) -// * -// - -class cuda_exception : public std::runtime_error -{ -public: - cuda_exception(std::string const & what_arg, cudaError_t err_code) : std::runtime_error(what_arg), error_code_(err_code) {} - - cudaError_t error_code() const { return error_code_; } - -private: - cudaError_t error_code_; -}; - -namespace detail -{ - - inline void cuda_error_check(cudaError error_code, const char *file, const int line ) - { - if (cudaSuccess != error_code) - { - std::stringstream ss; - ss << file << "(" << line << "): " << ": CUDA Runtime API error " << error_code << ": " << cudaGetErrorString( error_code ) << std::endl; - throw viennacl::backend::cuda::cuda_exception(ss.str(), error_code); - } - } - - - /** @brief Functor for deleting a CUDA handle. Used within the smart pointer class. */ - template<typename U> - struct cuda_deleter - { - void operator()(U * p) const - { - //std::cout << "Freeing handle " << reinterpret_cast<void *>(p) << std::endl; - cudaFree(p); - } - }; - -} - -/** @brief Creates an array of the specified size on the CUDA device. If the second argument is provided, the buffer is initialized with data from that pointer. - * - * @param size_in_bytes Number of bytes to allocate - * @param host_ptr Pointer to data which will be copied to the new array. Must point to at least 'size_in_bytes' bytes of data. - * - */ -inline handle_type memory_create(vcl_size_t size_in_bytes, const void * host_ptr = NULL) -{ - void * dev_ptr = NULL; - VIENNACL_CUDA_ERROR_CHECK( cudaMalloc(&dev_ptr, size_in_bytes) ); - //std::cout << "Allocated new dev_ptr " << dev_ptr << " of size " << size_in_bytes << std::endl; - - if (!host_ptr) - return handle_type(reinterpret_cast<char *>(dev_ptr), detail::cuda_deleter<char>()); - - handle_type new_handle(reinterpret_cast<char*>(dev_ptr), detail::cuda_deleter<char>()); - - // copy data: - //std::cout << "Filling new handle from host_ptr " << host_ptr << std::endl; - cudaMemcpy(new_handle.get(), host_ptr, size_in_bytes, cudaMemcpyHostToDevice); - - return new_handle; -} - - -/** @brief Copies 'bytes_to_copy' bytes from address 'src_buffer + src_offset' on the CUDA device to memory starting at address 'dst_buffer + dst_offset' on the same CUDA device. - * - * @param src_buffer A smart pointer to the begin of an allocated CUDA buffer - * @param dst_buffer A smart pointer to the end of an allocated CUDA buffer - * @param src_offset Offset of the first byte to be written from the address given by 'src_buffer' (in bytes) - * @param dst_offset Offset of the first byte to be written to the address given by 'dst_buffer' (in bytes) - * @param bytes_to_copy Number of bytes to be copied - */ -inline void memory_copy(handle_type const & src_buffer, - handle_type & dst_buffer, - vcl_size_t src_offset, - vcl_size_t dst_offset, - vcl_size_t bytes_to_copy) -{ - assert( (dst_buffer.get() != NULL) && bool("Memory not initialized!")); - assert( (src_buffer.get() != NULL) && bool("Memory not initialized!")); - - cudaMemcpy(reinterpret_cast<void *>(dst_buffer.get() + dst_offset), - reinterpret_cast<void *>(src_buffer.get() + src_offset), - bytes_to_copy, - cudaMemcpyDeviceToDevice); -} - - -/** @brief Writes data from main RAM identified by 'ptr' to the CUDA buffer identified by 'dst_buffer' - * - * @param dst_buffer A smart pointer to the beginning of an allocated CUDA buffer - * @param dst_offset Offset of the first written byte from the beginning of 'dst_buffer' (in bytes) - * @param bytes_to_copy Number of bytes to be copied - * @param ptr Pointer to the first byte to be written - * @param async Whether the operation should be asynchronous - */ -inline void memory_write(handle_type & dst_buffer, - vcl_size_t dst_offset, - vcl_size_t bytes_to_copy, - const void * ptr, - bool async = false) -{ - assert( (dst_buffer.get() != NULL) && bool("Memory not initialized!")); - - if (async) - cudaMemcpyAsync(reinterpret_cast<char *>(dst_buffer.get()) + dst_offset, - reinterpret_cast<const char *>(ptr), - bytes_to_copy, - cudaMemcpyHostToDevice); - else - cudaMemcpy(reinterpret_cast<char *>(dst_buffer.get()) + dst_offset, - reinterpret_cast<const char *>(ptr), - bytes_to_copy, - cudaMemcpyHostToDevice); -} - - -/** @brief Reads data from a CUDA buffer back to main RAM. - * - * @param src_buffer A smart pointer to the beginning of an allocated CUDA source buffer - * @param src_offset Offset of the first byte to be read from the beginning of src_buffer (in bytes_ - * @param bytes_to_copy Number of bytes to be read - * @param ptr Location in main RAM where to read data should be written to - * @param async Whether the operation should be asynchronous - */ -inline void memory_read(handle_type const & src_buffer, - vcl_size_t src_offset, - vcl_size_t bytes_to_copy, - void * ptr, - bool async = false) -{ - assert( (src_buffer.get() != NULL) && bool("Memory not initialized!")); - - if (async) - cudaMemcpyAsync(reinterpret_cast<char *>(ptr), - reinterpret_cast<char *>(src_buffer.get()) + src_offset, - bytes_to_copy, - cudaMemcpyDeviceToHost); - else - cudaMemcpy(reinterpret_cast<char *>(ptr), - reinterpret_cast<char *>(src_buffer.get()) + src_offset, - bytes_to_copy, - cudaMemcpyDeviceToHost); -} - -} //cuda -} //backend -} //viennacl -#endif http://git-wip-us.apache.org/repos/asf/mahout/blob/7ae549fa/native-viennaCL/src/main/cpp/viennacl/backend/mem_handle.hpp ---------------------------------------------------------------------- diff --git a/native-viennaCL/src/main/cpp/viennacl/backend/mem_handle.hpp b/native-viennaCL/src/main/cpp/viennacl/backend/mem_handle.hpp deleted file mode 100644 index 37c680b..0000000 --- a/native-viennaCL/src/main/cpp/viennacl/backend/mem_handle.hpp +++ /dev/null @@ -1,250 +0,0 @@ -#ifndef VIENNACL_BACKEND_MEM_HANDLE_HPP -#define VIENNACL_BACKEND_MEM_HANDLE_HPP - -/* ========================================================================= - Copyright (c) 2010-2016, Institute for Microelectronics, - Institute for Analysis and Scientific Computing, - TU Wien. - Portions of this software are copyright by UChicago Argonne, LLC. - - ----------------- - ViennaCL - The Vienna Computing Library - ----------------- - - Project Head: Karl Rupp [email protected] - - (A list of authors and contributors can be found in the manual) - - License: MIT (X11), see file LICENSE in the base directory -============================================================================= */ - -/** @file viennacl/backend/mem_handle.hpp - @brief Implements the multi-memory-domain handle -*/ - -#include <vector> -#include <cassert> -#include "viennacl/forwards.h" -#include "viennacl/tools/shared_ptr.hpp" -#include "viennacl/backend/cpu_ram.hpp" - -#ifdef VIENNACL_WITH_OPENCL -#include "viennacl/backend/opencl.hpp" -#endif - -#ifdef VIENNACL_WITH_CUDA -#include "viennacl/backend/cuda.hpp" -#endif - - -namespace viennacl -{ -namespace backend -{ - -namespace detail -{ - /** @brief Singleton for managing the default memory type. - * - * @param new_mem_type If NULL, returns the current memory type. Otherwise, sets the memory type to the provided value. - */ - inline memory_types get_set_default_memory_type(memory_types * new_mem_type) - { - // if a user compiles with CUDA, it is reasonable to expect that CUDA should be the default -#ifdef VIENNACL_WITH_CUDA - static memory_types mem_type = CUDA_MEMORY; -#elif defined(VIENNACL_WITH_OPENCL) - static memory_types mem_type = OPENCL_MEMORY; -#else - static memory_types mem_type = MAIN_MEMORY; -#endif - - if (new_mem_type) - mem_type = *new_mem_type; - - return mem_type; - } -} - -/** @brief Returns the default memory type for the given configuration. - * - * CUDA has precedence over OpenCL, which has precedence over main memory. Depends on which VIENNACL_WITH_{CUDA/OPENCL/OPENMP} macros are defined. - */ -inline memory_types default_memory_type() { return detail::get_set_default_memory_type(NULL); } - -/** @brief Sets the default memory type for the given configuration. - * - * Make sure the respective new memory type is enabled. - * For example, passing CUDA_MEMORY if no CUDA backend is selected will result in exceptions being thrown as soon as you try to allocate buffers. - */ -inline memory_types default_memory_type(memory_types new_memory_type) { return detail::get_set_default_memory_type(&new_memory_type); } - - -/** @brief Main abstraction class for multiple memory domains. Represents a buffer in either main RAM, an OpenCL context, or a CUDA device. - * - * The idea is to wrap all possible handle types inside this class so that higher-level code does not need to be cluttered with preprocessor switches. - * Instead, this class collects all the necessary conditional compilations. - * - */ -class mem_handle -{ -public: - typedef viennacl::tools::shared_ptr<char> ram_handle_type; - typedef viennacl::tools::shared_ptr<char> cuda_handle_type; - - /** @brief Default CTOR. No memory is allocated */ - mem_handle() : active_handle_(MEMORY_NOT_INITIALIZED), size_in_bytes_(0) {} - - /** @brief Returns the handle to a buffer in CPU RAM. NULL is returned if no such buffer has been allocated. */ - ram_handle_type & ram_handle() { return ram_handle_; } - /** @brief Returns the handle to a buffer in CPU RAM. NULL is returned if no such buffer has been allocated. */ - ram_handle_type const & ram_handle() const { return ram_handle_; } - -#ifdef VIENNACL_WITH_OPENCL - /** @brief Returns the handle to an OpenCL buffer. The handle contains NULL if no such buffer has been allocated. */ - viennacl::ocl::handle<cl_mem> & opencl_handle() { return opencl_handle_; } - /** @brief Returns the handle to an OpenCL buffer. The handle contains NULL if no such buffer has been allocated. */ - viennacl::ocl::handle<cl_mem> const & opencl_handle() const { return opencl_handle_; } -#endif - -#ifdef VIENNACL_WITH_CUDA - /** @brief Returns the handle to a CUDA buffer. The handle contains NULL if no such buffer has been allocated. */ - cuda_handle_type & cuda_handle() { return cuda_handle_; } - /** @brief Returns the handle to a CUDA buffer. The handle contains NULL if no such buffer has been allocated. */ - cuda_handle_type const & cuda_handle() const { return cuda_handle_; } -#endif - - /** @brief Returns an ID for the currently active memory buffer. Other memory buffers might contain old or no data. */ - memory_types get_active_handle_id() const { return active_handle_; } - - /** @brief Switches the currently active handle. If no support for that backend is provided, an exception is thrown. */ - void switch_active_handle_id(memory_types new_id) - { - if (new_id != active_handle_) - { - if (active_handle_ == MEMORY_NOT_INITIALIZED) - active_handle_ = new_id; - else if (active_handle_ == MAIN_MEMORY) - { - active_handle_ = new_id; - } - else if (active_handle_ == OPENCL_MEMORY) - { -#ifdef VIENNACL_WITH_OPENCL - active_handle_ = new_id; -#else - throw memory_exception("compiled without OpenCL suppport!"); -#endif - } - else if (active_handle_ == CUDA_MEMORY) - { -#ifdef VIENNACL_WITH_CUDA - active_handle_ = new_id; -#else - throw memory_exception("compiled without CUDA suppport!"); -#endif - } - else - throw memory_exception("invalid new memory region!"); - } - } - - /** @brief Compares the two handles and returns true if the active memory handles in the two mem_handles point to the same buffer. */ - bool operator==(mem_handle const & other) const - { - if (active_handle_ != other.active_handle_) - return false; - - switch (active_handle_) - { - case MAIN_MEMORY: - return ram_handle_.get() == other.ram_handle_.get(); -#ifdef VIENNACL_WITH_OPENCL - case OPENCL_MEMORY: - return opencl_handle_.get() == other.opencl_handle_.get(); -#endif -#ifdef VIENNACL_WITH_CUDA - case CUDA_MEMORY: - return cuda_handle_.get() == other.cuda_handle_.get(); -#endif - default: break; - } - - return false; - } - - /** @brief Compares the two handles and returns true if the active memory handles in the two mem_handles point a buffer with inferior address - * useful to store handles into a map, since they naturally have strong ordering - */ - bool operator<(mem_handle const & other) const - { - if (active_handle_ != other.active_handle_) - return false; - - switch (active_handle_) - { - case MAIN_MEMORY: - return ram_handle_.get() < other.ram_handle_.get(); -#ifdef VIENNACL_WITH_OPENCL - case OPENCL_MEMORY: - return opencl_handle_.get() < other.opencl_handle_.get(); -#endif -#ifdef VIENNACL_WITH_CUDA - case CUDA_MEMORY: - return cuda_handle_.get() < other.cuda_handle_.get(); -#endif - default: break; - } - - return false; - } - - - bool operator!=(mem_handle const & other) const { return !(*this == other); } - - /** @brief Implements a fast swapping method. No data is copied, only the handles are exchanged. */ - void swap(mem_handle & other) - { - // swap handle type: - memory_types active_handle_tmp = other.active_handle_; - other.active_handle_ = active_handle_; - active_handle_ = active_handle_tmp; - - // swap ram handle: - ram_handle_type ram_handle_tmp = other.ram_handle_; - other.ram_handle_ = ram_handle_; - ram_handle_ = ram_handle_tmp; - - // swap OpenCL handle: -#ifdef VIENNACL_WITH_OPENCL - opencl_handle_.swap(other.opencl_handle_); -#endif -#ifdef VIENNACL_WITH_CUDA - cuda_handle_type cuda_handle_tmp = other.cuda_handle_; - other.cuda_handle_ = cuda_handle_; - cuda_handle_ = cuda_handle_tmp; -#endif - } - - /** @brief Returns the number of bytes of the currently active buffer */ - vcl_size_t raw_size() const { return size_in_bytes_; } - - /** @brief Sets the size of the currently active buffer. Use with care! */ - void raw_size(vcl_size_t new_size) { size_in_bytes_ = new_size; } - -private: - memory_types active_handle_; - ram_handle_type ram_handle_; -#ifdef VIENNACL_WITH_OPENCL - viennacl::ocl::handle<cl_mem> opencl_handle_; -#endif -#ifdef VIENNACL_WITH_CUDA - cuda_handle_type cuda_handle_; -#endif - vcl_size_t size_in_bytes_; -}; - - -} //backend -} //viennacl -#endif http://git-wip-us.apache.org/repos/asf/mahout/blob/7ae549fa/native-viennaCL/src/main/cpp/viennacl/backend/memory.hpp ---------------------------------------------------------------------- diff --git a/native-viennaCL/src/main/cpp/viennacl/backend/memory.hpp b/native-viennaCL/src/main/cpp/viennacl/backend/memory.hpp deleted file mode 100644 index d6f29a5..0000000 --- a/native-viennaCL/src/main/cpp/viennacl/backend/memory.hpp +++ /dev/null @@ -1,628 +0,0 @@ -#ifndef VIENNACL_BACKEND_MEMORY_HPP -#define VIENNACL_BACKEND_MEMORY_HPP - -/* ========================================================================= - Copyright (c) 2010-2016, Institute for Microelectronics, - Institute for Analysis and Scientific Computing, - TU Wien. - Portions of this software are copyright by UChicago Argonne, LLC. - - ----------------- - ViennaCL - The Vienna Computing Library - ----------------- - - Project Head: Karl Rupp [email protected] - - (A list of authors and contributors can be found in the manual) - - License: MIT (X11), see file LICENSE in the base directory -============================================================================= */ - -/** @file viennacl/backend/memory.hpp - @brief Main interface routines for memory management -*/ - -#include <vector> -#include <cassert> -#include "viennacl/forwards.h" -#include "viennacl/backend/mem_handle.hpp" -#include "viennacl/context.hpp" -#include "viennacl/traits/handle.hpp" -#include "viennacl/traits/context.hpp" -#include "viennacl/backend/util.hpp" - -#include "viennacl/backend/cpu_ram.hpp" - -#ifdef VIENNACL_WITH_OPENCL -#include "viennacl/backend/opencl.hpp" -#include "viennacl/ocl/backend.hpp" -#endif - -#ifdef VIENNACL_WITH_CUDA -#include "viennacl/backend/cuda.hpp" -#endif - - -namespace viennacl -{ -namespace backend -{ - - - // if a user compiles with CUDA, it is reasonable to expect that CUDA should be the default - /** @brief Synchronizes the execution. finish() will only return after all compute kernels (CUDA, OpenCL) have completed. */ - inline void finish() - { -#ifdef VIENNACL_WITH_CUDA - cudaDeviceSynchronize(); -#endif -#ifdef VIENNACL_WITH_OPENCL - viennacl::ocl::get_queue().finish(); -#endif - } - - - - - // Requirements for backend: - - // ---- Memory ---- - // - // * memory_create(size, host_ptr) - // * memory_copy(src, dest, offset_src, offset_dest, size) - // * memory_write(src, offset, size, ptr) - // * memory_read(src, offset, size, ptr) - // - - /** @brief Creates an array of the specified size. If the second argument is provided, the buffer is initialized with data from that pointer. - * - * This is the generic version for CPU RAM, CUDA, and OpenCL. Creates the memory in the currently active memory domain. - * - * @param handle The generic wrapper handle for multiple memory domains which will hold the new buffer. - * @param size_in_bytes Number of bytes to allocate - * @param ctx Optional context in which the matrix is created (one out of multiple OpenCL contexts, CUDA, host) - * @param host_ptr Pointer to data which will be copied to the new array. Must point to at least 'size_in_bytes' bytes of data. - * - */ - inline void memory_create(mem_handle & handle, vcl_size_t size_in_bytes, viennacl::context const & ctx, const void * host_ptr = NULL) - { - if (size_in_bytes > 0) - { - if (handle.get_active_handle_id() == MEMORY_NOT_INITIALIZED) - handle.switch_active_handle_id(ctx.memory_type()); - - switch (handle.get_active_handle_id()) - { - case MAIN_MEMORY: - handle.ram_handle() = cpu_ram::memory_create(size_in_bytes, host_ptr); - handle.raw_size(size_in_bytes); - break; -#ifdef VIENNACL_WITH_OPENCL - case OPENCL_MEMORY: - handle.opencl_handle().context(ctx.opencl_context()); - handle.opencl_handle() = opencl::memory_create(handle.opencl_handle().context(), size_in_bytes, host_ptr); - handle.raw_size(size_in_bytes); - break; -#endif -#ifdef VIENNACL_WITH_CUDA - case CUDA_MEMORY: - handle.cuda_handle() = cuda::memory_create(size_in_bytes, host_ptr); - handle.raw_size(size_in_bytes); - break; -#endif - case MEMORY_NOT_INITIALIZED: - throw memory_exception("not initialised!"); - default: - throw memory_exception("unknown memory handle!"); - } - } - } - - /* - inline void memory_create(mem_handle & handle, vcl_size_t size_in_bytes, const void * host_ptr = NULL) - { - viennacl::context ctx(default_memory_type()); - memory_create(handle, size_in_bytes, ctx, host_ptr); - }*/ - - - /** @brief Copies 'bytes_to_copy' bytes from address 'src_buffer + src_offset' to memory starting at address 'dst_buffer + dst_offset'. - * - * This is the generic version for CPU RAM, CUDA, and OpenCL. Copies the memory in the currently active memory domain. - * - * - * @param src_buffer A smart pointer to the begin of an allocated buffer - * @param dst_buffer A smart pointer to the end of an allocated buffer - * @param src_offset Offset of the first byte to be written from the address given by 'src_buffer' (in bytes) - * @param dst_offset Offset of the first byte to be written to the address given by 'dst_buffer' (in bytes) - * @param bytes_to_copy Number of bytes to be copied - */ - inline void memory_copy(mem_handle const & src_buffer, - mem_handle & dst_buffer, - vcl_size_t src_offset, - vcl_size_t dst_offset, - vcl_size_t bytes_to_copy) - { - assert( src_buffer.get_active_handle_id() == dst_buffer.get_active_handle_id() && bool("memory_copy() must be called on buffers from the same domain") ); - - if (bytes_to_copy > 0) - { - switch (src_buffer.get_active_handle_id()) - { - case MAIN_MEMORY: - cpu_ram::memory_copy(src_buffer.ram_handle(), dst_buffer.ram_handle(), src_offset, dst_offset, bytes_to_copy); - break; -#ifdef VIENNACL_WITH_OPENCL - case OPENCL_MEMORY: - opencl::memory_copy(src_buffer.opencl_handle(), dst_buffer.opencl_handle(), src_offset, dst_offset, bytes_to_copy); - break; -#endif -#ifdef VIENNACL_WITH_CUDA - case CUDA_MEMORY: - cuda::memory_copy(src_buffer.cuda_handle(), dst_buffer.cuda_handle(), src_offset, dst_offset, bytes_to_copy); - break; -#endif - case MEMORY_NOT_INITIALIZED: - throw memory_exception("not initialised!"); - default: - throw memory_exception("unknown memory handle!"); - } - } - } - - // TODO: Refine this concept. Maybe move to constructor? - /** @brief A 'shallow' copy operation from an initialized buffer to an uninitialized buffer. - * The uninitialized buffer just copies the raw handle. - */ - inline void memory_shallow_copy(mem_handle const & src_buffer, - mem_handle & dst_buffer) - { - assert( (dst_buffer.get_active_handle_id() == MEMORY_NOT_INITIALIZED) && bool("Shallow copy on already initialized memory not supported!")); - - switch (src_buffer.get_active_handle_id()) - { - case MAIN_MEMORY: - dst_buffer.switch_active_handle_id(src_buffer.get_active_handle_id()); - dst_buffer.ram_handle() = src_buffer.ram_handle(); - dst_buffer.raw_size(src_buffer.raw_size()); - break; -#ifdef VIENNACL_WITH_OPENCL - case OPENCL_MEMORY: - dst_buffer.switch_active_handle_id(src_buffer.get_active_handle_id()); - dst_buffer.opencl_handle() = src_buffer.opencl_handle(); - dst_buffer.raw_size(src_buffer.raw_size()); - break; -#endif -#ifdef VIENNACL_WITH_CUDA - case CUDA_MEMORY: - dst_buffer.switch_active_handle_id(src_buffer.get_active_handle_id()); - dst_buffer.cuda_handle() = src_buffer.cuda_handle(); - dst_buffer.raw_size(src_buffer.raw_size()); - break; -#endif - case MEMORY_NOT_INITIALIZED: - throw memory_exception("not initialised!"); - default: - throw memory_exception("unknown memory handle!"); - } - } - - /** @brief Writes data from main RAM identified by 'ptr' to the buffer identified by 'dst_buffer' - * - * This is the generic version for CPU RAM, CUDA, and OpenCL. Writes the memory in the currently active memory domain. - * - * @param dst_buffer A smart pointer to the beginning of an allocated buffer - * @param dst_offset Offset of the first written byte from the beginning of 'dst_buffer' (in bytes) - * @param bytes_to_write Number of bytes to be written - * @param ptr Pointer to the first byte to be written - * @param async Whether the operation should be asynchronous - */ - inline void memory_write(mem_handle & dst_buffer, - vcl_size_t dst_offset, - vcl_size_t bytes_to_write, - const void * ptr, - bool async = false) - { - if (bytes_to_write > 0) - { - switch (dst_buffer.get_active_handle_id()) - { - case MAIN_MEMORY: - cpu_ram::memory_write(dst_buffer.ram_handle(), dst_offset, bytes_to_write, ptr, async); - break; -#ifdef VIENNACL_WITH_OPENCL - case OPENCL_MEMORY: - opencl::memory_write(dst_buffer.opencl_handle(), dst_offset, bytes_to_write, ptr, async); - break; -#endif -#ifdef VIENNACL_WITH_CUDA - case CUDA_MEMORY: - cuda::memory_write(dst_buffer.cuda_handle(), dst_offset, bytes_to_write, ptr, async); - break; -#endif - case MEMORY_NOT_INITIALIZED: - throw memory_exception("not initialised!"); - default: - throw memory_exception("unknown memory handle!"); - } - } - } - - /** @brief Reads data from a buffer back to main RAM. - * - * This is the generic version for CPU RAM, CUDA, and OpenCL. Reads the memory from the currently active memory domain. - * - * @param src_buffer A smart pointer to the beginning of an allocated source buffer - * @param src_offset Offset of the first byte to be read from the beginning of src_buffer (in bytes_ - * @param bytes_to_read Number of bytes to be read - * @param ptr Location in main RAM where to read data should be written to - * @param async Whether the operation should be asynchronous - */ - inline void memory_read(mem_handle const & src_buffer, - vcl_size_t src_offset, - vcl_size_t bytes_to_read, - void * ptr, - bool async = false) - { - //finish(); //Fixes some issues with AMD APP SDK. However, might sacrifice a few percents of performance in some cases. - - if (bytes_to_read > 0) - { - switch (src_buffer.get_active_handle_id()) - { - case MAIN_MEMORY: - cpu_ram::memory_read(src_buffer.ram_handle(), src_offset, bytes_to_read, ptr, async); - break; -#ifdef VIENNACL_WITH_OPENCL - case OPENCL_MEMORY: - opencl::memory_read(src_buffer.opencl_handle(), src_offset, bytes_to_read, ptr, async); - break; -#endif -#ifdef VIENNACL_WITH_CUDA - case CUDA_MEMORY: - cuda::memory_read(src_buffer.cuda_handle(), src_offset, bytes_to_read, ptr, async); - break; -#endif - case MEMORY_NOT_INITIALIZED: - throw memory_exception("not initialised!"); - default: - throw memory_exception("unknown memory handle!"); - } - } - } - - - - namespace detail - { - template<typename T> - vcl_size_t element_size(memory_types /* mem_type */) - { - return sizeof(T); - } - - - template<> - inline vcl_size_t element_size<unsigned long>(memory_types - #ifdef VIENNACL_WITH_OPENCL - mem_type //in order to compile cleanly at -Wextra in GCC - #endif - ) - { -#ifdef VIENNACL_WITH_OPENCL - if (mem_type == OPENCL_MEMORY) - return sizeof(cl_ulong); -#endif - return sizeof(unsigned long); - } - - template<> - inline vcl_size_t element_size<long>(memory_types - #ifdef VIENNACL_WITH_OPENCL - mem_type //in order to compile cleanly at -Wextra in GCC - #endif - ) - { -#ifdef VIENNACL_WITH_OPENCL - if (mem_type == OPENCL_MEMORY) - return sizeof(cl_long); -#endif - return sizeof(long); - } - - - template<> - inline vcl_size_t element_size<unsigned int>(memory_types - #ifdef VIENNACL_WITH_OPENCL - mem_type //in order to compile cleanly at -Wextra in GCC - #endif - ) - { -#ifdef VIENNACL_WITH_OPENCL - if (mem_type == OPENCL_MEMORY) - return sizeof(cl_uint); -#endif - return sizeof(unsigned int); - } - - template<> - inline vcl_size_t element_size<int>(memory_types - #ifdef VIENNACL_WITH_OPENCL - mem_type //in order to compile cleanly at -Wextra in GCC - #endif - ) - { -#ifdef VIENNACL_WITH_OPENCL - if (mem_type == OPENCL_MEMORY) - return sizeof(cl_int); -#endif - return sizeof(int); - } - - - } - - - /** @brief Switches the active memory domain within a memory handle. Data is copied if the new active domain differs from the old one. Memory in the source handle is not free'd. */ - template<typename DataType> - void switch_memory_context(mem_handle & handle, viennacl::context new_ctx) - { - if (handle.get_active_handle_id() == new_ctx.memory_type()) - return; - - if (handle.get_active_handle_id() == viennacl::MEMORY_NOT_INITIALIZED || handle.raw_size() == 0) - { - handle.switch_active_handle_id(new_ctx.memory_type()); -#ifdef VIENNACL_WITH_OPENCL - if (new_ctx.memory_type() == OPENCL_MEMORY) - handle.opencl_handle().context(new_ctx.opencl_context()); -#endif - return; - } - - vcl_size_t size_dst = detail::element_size<DataType>(handle.get_active_handle_id()); - vcl_size_t size_src = detail::element_size<DataType>(new_ctx.memory_type()); - - if (size_dst != size_src) // OpenCL data element size not the same as host data element size - { - throw memory_exception("Heterogeneous data element sizes not yet supported!"); - } - else //no data conversion required - { - if (handle.get_active_handle_id() == MAIN_MEMORY) //we can access the existing data directly - { - switch (new_ctx.memory_type()) - { -#ifdef VIENNACL_WITH_OPENCL - case OPENCL_MEMORY: - handle.opencl_handle().context(new_ctx.opencl_context()); - handle.opencl_handle() = opencl::memory_create(handle.opencl_handle().context(), handle.raw_size(), handle.ram_handle().get()); - break; -#endif -#ifdef VIENNACL_WITH_CUDA - case CUDA_MEMORY: - handle.cuda_handle() = cuda::memory_create(handle.raw_size(), handle.ram_handle().get()); - break; -#endif - case MAIN_MEMORY: - default: - throw memory_exception("Invalid destination domain"); - } - } -#ifdef VIENNACL_WITH_OPENCL - else if (handle.get_active_handle_id() == OPENCL_MEMORY) // data can be dumped into destination directly - { - std::vector<DataType> buffer; - - switch (new_ctx.memory_type()) - { - case MAIN_MEMORY: - handle.ram_handle() = cpu_ram::memory_create(handle.raw_size()); - opencl::memory_read(handle.opencl_handle(), 0, handle.raw_size(), handle.ram_handle().get()); - break; -#ifdef VIENNACL_WITH_CUDA - case CUDA_MEMORY: - buffer.resize(handle.raw_size() / sizeof(DataType)); - opencl::memory_read(handle.opencl_handle(), 0, handle.raw_size(), &(buffer[0])); - cuda::memory_create(handle.cuda_handle(), handle.raw_size(), &(buffer[0])); - break; -#endif - default: - throw memory_exception("Invalid destination domain"); - } - } -#endif -#ifdef VIENNACL_WITH_CUDA - else //CUDA_MEMORY - { - std::vector<DataType> buffer; - - // write - switch (new_ctx.memory_type()) - { - case MAIN_MEMORY: - handle.ram_handle() = cpu_ram::memory_create(handle.raw_size()); - cuda::memory_read(handle.cuda_handle(), 0, handle.raw_size(), handle.ram_handle().get()); - break; -#ifdef VIENNACL_WITH_OPENCL - case OPENCL_MEMORY: - buffer.resize(handle.raw_size() / sizeof(DataType)); - cuda::memory_read(handle.cuda_handle(), 0, handle.raw_size(), &(buffer[0])); - handle.opencl_handle() = opencl::memory_create(handle.raw_size(), &(buffer[0])); - break; -#endif - default: - throw memory_exception("Unsupported source memory domain"); - } - } -#endif - - // everything succeeded so far, now switch to new domain: - handle.switch_active_handle_id(new_ctx.memory_type()); - - } // no data conversion - } - - - - /** @brief Copies data of the provided 'DataType' from 'handle_src' to 'handle_dst' and converts the data if the binary representation of 'DataType' among the memory domains differs. */ - template<typename DataType> - void typesafe_memory_copy(mem_handle const & handle_src, mem_handle & handle_dst) - { - if (handle_dst.get_active_handle_id() == MEMORY_NOT_INITIALIZED) - handle_dst.switch_active_handle_id(default_memory_type()); - - vcl_size_t element_size_src = detail::element_size<DataType>(handle_src.get_active_handle_id()); - vcl_size_t element_size_dst = detail::element_size<DataType>(handle_dst.get_active_handle_id()); - - if (element_size_src != element_size_dst) - { - // Data needs to be converted. - - typesafe_host_array<DataType> buffer_src(handle_src); - typesafe_host_array<DataType> buffer_dst(handle_dst, handle_src.raw_size() / element_size_src); - - // - // Step 1: Fill buffer_dst depending on where the data resides: - // - DataType const * src_data; - switch (handle_src.get_active_handle_id()) - { - case MAIN_MEMORY: - src_data = reinterpret_cast<DataType const *>(handle_src.ram_handle().get()); - for (vcl_size_t i=0; i<buffer_dst.size(); ++i) - buffer_dst.set(i, src_data[i]); - break; - -#ifdef VIENNACL_WITH_OPENCL - case OPENCL_MEMORY: - buffer_src.resize(handle_src, handle_src.raw_size() / element_size_src); - opencl::memory_read(handle_src.opencl_handle(), 0, buffer_src.raw_size(), buffer_src.get()); - for (vcl_size_t i=0; i<buffer_dst.size(); ++i) - buffer_dst.set(i, buffer_src[i]); - break; -#endif -#ifdef VIENNACL_WITH_CUDA - case CUDA_MEMORY: - buffer_src.resize(handle_src, handle_src.raw_size() / element_size_src); - cuda::memory_read(handle_src.cuda_handle(), 0, buffer_src.raw_size(), buffer_src.get()); - for (vcl_size_t i=0; i<buffer_dst.size(); ++i) - buffer_dst.set(i, buffer_src[i]); - break; -#endif - - default: - throw memory_exception("unsupported memory domain"); - } - - // - // Step 2: Write to destination - // - if (handle_dst.raw_size() == buffer_dst.raw_size()) - viennacl::backend::memory_write(handle_dst, 0, buffer_dst.raw_size(), buffer_dst.get()); - else - viennacl::backend::memory_create(handle_dst, buffer_dst.raw_size(), viennacl::traits::context(handle_dst), buffer_dst.get()); - - } - else - { - // No data conversion required. - typesafe_host_array<DataType> buffer(handle_src); - - switch (handle_src.get_active_handle_id()) - { - case MAIN_MEMORY: - switch (handle_dst.get_active_handle_id()) - { - case MAIN_MEMORY: - case OPENCL_MEMORY: - case CUDA_MEMORY: - if (handle_dst.raw_size() == handle_src.raw_size()) - viennacl::backend::memory_write(handle_dst, 0, handle_src.raw_size(), handle_src.ram_handle().get()); - else - viennacl::backend::memory_create(handle_dst, handle_src.raw_size(), viennacl::traits::context(handle_dst), handle_src.ram_handle().get()); - break; - - default: - throw memory_exception("unsupported destination memory domain"); - } - break; - - case OPENCL_MEMORY: - switch (handle_dst.get_active_handle_id()) - { - case MAIN_MEMORY: - if (handle_dst.raw_size() != handle_src.raw_size()) - viennacl::backend::memory_create(handle_dst, handle_src.raw_size(), viennacl::traits::context(handle_dst)); - viennacl::backend::memory_read(handle_src, 0, handle_src.raw_size(), handle_dst.ram_handle().get()); - break; - - case OPENCL_MEMORY: - if (handle_dst.raw_size() != handle_src.raw_size()) - viennacl::backend::memory_create(handle_dst, handle_src.raw_size(), viennacl::traits::context(handle_dst)); - viennacl::backend::memory_copy(handle_src, handle_dst, 0, 0, handle_src.raw_size()); - break; - - case CUDA_MEMORY: - if (handle_dst.raw_size() != handle_src.raw_size()) - viennacl::backend::memory_create(handle_dst, handle_src.raw_size(), viennacl::traits::context(handle_dst)); - buffer.resize(handle_src, handle_src.raw_size() / element_size_src); - viennacl::backend::memory_read(handle_src, 0, handle_src.raw_size(), buffer.get()); - viennacl::backend::memory_write(handle_dst, 0, handle_src.raw_size(), buffer.get()); - break; - - default: - throw memory_exception("unsupported destination memory domain"); - } - break; - - case CUDA_MEMORY: - switch (handle_dst.get_active_handle_id()) - { - case MAIN_MEMORY: - if (handle_dst.raw_size() != handle_src.raw_size()) - viennacl::backend::memory_create(handle_dst, handle_src.raw_size(), viennacl::traits::context(handle_dst)); - viennacl::backend::memory_read(handle_src, 0, handle_src.raw_size(), handle_dst.ram_handle().get()); - break; - - case OPENCL_MEMORY: - if (handle_dst.raw_size() != handle_src.raw_size()) - viennacl::backend::memory_create(handle_dst, handle_src.raw_size(), viennacl::traits::context(handle_dst)); - buffer.resize(handle_src, handle_src.raw_size() / element_size_src); - viennacl::backend::memory_read(handle_src, 0, handle_src.raw_size(), buffer.get()); - viennacl::backend::memory_write(handle_dst, 0, handle_src.raw_size(), buffer.get()); - break; - - case CUDA_MEMORY: - if (handle_dst.raw_size() != handle_src.raw_size()) - viennacl::backend::memory_create(handle_dst, handle_src.raw_size(), viennacl::traits::context(handle_dst)); - viennacl::backend::memory_copy(handle_src, handle_dst, 0, 0, handle_src.raw_size()); - break; - - default: - throw memory_exception("unsupported destination memory domain"); - } - break; - - default: - throw memory_exception("unsupported source memory domain"); - } - - } - } - - -} //backend - -// -// Convenience layer: -// -/** @brief Generic convenience routine for migrating data of an object to a new memory domain */ -template<typename T> -void switch_memory_context(T & obj, viennacl::context new_ctx) -{ - obj.switch_memory_context(new_ctx); -} - -} //viennacl -#endif http://git-wip-us.apache.org/repos/asf/mahout/blob/7ae549fa/native-viennaCL/src/main/cpp/viennacl/backend/opencl.hpp ---------------------------------------------------------------------- diff --git a/native-viennaCL/src/main/cpp/viennacl/backend/opencl.hpp b/native-viennaCL/src/main/cpp/viennacl/backend/opencl.hpp deleted file mode 100644 index a8be55a..0000000 --- a/native-viennaCL/src/main/cpp/viennacl/backend/opencl.hpp +++ /dev/null @@ -1,151 +0,0 @@ -#ifndef VIENNACL_BACKEND_OPENCL_HPP_ -#define VIENNACL_BACKEND_OPENCL_HPP_ - -/* ========================================================================= - Copyright (c) 2010-2016, Institute for Microelectronics, - Institute for Analysis and Scientific Computing, - TU Wien. - Portions of this software are copyright by UChicago Argonne, LLC. - - ----------------- - ViennaCL - The Vienna Computing Library - ----------------- - - Project Head: Karl Rupp [email protected] - - (A list of authors and contributors can be found in the manual) - - License: MIT (X11), see file LICENSE in the base directory -============================================================================= */ - -/** @file viennacl/backend/opencl.hpp - @brief Implementations for the OpenCL backend functionality -*/ - - -#include <vector> -#include "viennacl/ocl/handle.hpp" -#include "viennacl/ocl/backend.hpp" - -namespace viennacl -{ -namespace backend -{ -namespace opencl -{ - -// Requirements for backend: - -// * memory_create(size, host_ptr) -// * memory_copy(src, dest, offset_src, offset_dest, size) -// * memory_write_from_main_memory(src, offset, size, -// dest, offset, size) -// * memory_read_to_main_memory(src, offset, size -// dest, offset, size) -// * -// - -/** @brief Creates an array of the specified size in the current OpenCL context. If the second argument is provided, the buffer is initialized with data from that pointer. - * - * @param size_in_bytes Number of bytes to allocate - * @param host_ptr Pointer to data which will be copied to the new array. Must point to at least 'size_in_bytes' bytes of data. - * @param ctx Optional context in which the matrix is created (one out of multiple OpenCL contexts, CUDA, host) - * - */ -inline cl_mem memory_create(viennacl::ocl::context const & ctx, vcl_size_t size_in_bytes, const void * host_ptr = NULL) -{ - //std::cout << "Creating buffer (" << size_in_bytes << " bytes) host buffer " << host_ptr << " in context " << &ctx << std::endl; - return ctx.create_memory_without_smart_handle(CL_MEM_READ_WRITE, static_cast<unsigned int>(size_in_bytes), const_cast<void *>(host_ptr)); -} - -/** @brief Copies 'bytes_to_copy' bytes from address 'src_buffer + src_offset' in the OpenCL context to memory starting at address 'dst_buffer + dst_offset' in the same OpenCL context. - * - * @param src_buffer A smart pointer to the begin of an allocated OpenCL buffer - * @param dst_buffer A smart pointer to the end of an allocated OpenCL buffer - * @param src_offset Offset of the first byte to be written from the address given by 'src_buffer' (in bytes) - * @param dst_offset Offset of the first byte to be written to the address given by 'dst_buffer' (in bytes) - * @param bytes_to_copy Number of bytes to be copied - */ -inline void memory_copy(viennacl::ocl::handle<cl_mem> const & src_buffer, - viennacl::ocl::handle<cl_mem> & dst_buffer, - vcl_size_t src_offset, - vcl_size_t dst_offset, - vcl_size_t bytes_to_copy) -{ - assert( &src_buffer.context() == &dst_buffer.context() && bool("Transfer between memory buffers in different contexts not supported yet!")); - - viennacl::ocl::context & memory_context = const_cast<viennacl::ocl::context &>(src_buffer.context()); - cl_int err = clEnqueueCopyBuffer(memory_context.get_queue().handle().get(), - src_buffer.get(), - dst_buffer.get(), - src_offset, - dst_offset, - bytes_to_copy, - 0, NULL, NULL); //events - VIENNACL_ERR_CHECK(err); -} - - -/** @brief Writes data from main RAM identified by 'ptr' to the OpenCL buffer identified by 'dst_buffer' - * - * @param dst_buffer A smart pointer to the beginning of an allocated OpenCL buffer - * @param dst_offset Offset of the first written byte from the beginning of 'dst_buffer' (in bytes) - * @param bytes_to_copy Number of bytes to be copied - * @param ptr Pointer to the first byte to be written - * @param async Whether the operation should be asynchronous - */ -inline void memory_write(viennacl::ocl::handle<cl_mem> & dst_buffer, - vcl_size_t dst_offset, - vcl_size_t bytes_to_copy, - const void * ptr, - bool async = false) -{ - - viennacl::ocl::context & memory_context = const_cast<viennacl::ocl::context &>(dst_buffer.context()); - -#if defined(VIENNACL_DEBUG_ALL) || defined(VIENNACL_DEBUG_DEVICE) - std::cout << "Writing data (" << bytes_to_copy << " bytes, offset " << dst_offset << ") to OpenCL buffer " << dst_buffer.get() << " with queue " << memory_context.get_queue().handle().get() << " from " << ptr << std::endl; -#endif - - cl_int err = clEnqueueWriteBuffer(memory_context.get_queue().handle().get(), - dst_buffer.get(), - async ? CL_FALSE : CL_TRUE, //blocking - dst_offset, - bytes_to_copy, - ptr, - 0, NULL, NULL); //events - VIENNACL_ERR_CHECK(err); -} - - -/** @brief Reads data from an OpenCL buffer back to main RAM. - * - * @param src_buffer A smart pointer to the beginning of an allocated OpenCL source buffer - * @param src_offset Offset of the first byte to be read from the beginning of src_buffer (in bytes_ - * @param bytes_to_copy Number of bytes to be read - * @param ptr Location in main RAM where to read data should be written to - * @param async Whether the operation should be asynchronous - */ -inline void memory_read(viennacl::ocl::handle<cl_mem> const & src_buffer, - vcl_size_t src_offset, - vcl_size_t bytes_to_copy, - void * ptr, - bool async = false) -{ - //std::cout << "Reading data (" << bytes_to_copy << " bytes, offset " << src_offset << ") from OpenCL buffer " << src_buffer.get() << " to " << ptr << std::endl; - viennacl::ocl::context & memory_context = const_cast<viennacl::ocl::context &>(src_buffer.context()); - cl_int err = clEnqueueReadBuffer(memory_context.get_queue().handle().get(), - src_buffer.get(), - async ? CL_FALSE : CL_TRUE, //blocking - src_offset, - bytes_to_copy, - ptr, - 0, NULL, NULL); //events - VIENNACL_ERR_CHECK(err); -} - - -} -} //backend -} //viennacl -#endif http://git-wip-us.apache.org/repos/asf/mahout/blob/7ae549fa/native-viennaCL/src/main/cpp/viennacl/backend/util.hpp ---------------------------------------------------------------------- diff --git a/native-viennaCL/src/main/cpp/viennacl/backend/util.hpp b/native-viennaCL/src/main/cpp/viennacl/backend/util.hpp deleted file mode 100644 index 9aaeb2e..0000000 --- a/native-viennaCL/src/main/cpp/viennacl/backend/util.hpp +++ /dev/null @@ -1,268 +0,0 @@ -#ifndef VIENNACL_BACKEND_UTIL_HPP -#define VIENNACL_BACKEND_UTIL_HPP - -/* ========================================================================= - Copyright (c) 2010-2016, Institute for Microelectronics, - Institute for Analysis and Scientific Computing, - TU Wien. - Portions of this software are copyright by UChicago Argonne, LLC. - - ----------------- - ViennaCL - The Vienna Computing Library - ----------------- - - Project Head: Karl Rupp [email protected] - - (A list of authors and contributors can be found in the manual) - - License: MIT (X11), see file LICENSE in the base directory -============================================================================= */ - -/** @file viennacl/backend/util.hpp - @brief Helper functionality for working with different memory domains -*/ - -#include <vector> -#include <cassert> - -#include "viennacl/forwards.h" -#include "viennacl/backend/mem_handle.hpp" - -#ifdef VIENNACL_WITH_OPENCL -#include "viennacl/backend/opencl.hpp" -#endif - - -namespace viennacl -{ -namespace backend -{ -namespace detail -{ - - /** @brief Helper struct for converting a type to its OpenCL pendant. */ - template<typename T> - struct convert_to_opencl - { - typedef T type; - enum { special = 0 }; - }; - -#ifdef VIENNACL_WITH_OPENCL - template<> - struct convert_to_opencl<unsigned int> - { - typedef cl_uint type; - //enum { special = (sizeof(unsigned int) != sizeof(cl_uint)) }; - enum { special = 1 }; - }; - - template<> - struct convert_to_opencl<int> - { - typedef cl_int type; - //enum { special = (sizeof(int) != sizeof(cl_int)) }; - enum { special = 1 }; - }; - - - template<> - struct convert_to_opencl<unsigned long> - { - typedef cl_ulong type; - //enum { special = (sizeof(unsigned long) != sizeof(cl_ulong)) }; - enum { special = 1 }; - }; - - template<> - struct convert_to_opencl<long> - { - typedef cl_long type; - //enum { special = (sizeof(long) != sizeof(cl_long)) }; - enum { special = 1 }; - }; -#endif - - -} //namespace detail - - -/** @brief Helper class implementing an array on the host. Default case: No conversion necessary */ -template<typename T, bool special = detail::convert_to_opencl<T>::special> -class typesafe_host_array -{ - typedef T cpu_type; - typedef typename detail::convert_to_opencl<T>::type target_type; - -public: - explicit typesafe_host_array() : bytes_buffer_(NULL), buffer_size_(0) {} - - explicit typesafe_host_array(mem_handle const & handle, vcl_size_t num = 0) : bytes_buffer_(NULL), buffer_size_(sizeof(cpu_type) * num) - { - resize(handle, num); - } - - ~typesafe_host_array() { delete[] bytes_buffer_; } - - // - // Setter and Getter - // - void * get() { return reinterpret_cast<void *>(bytes_buffer_); } - vcl_size_t raw_size() const { return buffer_size_; } - vcl_size_t element_size() const { return sizeof(cpu_type); } - vcl_size_t size() const { return buffer_size_ / element_size(); } - template<typename U> - void set(vcl_size_t index, U value) - { - reinterpret_cast<cpu_type *>(bytes_buffer_)[index] = static_cast<cpu_type>(value); - } - - // - // Resize functionality - // - - /** @brief Resize without initializing the new memory */ - void raw_resize(mem_handle const & /*handle*/, vcl_size_t num) - { - buffer_size_ = sizeof(cpu_type) * num; - - if (num > 0) - { - delete[] bytes_buffer_; - - bytes_buffer_ = new char[buffer_size_]; - } - } - - /** @brief Resize including initialization of new memory (cf. std::vector<>) */ - void resize(mem_handle const & handle, vcl_size_t num) - { - raw_resize(handle, num); - - if (num > 0) - { - for (vcl_size_t i=0; i<buffer_size_; ++i) - bytes_buffer_[i] = 0; - } - } - - cpu_type operator[](vcl_size_t index) const - { - assert(index < size() && bool("index out of bounds")); - - return reinterpret_cast<cpu_type *>(bytes_buffer_)[index]; - } - -private: - char * bytes_buffer_; - vcl_size_t buffer_size_; -}; - - - - -/** @brief Special host array type for conversion between OpenCL types and pure CPU types */ -template<typename T> -class typesafe_host_array<T, true> -{ - typedef T cpu_type; - typedef typename detail::convert_to_opencl<T>::type target_type; - -public: - explicit typesafe_host_array() : convert_to_opencl_( (default_memory_type() == OPENCL_MEMORY) ? true : false), bytes_buffer_(NULL), buffer_size_(0) {} - - explicit typesafe_host_array(mem_handle const & handle, vcl_size_t num = 0) : convert_to_opencl_(false), bytes_buffer_(NULL), buffer_size_(sizeof(cpu_type) * num) - { - resize(handle, num); - } - - ~typesafe_host_array() { delete[] bytes_buffer_; } - - // - // Setter and Getter - // - - template<typename U> - void set(vcl_size_t index, U value) - { -#ifdef VIENNACL_WITH_OPENCL - if (convert_to_opencl_) - reinterpret_cast<target_type *>(bytes_buffer_)[index] = static_cast<target_type>(value); - else -#endif - reinterpret_cast<cpu_type *>(bytes_buffer_)[index] = static_cast<cpu_type>(value); - } - - void * get() { return reinterpret_cast<void *>(bytes_buffer_); } - cpu_type operator[](vcl_size_t index) const - { - assert(index < size() && bool("index out of bounds")); -#ifdef VIENNACL_WITH_OPENCL - if (convert_to_opencl_) - return static_cast<cpu_type>(reinterpret_cast<target_type *>(bytes_buffer_)[index]); -#endif - return reinterpret_cast<cpu_type *>(bytes_buffer_)[index]; - } - - vcl_size_t raw_size() const { return buffer_size_; } - vcl_size_t element_size() const - { -#ifdef VIENNACL_WITH_OPENCL - if (convert_to_opencl_) - return sizeof(target_type); -#endif - return sizeof(cpu_type); - } - vcl_size_t size() const { return buffer_size_ / element_size(); } - - // - // Resize functionality - // - - /** @brief Resize without initializing the new memory */ - void raw_resize(mem_handle const & handle, vcl_size_t num) - { - buffer_size_ = sizeof(cpu_type) * num; - (void)handle; //silence unused variable warning if compiled without OpenCL support - -#ifdef VIENNACL_WITH_OPENCL - memory_types mem_type = handle.get_active_handle_id(); - if (mem_type == MEMORY_NOT_INITIALIZED) - mem_type = default_memory_type(); - - if (mem_type == OPENCL_MEMORY) - { - convert_to_opencl_ = true; - buffer_size_ = sizeof(target_type) * num; - } -#endif - - if (num > 0) - { - delete[] bytes_buffer_; - - bytes_buffer_ = new char[buffer_size_]; - } - } - - /** @brief Resize including initialization of new memory (cf. std::vector<>) */ - void resize(mem_handle const & handle, vcl_size_t num) - { - raw_resize(handle, num); - - if (num > 0) - { - for (vcl_size_t i=0; i<buffer_size_; ++i) - bytes_buffer_[i] = 0; - } - } - -private: - bool convert_to_opencl_; - char * bytes_buffer_; - vcl_size_t buffer_size_; -}; - -} //backend -} //viennacl -#endif http://git-wip-us.apache.org/repos/asf/mahout/blob/7ae549fa/native-viennaCL/src/main/cpp/viennacl/circulant_matrix.hpp ---------------------------------------------------------------------- diff --git a/native-viennaCL/src/main/cpp/viennacl/circulant_matrix.hpp b/native-viennaCL/src/main/cpp/viennacl/circulant_matrix.hpp deleted file mode 100644 index 1ee13d5..0000000 --- a/native-viennaCL/src/main/cpp/viennacl/circulant_matrix.hpp +++ /dev/null @@ -1,359 +0,0 @@ -#ifndef VIENNACL_CIRCULANT_MATRIX_HPP -#define VIENNACL_CIRCULANT_MATRIX_HPP - -/* ========================================================================= - Copyright (c) 2010-2016, Institute for Microelectronics, - Institute for Analysis and Scientific Computing, - TU Wien. - Portions of this software are copyright by UChicago Argonne, LLC. - - ----------------- - ViennaCL - The Vienna Computing Library - ----------------- - - Project Head: Karl Rupp [email protected] - - (A list of authors and contributors can be found in the manual) - - License: MIT (X11), see file LICENSE in the base directory -============================================================================= */ - -/** @file circulant_matrix.hpp - @brief Implementation of the circulant_matrix class for efficient manipulation of circulant matrices. Experimental. -*/ - -#include "viennacl/forwards.h" -#include "viennacl/vector.hpp" -#include "viennacl/ocl/backend.hpp" - -#include "viennacl/linalg/circulant_matrix_operations.hpp" - -#include "viennacl/fft.hpp" - -namespace viennacl -{ -/** @brief A Circulant matrix class - * - * @tparam NumericT The underlying scalar type (either float or double) - * @tparam AlignmentV The internal memory size is given by (size()/AlignmentV + 1) * AlignmentV. AlignmentV must be a power of two. Best values or usually 4, 8 or 16, higher values are usually a waste of memory. - */ -template<class NumericT, unsigned int AlignmentV> -class circulant_matrix -{ -public: - typedef viennacl::backend::mem_handle handle_type; - typedef scalar<typename viennacl::tools::CHECK_SCALAR_TEMPLATE_ARGUMENT<NumericT>::ResultType> value_type; - - /** - * @brief The default constructor. Does not allocate any memory. - * - */ - explicit circulant_matrix() {} - - /** - * @brief Creates the matrix with the given size - * - * @param rows Number of rows of the matrix - * @param cols Number of columns of the matrix - */ - explicit circulant_matrix(vcl_size_t rows, vcl_size_t cols) : elements_(rows) - { - assert(rows == cols && bool("Circulant matrix must be square!")); - (void)cols; // avoid 'unused parameter' warning in optimized builds - } - - /** @brief Resizes the matrix. - * Existing entries can be preserved - * - * @param sz New size of matrix - * @param preserve If true, existing values are preserved. - */ - void resize(vcl_size_t sz, bool preserve = true) - { - elements_.resize(sz, preserve); - } - - /** @brief Returns the OpenCL handle - * - * @return OpenCL handle - */ - handle_type const & handle() const { return elements_.handle(); } - - /** - * @brief Returns an internal viennacl::vector, which represents a circulant matrix elements - * - */ - viennacl::vector<NumericT, AlignmentV> & elements() { return elements_; } - viennacl::vector<NumericT, AlignmentV> const & elements() const { return elements_; } - - /** - * @brief Returns the number of rows of the matrix - */ - vcl_size_t size1() const { return elements_.size(); } - - /** - * @brief Returns the number of columns of the matrix - */ - vcl_size_t size2() const { return elements_.size(); } - - /** @brief Returns the internal size of matrix representtion. - * Usually required for launching OpenCL kernels only - * - * @return Internal size of matrix representation - */ - vcl_size_t internal_size() const { return elements_.internal_size(); } - - /** - * @brief Read-write access to a single element of the matrix - * - * @param row_index Row index of accessed element - * @param col_index Column index of accessed element - * @return Proxy for matrix entry - */ - entry_proxy<NumericT> operator()(vcl_size_t row_index, vcl_size_t col_index) - { - long index = static_cast<long>(row_index) - static_cast<long>(col_index); - - assert(row_index < size1() && col_index < size2() && bool("Invalid access")); - - while (index < 0) - index += static_cast<long>(size1()); - return elements_[static_cast<vcl_size_t>(index)]; - } - - /** - * @brief += operation for circulant matrices - * - * @param that Matrix which will be added - * @return Result of addition - */ - circulant_matrix<NumericT, AlignmentV>& operator +=(circulant_matrix<NumericT, AlignmentV>& that) - { - elements_ += that.elements(); - return *this; - } - -private: - circulant_matrix(circulant_matrix const &) {} - circulant_matrix & operator=(circulant_matrix const & t); - - viennacl::vector<NumericT, AlignmentV> elements_; -}; - -/** @brief Copies a circulant matrix from the std::vector to the OpenCL device (either GPU or multi-core CPU) - * - * - * @param cpu_vec A std::vector on the host. - * @param gpu_mat A circulant_matrix from ViennaCL - */ -template<typename NumericT, unsigned int AlignmentV> -void copy(std::vector<NumericT>& cpu_vec, circulant_matrix<NumericT, AlignmentV>& gpu_mat) -{ - assert( (gpu_mat.size1() == 0 || cpu_vec.size() == gpu_mat.size1()) && bool("Size mismatch")); - copy(cpu_vec, gpu_mat.elements()); -} - -/** @brief Copies a circulant matrix from the OpenCL device (either GPU or multi-core CPU) to the std::vector - * - * - * @param gpu_mat A circulant_matrix from ViennaCL - * @param cpu_vec A std::vector on the host. - */ -template<typename NumericT, unsigned int AlignmentV> -void copy(circulant_matrix<NumericT, AlignmentV>& gpu_mat, std::vector<NumericT>& cpu_vec) -{ - assert(cpu_vec.size() == gpu_mat.size1() && bool("Size mismatch")); - copy(gpu_mat.elements(), cpu_vec); -} - -/** @brief Copies a circulant matrix from the OpenCL device (either GPU or multi-core CPU) to the matrix-like object - * - * - * @param circ_src A circulant_matrix from ViennaCL - * @param com_dst A matrix-like object - */ -template<typename NumericT, unsigned int AlignmentV, typename MatrixT> -void copy(circulant_matrix<NumericT, AlignmentV>& circ_src, MatrixT& com_dst) -{ - vcl_size_t size = circ_src.size1(); - assert(size == viennacl::traits::size1(com_dst) && bool("Size mismatch")); - assert(size == viennacl::traits::size2(com_dst) && bool("Size mismatch")); - std::vector<NumericT> tmp(size); - copy(circ_src, tmp); - - for (vcl_size_t i = 0; i < size; i++) - { - for (vcl_size_t j = 0; j < size; j++) - { - long index = static_cast<long>(i) - static_cast<long>(j); - if (index < 0) - index += static_cast<long>(size); - com_dst(i, j) = tmp[static_cast<vcl_size_t>(index)]; - } - } -} - -/** @brief Copies a the matrix-like object to the circulant matrix from the OpenCL device (either GPU or multi-core CPU) - * - * - * @param com_src A std::vector on the host - * @param circ_dst A circulant_matrix from ViennaCL - */ -template<typename NumericT, unsigned int AlignmentV, typename MatrixT> -void copy(MatrixT& com_src, circulant_matrix<NumericT, AlignmentV>& circ_dst) -{ - assert( (circ_dst.size1() == 0 || circ_dst.size1() == viennacl::traits::size1(com_src)) && bool("Size mismatch")); - assert( (circ_dst.size2() == 0 || circ_dst.size2() == viennacl::traits::size2(com_src)) && bool("Size mismatch")); - - vcl_size_t size = viennacl::traits::size1(com_src); - - std::vector<NumericT> tmp(size); - - for (vcl_size_t i = 0; i < size; i++) tmp[i] = com_src(i, 0); - - copy(tmp, circ_dst); -} - -/*namespace linalg - { - template<typename NumericT, unsigned int AlignmentV, unsigned int VECTOR_AlignmentV> - void prod_impl(circulant_matrix<NumericT, AlignmentV> const & mat, - vector<NumericT, VECTOR_AlignmentV> const & vec, - vector<NumericT, VECTOR_AlignmentV>& result) { - viennacl::vector<NumericT, VECTOR_AlignmentV> circ(mat.elements().size() * 2); - fft::real_to_complex(mat.elements(), circ, mat.elements().size()); - - viennacl::vector<NumericT, VECTOR_AlignmentV> tmp(vec.size() * 2); - viennacl::vector<NumericT, VECTOR_AlignmentV> tmp2(vec.size() * 2); - - fft::real_to_complex(vec, tmp, vec.size()); - fft::convolve(circ, tmp, tmp2); - fft::complex_to_real(tmp2, result, vec.size()); - } - }*/ - -/** @brief Prints the matrix. Output is compatible to boost::numeric::ublas - * - * @param s STL output stream - * @param gpu_matrix A ViennaCL circulant matrix - */ -template<class NumericT, unsigned int AlignmentV> -std::ostream & operator<<(std::ostream& s, circulant_matrix<NumericT, AlignmentV>& gpu_matrix) -{ - vcl_size_t size = gpu_matrix.size1(); - std::vector<NumericT> tmp(size); - copy(gpu_matrix, tmp); - s << "[" << size << "," << size << "]("; - - for (vcl_size_t i = 0; i < size; i++) - { - s << "("; - for (vcl_size_t j = 0; j < size; j++) - { - long index = static_cast<long>(i) - static_cast<long>(j); - if (index < 0) index = static_cast<long>(size) + index; - s << tmp[vcl_size_t(index)]; - //s << index; - if (j < (size - 1)) s << ","; - } - s << ")"; - } - s << ")"; - return s; -} - -// -// Specify available operations: -// - -/** \cond */ - -namespace linalg -{ -namespace detail -{ - // x = A * y - template<typename T, unsigned int A> - struct op_executor<vector_base<T>, op_assign, vector_expression<const circulant_matrix<T, A>, const vector_base<T>, op_prod> > - { - static void apply(vector_base<T> & lhs, vector_expression<const circulant_matrix<T, A>, const vector_base<T>, op_prod> const & rhs) - { - // check for the special case x = A * x - if (viennacl::traits::handle(lhs) == viennacl::traits::handle(rhs.rhs())) - { - viennacl::vector<T> temp(lhs); - viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), temp); - lhs = temp; - } - else - viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs); - } - }; - - template<typename T, unsigned int A> - struct op_executor<vector_base<T>, op_inplace_add, vector_expression<const circulant_matrix<T, A>, const vector_base<T>, op_prod> > - { - static void apply(vector_base<T> & lhs, vector_expression<const circulant_matrix<T, A>, const vector_base<T>, op_prod> const & rhs) - { - viennacl::vector<T> temp(lhs); - viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), temp); - lhs += temp; - } - }; - - template<typename T, unsigned int A> - struct op_executor<vector_base<T>, op_inplace_sub, vector_expression<const circulant_matrix<T, A>, const vector_base<T>, op_prod> > - { - static void apply(vector_base<T> & lhs, vector_expression<const circulant_matrix<T, A>, const vector_base<T>, op_prod> const & rhs) - { - viennacl::vector<T> temp(lhs); - viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), temp); - lhs -= temp; - } - }; - - - // x = A * vec_op - template<typename T, unsigned int A, typename LHS, typename RHS, typename OP> - struct op_executor<vector_base<T>, op_assign, vector_expression<const circulant_matrix<T, A>, const vector_expression<const LHS, const RHS, OP>, op_prod> > - { - static void apply(vector_base<T> & lhs, vector_expression<const circulant_matrix<T, A>, const vector_expression<const LHS, const RHS, OP>, op_prod> const & rhs) - { - viennacl::vector<T> temp(rhs.rhs()); - viennacl::linalg::prod_impl(rhs.lhs(), temp, lhs); - } - }; - - // x = A * vec_op - template<typename T, unsigned int A, typename LHS, typename RHS, typename OP> - struct op_executor<vector_base<T>, op_inplace_add, vector_expression<const circulant_matrix<T, A>, vector_expression<const LHS, const RHS, OP>, op_prod> > - { - static void apply(vector_base<T> & lhs, vector_expression<const circulant_matrix<T, A>, vector_expression<const LHS, const RHS, OP>, op_prod> const & rhs) - { - viennacl::vector<T> temp(rhs.rhs()); - viennacl::vector<T> temp_result(lhs); - viennacl::linalg::prod_impl(rhs.lhs(), temp, temp_result); - lhs += temp_result; - } - }; - - // x = A * vec_op - template<typename T, unsigned int A, typename LHS, typename RHS, typename OP> - struct op_executor<vector_base<T>, op_inplace_sub, vector_expression<const circulant_matrix<T, A>, const vector_expression<const LHS, const RHS, OP>, op_prod> > - { - static void apply(vector_base<T> & lhs, vector_expression<const circulant_matrix<T, A>, const vector_expression<const LHS, const RHS, OP>, op_prod> const & rhs) - { - viennacl::vector<T> temp(rhs.rhs()); - viennacl::vector<T> temp_result(lhs); - viennacl::linalg::prod_impl(rhs.lhs(), temp, temp_result); - lhs -= temp_result; - } - }; - -} // namespace detail -} // namespace linalg - -/** \endcond */ -} - -#endif // VIENNACL_CIRCULANT_MATRIX_HPP
