http://git-wip-us.apache.org/repos/asf/mahout/blob/7ae549fa/native-viennaCL/src/main/cpp/viennacl/compressed_compressed_matrix.hpp ---------------------------------------------------------------------- diff --git a/native-viennaCL/src/main/cpp/viennacl/compressed_compressed_matrix.hpp b/native-viennaCL/src/main/cpp/viennacl/compressed_compressed_matrix.hpp deleted file mode 100644 index f1719a2..0000000 --- a/native-viennaCL/src/main/cpp/viennacl/compressed_compressed_matrix.hpp +++ /dev/null @@ -1,619 +0,0 @@ -#ifndef VIENNACL_COMPRESSED_compressed_compressed_matrix_HPP_ -#define VIENNACL_COMPRESSED_compressed_compressed_matrix_HPP_ - -/* ========================================================================= - Copyright (c) 2010-2016, Institute for Microelectronics, - Institute for Analysis and Scientific Computing, - TU Wien. - Portions of this software are copyright by UChicago Argonne, LLC. - - ----------------- - ViennaCL - The Vienna Computing Library - ----------------- - - Project Head: Karl Rupp [email protected] - - (A list of authors and contributors can be found in the manual) - - License: MIT (X11), see file LICENSE in the base directory -============================================================================= */ - -/** @file viennacl/compressed_compressed_matrix.hpp - @brief Implementation of the compressed_compressed_matrix class (CSR format with a relatively small number of nonzero rows) -*/ - -#include <vector> -#include <list> -#include <map> -#include "viennacl/forwards.h" -#include "viennacl/vector.hpp" - -#include "viennacl/linalg/sparse_matrix_operations.hpp" - -#include "viennacl/tools/tools.hpp" -#include "viennacl/tools/entry_proxy.hpp" - -namespace viennacl -{ -namespace detail -{ - template<typename CPUMatrixT, typename NumericT> - void copy_impl(const CPUMatrixT & cpu_matrix, - compressed_compressed_matrix<NumericT> & gpu_matrix, - vcl_size_t nonzero_rows, - vcl_size_t nonzeros) - { - assert( (gpu_matrix.size1() == 0 || viennacl::traits::size1(cpu_matrix) == gpu_matrix.size1()) && bool("Size mismatch") ); - assert( (gpu_matrix.size2() == 0 || viennacl::traits::size2(cpu_matrix) == gpu_matrix.size2()) && bool("Size mismatch") ); - - viennacl::backend::typesafe_host_array<unsigned int> row_buffer(gpu_matrix.handle1(), nonzero_rows + 1); - viennacl::backend::typesafe_host_array<unsigned int> row_indices(gpu_matrix.handle3(), nonzero_rows); - viennacl::backend::typesafe_host_array<unsigned int> col_buffer(gpu_matrix.handle2(), nonzeros); - std::vector<NumericT> elements(nonzeros); - - vcl_size_t row_index = 0; - vcl_size_t data_index = 0; - - for (typename CPUMatrixT::const_iterator1 row_it = cpu_matrix.begin1(); - row_it != cpu_matrix.end1(); - ++row_it) - { - bool row_empty = true; - - for (typename CPUMatrixT::const_iterator2 col_it = row_it.begin(); - col_it != row_it.end(); - ++col_it) - { - NumericT entry = *col_it; - if (entry < 0 || entry > 0) // entry != 0 without compiler warnings - { - if (row_empty) - { - assert(row_index < nonzero_rows && bool("Provided count of nonzero rows exceeded!")); - - row_empty = false; - row_buffer.set(row_index, data_index); - row_indices.set(row_index, col_it.index1()); - ++row_index; - } - - col_buffer.set(data_index, col_it.index2()); - elements[data_index] = entry; - ++data_index; - } - } - } - row_buffer.set(row_index, data_index); - - gpu_matrix.set(row_buffer.get(), - row_indices.get(), - col_buffer.get(), - &elements[0], - cpu_matrix.size1(), - cpu_matrix.size2(), - nonzero_rows, - nonzeros); - } -} - -//provide copy-operation: -/** @brief Copies a sparse matrix from the host to the OpenCL device (either GPU or multi-core CPU) - * - * There are some type requirements on the CPUMatrixT type (fulfilled by e.g. boost::numeric::ublas): - * - .size1() returns the number of rows - * - .size2() returns the number of columns - * - const_iterator1 is a type definition for an iterator along increasing row indices - * - const_iterator2 is a type definition for an iterator along increasing columns indices - * - The const_iterator1 type provides an iterator of type const_iterator2 via members .begin() and .end() that iterates along column indices in the current row. - * - The types const_iterator1 and const_iterator2 provide members functions .index1() and .index2() that return the current row and column indices respectively. - * - Dereferenciation of an object of type const_iterator2 returns the entry. - * - * @param cpu_matrix A sparse matrix on the host. - * @param gpu_matrix A compressed_compressed_matrix from ViennaCL - */ -template<typename CPUMatrixT, typename NumericT> -void copy(const CPUMatrixT & cpu_matrix, - compressed_compressed_matrix<NumericT> & gpu_matrix ) -{ - //std::cout << "copy for (" << cpu_matrix.size1() << ", " << cpu_matrix.size2() << ", " << cpu_matrix.nnz() << ")" << std::endl; - - if ( cpu_matrix.size1() > 0 && cpu_matrix.size2() > 0 ) - { - //determine nonzero rows and total nonzeros: - vcl_size_t num_entries = 0; - vcl_size_t nonzero_rows = 0; - for (typename CPUMatrixT::const_iterator1 row_it = cpu_matrix.begin1(); - row_it != cpu_matrix.end1(); - ++row_it) - { - bool row_empty = true; - for (typename CPUMatrixT::const_iterator2 col_it = row_it.begin(); - col_it != row_it.end(); - ++col_it) - { - NumericT val = *col_it; - if (val < 0 || val > 0) // val != 0 without compiler warnings - { - ++num_entries; - - if (row_empty) - { - row_empty = false; - ++nonzero_rows; - } - } - } - } - - if (num_entries == 0) //we copy an empty matrix - num_entries = 1; - - //set up matrix entries: - viennacl::detail::copy_impl(cpu_matrix, gpu_matrix, nonzero_rows, num_entries); - } -} - - -//adapted for std::vector< std::map < > > argument: -/** @brief Copies a sparse square matrix in the std::vector< std::map < > > format to an OpenCL device. Use viennacl::tools::sparse_matrix_adapter for non-square matrices. - * - * @param cpu_matrix A sparse square matrix on the host using STL types - * @param gpu_matrix A compressed_compressed_matrix from ViennaCL - */ -template<typename SizeT, typename NumericT> -void copy(const std::vector< std::map<SizeT, NumericT> > & cpu_matrix, - compressed_compressed_matrix<NumericT> & gpu_matrix ) -{ - vcl_size_t nonzero_rows = 0; - vcl_size_t nonzeros = 0; - vcl_size_t max_col = 0; - for (vcl_size_t i=0; i<cpu_matrix.size(); ++i) - { - if (cpu_matrix[i].size() > 0) - ++nonzero_rows; - nonzeros += cpu_matrix[i].size(); - if (cpu_matrix[i].size() > 0) - max_col = std::max<vcl_size_t>(max_col, (cpu_matrix[i].rbegin())->first); - } - - viennacl::detail::copy_impl(tools::const_sparse_matrix_adapter<NumericT, SizeT>(cpu_matrix, cpu_matrix.size(), max_col + 1), - gpu_matrix, - nonzero_rows, - nonzeros); -} - - -// -// gpu to cpu: -// -/** @brief Copies a sparse matrix from the OpenCL device (either GPU or multi-core CPU) to the host. - * - * There are two type requirements on the CPUMatrixT type (fulfilled by e.g. boost::numeric::ublas): - * - resize(rows, cols) A resize function to bring the matrix into the correct size - * - operator(i,j) Write new entries via the parenthesis operator - * - * @param gpu_matrix A compressed_compressed_matrix from ViennaCL - * @param cpu_matrix A sparse matrix on the host. - */ -template<typename CPUMatrixT, typename NumericT> -void copy(const compressed_compressed_matrix<NumericT> & gpu_matrix, - CPUMatrixT & cpu_matrix ) -{ - assert( (cpu_matrix.size1() == gpu_matrix.size1()) && bool("Size mismatch") ); - assert( (cpu_matrix.size2() == gpu_matrix.size2()) && bool("Size mismatch") ); - - if ( gpu_matrix.size1() > 0 && gpu_matrix.size2() > 0 ) - { - //get raw data from memory: - viennacl::backend::typesafe_host_array<unsigned int> row_buffer(gpu_matrix.handle1(), gpu_matrix.nnz1() + 1); - viennacl::backend::typesafe_host_array<unsigned int> row_indices(gpu_matrix.handle1(), gpu_matrix.nnz1()); - viennacl::backend::typesafe_host_array<unsigned int> col_buffer(gpu_matrix.handle2(), gpu_matrix.nnz()); - std::vector<NumericT> elements(gpu_matrix.nnz()); - - //std::cout << "GPU->CPU, nonzeros: " << gpu_matrix.nnz() << std::endl; - - viennacl::backend::memory_read(gpu_matrix.handle1(), 0, row_buffer.raw_size(), row_buffer.get()); - viennacl::backend::memory_read(gpu_matrix.handle3(), 0, row_indices.raw_size(), row_indices.get()); - viennacl::backend::memory_read(gpu_matrix.handle2(), 0, col_buffer.raw_size(), col_buffer.get()); - viennacl::backend::memory_read(gpu_matrix.handle(), 0, sizeof(NumericT)* gpu_matrix.nnz(), &(elements[0])); - - //fill the cpu_matrix: - vcl_size_t data_index = 0; - for (vcl_size_t i = 1; i < row_buffer.size(); ++i) - { - while (data_index < row_buffer[i]) - { - if (col_buffer[data_index] >= gpu_matrix.size2()) - { - std::cerr << "ViennaCL encountered invalid data at colbuffer[" << data_index << "]: " << col_buffer[data_index] << std::endl; - return; - } - - NumericT val = elements[data_index]; - if (val < 0 || val > 0) // val != 0 without compiler warning - cpu_matrix(row_indices[i-1], col_buffer[data_index]) = val; - ++data_index; - } - } - } -} - - -/** @brief Copies a sparse matrix from an OpenCL device to the host. The host type is the std::vector< std::map < > > format . - * - * @param gpu_matrix A compressed_compressed_matrix from ViennaCL - * @param cpu_matrix A sparse matrix on the host. - */ -template<typename NumericT> -void copy(const compressed_compressed_matrix<NumericT> & gpu_matrix, - std::vector< std::map<unsigned int, NumericT> > & cpu_matrix) -{ - tools::sparse_matrix_adapter<NumericT> temp(cpu_matrix, cpu_matrix.size(), cpu_matrix.size()); - copy(gpu_matrix, temp); -} - - -//////////////////////// compressed_compressed_matrix ////////////////////////// -/** @brief A sparse square matrix in compressed sparse rows format optimized for the case that only a few rows carry nonzero entries. - * - * The difference to the 'standard' CSR format is that there is an additional array 'row_indices' so that the i-th set of indices in the CSR-layout refers to row_indices[i]. - * - * @tparam NumericT The floating point type (either float or double, checked at compile time) - * @tparam AlignmentV The internal memory size for the entries in each row is given by (size()/AlignmentV + 1) * AlignmentV. AlignmentV must be a power of two. Best values or usually 4, 8 or 16, higher values are usually a waste of memory. - */ -template<class NumericT> -class compressed_compressed_matrix -{ -public: - typedef viennacl::backend::mem_handle handle_type; - typedef scalar<typename viennacl::tools::CHECK_SCALAR_TEMPLATE_ARGUMENT<NumericT>::ResultType> value_type; - typedef vcl_size_t size_type; - - /** @brief Default construction of a compressed matrix. No memory is allocated */ - compressed_compressed_matrix() : rows_(0), cols_(0), nonzero_rows_(0), nonzeros_(0) {} - - /** @brief Construction of a compressed matrix with the supplied number of rows and columns. If the number of nonzeros is positive, memory is allocated - * - * @param rows Number of rows - * @param cols Number of columns - * @param nonzero_rows Optional number of nonzero rows for memory preallocation - * @param nonzeros Optional number of nonzeros for memory preallocation - * @param ctx Context in which to create the matrix. Uses the default context if omitted - */ - explicit compressed_compressed_matrix(vcl_size_t rows, vcl_size_t cols, vcl_size_t nonzero_rows = 0, vcl_size_t nonzeros = 0, viennacl::context ctx = viennacl::context()) - : rows_(rows), cols_(cols), nonzero_rows_(nonzero_rows), nonzeros_(nonzeros) - { - row_buffer_.switch_active_handle_id(ctx.memory_type()); - row_indices_.switch_active_handle_id(ctx.memory_type()); - col_buffer_.switch_active_handle_id(ctx.memory_type()); - elements_.switch_active_handle_id(ctx.memory_type()); - -#ifdef VIENNACL_WITH_OPENCL - if (ctx.memory_type() == OPENCL_MEMORY) - { - row_buffer_.opencl_handle().context(ctx.opencl_context()); - row_indices_.opencl_handle().context(ctx.opencl_context()); - col_buffer_.opencl_handle().context(ctx.opencl_context()); - elements_.opencl_handle().context(ctx.opencl_context()); - } -#endif - if (rows > 0) - { - viennacl::backend::memory_create(row_buffer_, viennacl::backend::typesafe_host_array<unsigned int>().element_size() * (rows + 1), ctx); - } - if (nonzeros > 0) - { - viennacl::backend::memory_create(col_buffer_, viennacl::backend::typesafe_host_array<unsigned int>().element_size() * nonzeros, ctx); - viennacl::backend::memory_create(elements_, sizeof(NumericT) * nonzeros, ctx); - } - } - - /** @brief Construction of a compressed matrix with the supplied number of rows and columns. If the number of nonzeros is positive, memory is allocated - * - * @param rows Number of rows - * @param cols Number of columns - * @param ctx Context in which to create the matrix - */ - explicit compressed_compressed_matrix(vcl_size_t rows, vcl_size_t cols, viennacl::context ctx) - : rows_(rows), cols_(cols), nonzeros_(0) - { - row_buffer_.switch_active_handle_id(ctx.memory_type()); - col_buffer_.switch_active_handle_id(ctx.memory_type()); - elements_.switch_active_handle_id(ctx.memory_type()); - -#ifdef VIENNACL_WITH_OPENCL - if (ctx.memory_type() == OPENCL_MEMORY) - { - row_buffer_.opencl_handle().context(ctx.opencl_context()); - col_buffer_.opencl_handle().context(ctx.opencl_context()); - elements_.opencl_handle().context(ctx.opencl_context()); - } -#endif - if (rows > 0) - { - viennacl::backend::memory_create(row_buffer_, viennacl::backend::typesafe_host_array<unsigned int>().element_size() * (rows + 1), ctx); - } - } - - explicit compressed_compressed_matrix(viennacl::context ctx) : rows_(0), cols_(0), nonzero_rows_(0), nonzeros_(0) - { - row_buffer_.switch_active_handle_id(ctx.memory_type()); - row_indices_.switch_active_handle_id(ctx.memory_type()); - col_buffer_.switch_active_handle_id(ctx.memory_type()); - elements_.switch_active_handle_id(ctx.memory_type()); - -#ifdef VIENNACL_WITH_OPENCL - if (ctx.memory_type() == OPENCL_MEMORY) - { - row_buffer_.opencl_handle().context(ctx.opencl_context()); - row_indices_.opencl_handle().context(ctx.opencl_context()); - col_buffer_.opencl_handle().context(ctx.opencl_context()); - elements_.opencl_handle().context(ctx.opencl_context()); - } -#endif - } - - -#ifdef VIENNACL_WITH_OPENCL - explicit compressed_compressed_matrix(cl_mem mem_row_buffer, cl_mem mem_row_indices, cl_mem mem_col_buffer, cl_mem mem_elements, - vcl_size_t rows, vcl_size_t cols, vcl_size_t nonzero_rows, vcl_size_t nonzeros) : - rows_(rows), cols_(cols), nonzero_rows_(nonzero_rows), nonzeros_(nonzeros) - { - row_buffer_.switch_active_handle_id(viennacl::OPENCL_MEMORY); - row_buffer_.opencl_handle() = mem_row_buffer; - row_buffer_.opencl_handle().inc(); //prevents that the user-provided memory is deleted once the matrix object is destroyed. - row_buffer_.raw_size(sizeof(cl_uint) * (nonzero_rows + 1)); - - row_indices_.switch_active_handle_id(viennacl::OPENCL_MEMORY); - row_indices_.opencl_handle() = mem_row_indices; - row_indices_.opencl_handle().inc(); //prevents that the user-provided memory is deleted once the matrix object is destroyed. - row_indices_.raw_size(sizeof(cl_uint) * nonzero_rows); - - col_buffer_.switch_active_handle_id(viennacl::OPENCL_MEMORY); - col_buffer_.opencl_handle() = mem_col_buffer; - col_buffer_.opencl_handle().inc(); //prevents that the user-provided memory is deleted once the matrix object is destroyed. - col_buffer_.raw_size(sizeof(cl_uint) * nonzeros); - - elements_.switch_active_handle_id(viennacl::OPENCL_MEMORY); - elements_.opencl_handle() = mem_elements; - elements_.opencl_handle().inc(); //prevents that the user-provided memory is deleted once the matrix object is destroyed. - elements_.raw_size(sizeof(NumericT) * nonzeros); - } -#endif - - - /** @brief Assignment a compressed matrix from possibly another memory domain. */ - compressed_compressed_matrix & operator=(compressed_compressed_matrix const & other) - { - assert( (rows_ == 0 || rows_ == other.size1()) && bool("Size mismatch") ); - assert( (cols_ == 0 || cols_ == other.size2()) && bool("Size mismatch") ); - - rows_ = other.size1(); - cols_ = other.size2(); - nonzero_rows_ = other.nnz1(); - nonzeros_ = other.nnz(); - - viennacl::backend::typesafe_memory_copy<unsigned int>(other.row_buffer_, row_buffer_); - viennacl::backend::typesafe_memory_copy<unsigned int>(other.row_indices_, row_indices_); - viennacl::backend::typesafe_memory_copy<unsigned int>(other.col_buffer_, col_buffer_); - viennacl::backend::typesafe_memory_copy<NumericT>(other.elements_, elements_); - - return *this; - } - - - /** @brief Sets the row, column and value arrays of the compressed matrix - * - * @param row_jumper Pointer to an array holding the indices of the first element of each row (starting with zero). E.g. row_jumper[10] returns the index of the first entry of the 11th row. The array length is 'cols + 1' - * @param row_indices Array holding the indices of the nonzero rows - * @param col_buffer Pointer to an array holding the column index of each entry. The array length is 'nonzeros' - * @param elements Pointer to an array holding the entries of the sparse matrix. The array length is 'elements' - * @param rows Number of rows of the sparse matrix - * @param cols Number of columns of the sparse matrix - * @param nonzero_rows Number of nonzero rows - * @param nonzeros Total number of nonzero entries - */ - void set(const void * row_jumper, - const void * row_indices, - const void * col_buffer, - const NumericT * elements, - vcl_size_t rows, - vcl_size_t cols, - vcl_size_t nonzero_rows, - vcl_size_t nonzeros) - { - assert( (rows > 0) && bool("Error in compressed_compressed_matrix::set(): Number of rows must be larger than zero!")); - assert( (cols > 0) && bool("Error in compressed_compressed_matrix::set(): Number of columns must be larger than zero!")); - assert( (nonzero_rows > 0) && bool("Error in compressed_compressed_matrix::set(): Number of nonzero rows must be larger than zero!")); - assert( (nonzeros > 0) && bool("Error in compressed_compressed_matrix::set(): Number of nonzeros must be larger than zero!")); - //std::cout << "Setting memory: " << cols + 1 << ", " << nonzeros << std::endl; - - viennacl::backend::memory_create(row_buffer_, viennacl::backend::typesafe_host_array<unsigned int>(row_buffer_).element_size() * (nonzero_rows + 1), viennacl::traits::context(row_buffer_), row_jumper); - viennacl::backend::memory_create(row_indices_, viennacl::backend::typesafe_host_array<unsigned int>(row_indices_).element_size() * nonzero_rows, viennacl::traits::context(row_indices_), row_indices); - viennacl::backend::memory_create(col_buffer_, viennacl::backend::typesafe_host_array<unsigned int>(col_buffer_).element_size() * nonzeros, viennacl::traits::context(col_buffer_), col_buffer); - viennacl::backend::memory_create(elements_, sizeof(NumericT) * nonzeros, viennacl::traits::context(elements_), elements); - - nonzeros_ = nonzeros; - nonzero_rows_ = nonzero_rows; - rows_ = rows; - cols_ = cols; - } - - /** @brief Resets all entries in the matrix back to zero without changing the matrix size. Resets the sparsity pattern. */ - void clear() - { - viennacl::backend::typesafe_host_array<unsigned int> host_row_buffer(row_buffer_, rows_ + 1); - viennacl::backend::typesafe_host_array<unsigned int> host_row_indices(row_indices_, rows_ + 1); - viennacl::backend::typesafe_host_array<unsigned int> host_col_buffer(col_buffer_, 1); - std::vector<NumericT> host_elements(1); - - viennacl::backend::memory_create(row_buffer_, host_row_buffer.element_size() * (rows_ + 1), viennacl::traits::context(row_buffer_), host_row_buffer.get()); - viennacl::backend::memory_create(row_indices_, host_row_indices.element_size() * (rows_ + 1), viennacl::traits::context(row_indices_), host_row_indices.get()); - viennacl::backend::memory_create(col_buffer_, host_col_buffer.element_size() * 1, viennacl::traits::context(col_buffer_), host_col_buffer.get()); - viennacl::backend::memory_create(elements_, sizeof(NumericT) * 1, viennacl::traits::context(elements_), &(host_elements[0])); - - nonzeros_ = 0; - nonzero_rows_ = 0; - } - - /** @brief Returns the number of rows */ - const vcl_size_t & size1() const { return rows_; } - /** @brief Returns the number of columns */ - const vcl_size_t & size2() const { return cols_; } - /** @brief Returns the number of nonzero entries */ - const vcl_size_t & nnz1() const { return nonzero_rows_; } - /** @brief Returns the number of nonzero entries */ - const vcl_size_t & nnz() const { return nonzeros_; } - - /** @brief Returns the OpenCL handle to the row index array */ - const handle_type & handle1() const { return row_buffer_; } - /** @brief Returns the OpenCL handle to the column index array */ - const handle_type & handle2() const { return col_buffer_; } - /** @brief Returns the OpenCL handle to the row index array */ - const handle_type & handle3() const { return row_indices_; } - /** @brief Returns the OpenCL handle to the matrix entry array */ - const handle_type & handle() const { return elements_; } - - /** @brief Returns the OpenCL handle to the row index array */ - handle_type & handle1() { return row_buffer_; } - /** @brief Returns the OpenCL handle to the column index array */ - handle_type & handle2() { return col_buffer_; } - /** @brief Returns the OpenCL handle to the row index array */ - handle_type & handle3() { return row_indices_; } - /** @brief Returns the OpenCL handle to the matrix entry array */ - handle_type & handle() { return elements_; } - - void switch_memory_context(viennacl::context new_ctx) - { - viennacl::backend::switch_memory_context<unsigned int>(row_buffer_, new_ctx); - viennacl::backend::switch_memory_context<unsigned int>(row_indices_, new_ctx); - viennacl::backend::switch_memory_context<unsigned int>(col_buffer_, new_ctx); - viennacl::backend::switch_memory_context<NumericT>(elements_, new_ctx); - } - - viennacl::memory_types memory_context() const - { - return row_buffer_.get_active_handle_id(); - } - -private: - - vcl_size_t rows_; - vcl_size_t cols_; - vcl_size_t nonzero_rows_; - vcl_size_t nonzeros_; - handle_type row_buffer_; - handle_type row_indices_; - handle_type col_buffer_; - handle_type elements_; -}; - - - -// -// Specify available operations: -// - -/** \cond */ - -namespace linalg -{ -namespace detail -{ - // x = A * y - template<typename T> - struct op_executor<vector_base<T>, op_assign, vector_expression<const compressed_compressed_matrix<T>, const vector_base<T>, op_prod> > - { - static void apply(vector_base<T> & lhs, vector_expression<const compressed_compressed_matrix<T>, const vector_base<T>, op_prod> const & rhs) - { - // check for the special case x = A * x - if (viennacl::traits::handle(lhs) == viennacl::traits::handle(rhs.rhs())) - { - viennacl::vector<T> temp(lhs); - viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), T(1), temp, T(0)); - lhs = temp; - } - else - viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), T(1), lhs, T(0)); - } - }; - - template<typename T> - struct op_executor<vector_base<T>, op_inplace_add, vector_expression<const compressed_compressed_matrix<T>, const vector_base<T>, op_prod> > - { - static void apply(vector_base<T> & lhs, vector_expression<const compressed_compressed_matrix<T>, const vector_base<T>, op_prod> const & rhs) - { - // check for the special case x += A * x - if (viennacl::traits::handle(lhs) == viennacl::traits::handle(rhs.rhs())) - { - viennacl::vector<T> temp(lhs); - viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), T(1), temp, T(0)); - lhs += temp; - } - else - viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), T(1), lhs, T(1)); - } - }; - - template<typename T> - struct op_executor<vector_base<T>, op_inplace_sub, vector_expression<const compressed_compressed_matrix<T>, const vector_base<T>, op_prod> > - { - static void apply(vector_base<T> & lhs, vector_expression<const compressed_compressed_matrix<T>, const vector_base<T>, op_prod> const & rhs) - { - // check for the special case x -= A * x - if (viennacl::traits::handle(lhs) == viennacl::traits::handle(rhs.rhs())) - { - viennacl::vector<T> temp(lhs); - viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), T(1), temp, T(0)); - lhs -= temp; - } - else - viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), T(-1), lhs, T(1)); - } - }; - - - // x = A * vec_op - template<typename T, typename LHS, typename RHS, typename OP> - struct op_executor<vector_base<T>, op_assign, vector_expression<const compressed_compressed_matrix<T>, const vector_expression<const LHS, const RHS, OP>, op_prod> > - { - static void apply(vector_base<T> & lhs, vector_expression<const compressed_compressed_matrix<T>, const vector_expression<const LHS, const RHS, OP>, op_prod> const & rhs) - { - viennacl::vector<T> temp(rhs.rhs()); - viennacl::linalg::prod_impl(rhs.lhs(), temp, lhs); - } - }; - - // x = A * vec_op - template<typename T, typename LHS, typename RHS, typename OP> - struct op_executor<vector_base<T>, op_inplace_add, vector_expression<const compressed_compressed_matrix<T>, vector_expression<const LHS, const RHS, OP>, op_prod> > - { - static void apply(vector_base<T> & lhs, vector_expression<const compressed_compressed_matrix<T>, vector_expression<const LHS, const RHS, OP>, op_prod> const & rhs) - { - viennacl::vector<T> temp(rhs.rhs(), viennacl::traits::context(rhs)); - viennacl::vector<T> temp_result(lhs); - viennacl::linalg::prod_impl(rhs.lhs(), temp, temp_result); - lhs += temp_result; - } - }; - - // x = A * vec_op - template<typename T, typename LHS, typename RHS, typename OP> - struct op_executor<vector_base<T>, op_inplace_sub, vector_expression<const compressed_compressed_matrix<T>, const vector_expression<const LHS, const RHS, OP>, op_prod> > - { - static void apply(vector_base<T> & lhs, vector_expression<const compressed_compressed_matrix<T>, const vector_expression<const LHS, const RHS, OP>, op_prod> const & rhs) - { - viennacl::vector<T> temp(rhs.rhs(), viennacl::traits::context(rhs)); - viennacl::vector<T> temp_result(lhs); - viennacl::linalg::prod_impl(rhs.lhs(), temp, temp_result); - lhs -= temp_result; - } - }; - -} // namespace detail -} // namespace linalg - -/** \endcond */ -} - -#endif
http://git-wip-us.apache.org/repos/asf/mahout/blob/7ae549fa/native-viennaCL/src/main/cpp/viennacl/compressed_matrix.hpp ---------------------------------------------------------------------- diff --git a/native-viennaCL/src/main/cpp/viennacl/compressed_matrix.hpp b/native-viennaCL/src/main/cpp/viennacl/compressed_matrix.hpp deleted file mode 100644 index e42f552..0000000 --- a/native-viennaCL/src/main/cpp/viennacl/compressed_matrix.hpp +++ /dev/null @@ -1,1178 +0,0 @@ -#ifndef VIENNACL_COMPRESSED_MATRIX_HPP_ -#define VIENNACL_COMPRESSED_MATRIX_HPP_ - -/* ========================================================================= - Copyright (c) 2010-2016, Institute for Microelectronics, - Institute for Analysis and Scientific Computing, - TU Wien. - Portions of this software are copyright by UChicago Argonne, LLC. - - ----------------- - ViennaCL - The Vienna Computing Library - ----------------- - - Project Head: Karl Rupp [email protected] - - (A list of authors and contributors can be found in the manual) - - License: MIT (X11), see file LICENSE in the base directory -============================================================================= */ - -/** @file viennacl/compressed_matrix.hpp - @brief Implementation of the compressed_matrix class -*/ - -#include <vector> -#include <list> -#include <map> -#include "viennacl/forwards.h" -#include "viennacl/vector.hpp" - -#include "viennacl/linalg/sparse_matrix_operations.hpp" - -#include "viennacl/tools/tools.hpp" -#include "viennacl/tools/entry_proxy.hpp" - -#ifdef VIENNACL_WITH_UBLAS -#include <boost/numeric/ublas/matrix_sparse.hpp> -#endif - -namespace viennacl -{ -namespace detail -{ - - /** @brief Implementation of the copy of a host-based sparse matrix to the device. - * - * See convenience copy() routines for type requirements of CPUMatrixT - */ - template<typename CPUMatrixT, typename NumericT, unsigned int AlignmentV> - void copy_impl(const CPUMatrixT & cpu_matrix, - compressed_matrix<NumericT, AlignmentV> & gpu_matrix, - vcl_size_t nonzeros) - { - assert( (gpu_matrix.size1() == 0 || viennacl::traits::size1(cpu_matrix) == gpu_matrix.size1()) && bool("Size mismatch") ); - assert( (gpu_matrix.size2() == 0 || viennacl::traits::size2(cpu_matrix) == gpu_matrix.size2()) && bool("Size mismatch") ); - - viennacl::backend::typesafe_host_array<unsigned int> row_buffer(gpu_matrix.handle1(), cpu_matrix.size1() + 1); - viennacl::backend::typesafe_host_array<unsigned int> col_buffer(gpu_matrix.handle2(), nonzeros); - std::vector<NumericT> elements(nonzeros); - - vcl_size_t row_index = 0; - vcl_size_t data_index = 0; - - for (typename CPUMatrixT::const_iterator1 row_it = cpu_matrix.begin1(); - row_it != cpu_matrix.end1(); - ++row_it) - { - row_buffer.set(row_index, data_index); - ++row_index; - - for (typename CPUMatrixT::const_iterator2 col_it = row_it.begin(); - col_it != row_it.end(); - ++col_it) - { - col_buffer.set(data_index, col_it.index2()); - elements[data_index] = *col_it; - ++data_index; - } - data_index = viennacl::tools::align_to_multiple<vcl_size_t>(data_index, AlignmentV); //take care of alignment - } - row_buffer.set(row_index, data_index); - - gpu_matrix.set(row_buffer.get(), - col_buffer.get(), - &elements[0], - cpu_matrix.size1(), - cpu_matrix.size2(), - nonzeros); - } -} - -// -// host to device: -// - -//provide copy-operation: -/** @brief Copies a sparse matrix from the host to the OpenCL device (either GPU or multi-core CPU) - * - * There are some type requirements on the CPUMatrixT type (fulfilled by e.g. boost::numeric::ublas): - * - .size1() returns the number of rows - * - .size2() returns the number of columns - * - const_iterator1 is a type definition for an iterator along increasing row indices - * - const_iterator2 is a type definition for an iterator along increasing columns indices - * - The const_iterator1 type provides an iterator of type const_iterator2 via members .begin() and .end() that iterates along column indices in the current row. - * - The types const_iterator1 and const_iterator2 provide members functions .index1() and .index2() that return the current row and column indices respectively. - * - Dereferenciation of an object of type const_iterator2 returns the entry. - * - * @param cpu_matrix A sparse matrix on the host. - * @param gpu_matrix A compressed_matrix from ViennaCL - */ -template<typename CPUMatrixT, typename NumericT, unsigned int AlignmentV> -void copy(const CPUMatrixT & cpu_matrix, - compressed_matrix<NumericT, AlignmentV> & gpu_matrix ) -{ - if ( cpu_matrix.size1() > 0 && cpu_matrix.size2() > 0 ) - { - //determine nonzeros: - vcl_size_t num_entries = 0; - for (typename CPUMatrixT::const_iterator1 row_it = cpu_matrix.begin1(); - row_it != cpu_matrix.end1(); - ++row_it) - { - vcl_size_t entries_per_row = 0; - for (typename CPUMatrixT::const_iterator2 col_it = row_it.begin(); - col_it != row_it.end(); - ++col_it) - { - ++entries_per_row; - } - num_entries += viennacl::tools::align_to_multiple<vcl_size_t>(entries_per_row, AlignmentV); - } - - if (num_entries == 0) //we copy an empty matrix - num_entries = 1; - - //set up matrix entries: - viennacl::detail::copy_impl(cpu_matrix, gpu_matrix, num_entries); - } -} - - -//adapted for std::vector< std::map < > > argument: -/** @brief Copies a sparse square matrix in the std::vector< std::map < > > format to an OpenCL device. Use viennacl::tools::sparse_matrix_adapter for non-square matrices. - * - * @param cpu_matrix A sparse square matrix on the host using STL types - * @param gpu_matrix A compressed_matrix from ViennaCL - */ -template<typename SizeT, typename NumericT, unsigned int AlignmentV> -void copy(const std::vector< std::map<SizeT, NumericT> > & cpu_matrix, - compressed_matrix<NumericT, AlignmentV> & gpu_matrix ) -{ - vcl_size_t nonzeros = 0; - vcl_size_t max_col = 0; - for (vcl_size_t i=0; i<cpu_matrix.size(); ++i) - { - if (cpu_matrix[i].size() > 0) - nonzeros += ((cpu_matrix[i].size() - 1) / AlignmentV + 1) * AlignmentV; - if (cpu_matrix[i].size() > 0) - max_col = std::max<vcl_size_t>(max_col, (cpu_matrix[i].rbegin())->first); - } - - viennacl::detail::copy_impl(tools::const_sparse_matrix_adapter<NumericT, SizeT>(cpu_matrix, cpu_matrix.size(), max_col + 1), - gpu_matrix, - nonzeros); -} - -#ifdef VIENNACL_WITH_UBLAS -/** @brief Convenience routine for copying a sparse uBLAS matrix to a ViennaCL matrix. - * - * Optimization which copies the data directly from the internal uBLAS buffers. - */ -template<typename ScalarType, typename F, vcl_size_t IB, typename IA, typename TA> -void copy(const boost::numeric::ublas::compressed_matrix<ScalarType, F, IB, IA, TA> & ublas_matrix, - viennacl::compressed_matrix<ScalarType, 1> & gpu_matrix) -{ - assert( (gpu_matrix.size1() == 0 || viennacl::traits::size1(ublas_matrix) == gpu_matrix.size1()) && bool("Size mismatch") ); - assert( (gpu_matrix.size2() == 0 || viennacl::traits::size2(ublas_matrix) == gpu_matrix.size2()) && bool("Size mismatch") ); - - //we just need to copy the CSR arrays: - viennacl::backend::typesafe_host_array<unsigned int> row_buffer(gpu_matrix.handle1(), ublas_matrix.size1() + 1); - for (vcl_size_t i=0; i<=ublas_matrix.size1(); ++i) - row_buffer.set(i, ublas_matrix.index1_data()[i]); - - viennacl::backend::typesafe_host_array<unsigned int> col_buffer(gpu_matrix.handle2(), ublas_matrix.nnz()); - for (vcl_size_t i=0; i<ublas_matrix.nnz(); ++i) - col_buffer.set(i, ublas_matrix.index2_data()[i]); - - gpu_matrix.set(row_buffer.get(), - col_buffer.get(), - &(ublas_matrix.value_data()[0]), - ublas_matrix.size1(), - ublas_matrix.size2(), - ublas_matrix.nnz()); - -} -#endif - -#ifdef VIENNACL_WITH_ARMADILLO -/** @brief Convenience routine for copying a sparse Armadillo matrix to a ViennaCL matrix. - * - * Since Armadillo uses a column-major format, while ViennaCL uses row-major, we need to transpose. - * This is done fairly efficiently working on the CSR arrays directly, rather than (slowly) building an STL matrix. - */ -template<typename NumericT, unsigned int AlignmentV> -void copy(arma::SpMat<NumericT> const & arma_matrix, - viennacl::compressed_matrix<NumericT, AlignmentV> & vcl_matrix) -{ - assert( (vcl_matrix.size1() == 0 || static_cast<vcl_size_t>(arma_matrix.n_rows) == vcl_matrix.size1()) && bool("Size mismatch") ); - assert( (vcl_matrix.size2() == 0 || static_cast<vcl_size_t>(arma_matrix.n_cols) == vcl_matrix.size2()) && bool("Size mismatch") ); - - viennacl::backend::typesafe_host_array<unsigned int> row_buffer(vcl_matrix.handle1(), arma_matrix.n_rows + 1); - viennacl::backend::typesafe_host_array<unsigned int> col_buffer(vcl_matrix.handle2(), arma_matrix.n_nonzero); - viennacl::backend::typesafe_host_array<NumericT > value_buffer(vcl_matrix.handle(), arma_matrix.n_nonzero); - - // Step 1: Count number of nonzeros in each row - for (vcl_size_t col=0; col < static_cast<vcl_size_t>(arma_matrix.n_cols); ++col) - { - vcl_size_t col_begin = static_cast<vcl_size_t>(arma_matrix.col_ptrs[col]); - vcl_size_t col_end = static_cast<vcl_size_t>(arma_matrix.col_ptrs[col+1]); - for (vcl_size_t i = col_begin; i < col_end; ++i) - { - unsigned int row = arma_matrix.row_indices[i]; - row_buffer.set(row, row_buffer[row] + 1); - } - } - - // Step 2: Exclusive scan on row_buffer to obtain offsets - unsigned int offset = 0; - for (vcl_size_t i=0; i<row_buffer.size(); ++i) - { - unsigned int tmp = row_buffer[i]; - row_buffer.set(i, offset); - offset += tmp; - } - - // Step 3: Fill data - std::vector<unsigned int> row_offsets(arma_matrix.n_rows); - for (vcl_size_t col=0; col < static_cast<vcl_size_t>(arma_matrix.n_cols); ++col) - { - vcl_size_t col_begin = static_cast<vcl_size_t>(arma_matrix.col_ptrs[col]); - vcl_size_t col_end = static_cast<vcl_size_t>(arma_matrix.col_ptrs[col+1]); - for (vcl_size_t i = col_begin; i < col_end; ++i) - { - unsigned int row = arma_matrix.row_indices[i]; - col_buffer.set(row_buffer[row] + row_offsets[row], col); - value_buffer.set(row_buffer[row] + row_offsets[row], arma_matrix.values[i]); - row_offsets[row] += 1; - } - } - - vcl_matrix.set(row_buffer.get(), col_buffer.get(), reinterpret_cast<NumericT*>(value_buffer.get()), - arma_matrix.n_rows, arma_matrix.n_cols, arma_matrix.n_nonzero); -} -#endif - -#ifdef VIENNACL_WITH_EIGEN -/** @brief Convenience routine for copying a sparse Eigen matrix to a ViennaCL matrix. - * - * Builds a temporary STL matrix. Patches for avoiding the temporary matrix welcome. - */ -template<typename NumericT, int flags, unsigned int AlignmentV> -void copy(const Eigen::SparseMatrix<NumericT, flags> & eigen_matrix, - compressed_matrix<NumericT, AlignmentV> & gpu_matrix) -{ - assert( (gpu_matrix.size1() == 0 || static_cast<vcl_size_t>(eigen_matrix.rows()) == gpu_matrix.size1()) && bool("Size mismatch") ); - assert( (gpu_matrix.size2() == 0 || static_cast<vcl_size_t>(eigen_matrix.cols()) == gpu_matrix.size2()) && bool("Size mismatch") ); - - std::vector< std::map<unsigned int, NumericT> > stl_matrix(eigen_matrix.rows()); - - for (int k=0; k < eigen_matrix.outerSize(); ++k) - for (typename Eigen::SparseMatrix<NumericT, flags>::InnerIterator it(eigen_matrix, k); it; ++it) - stl_matrix[it.row()][it.col()] = it.value(); - - copy(tools::const_sparse_matrix_adapter<NumericT>(stl_matrix, eigen_matrix.rows(), eigen_matrix.cols()), gpu_matrix); -} -#endif - - -#ifdef VIENNACL_WITH_MTL4 -/** @brief Convenience routine for copying a sparse MTL4 matrix to a ViennaCL matrix. - * - * Builds a temporary STL matrix for the copy. Patches for avoiding the temporary matrix welcome. - */ -template<typename NumericT, unsigned int AlignmentV> -void copy(const mtl::compressed2D<NumericT> & cpu_matrix, - compressed_matrix<NumericT, AlignmentV> & gpu_matrix) -{ - assert( (gpu_matrix.size1() == 0 || static_cast<vcl_size_t>(cpu_matrix.num_rows()) == gpu_matrix.size1()) && bool("Size mismatch") ); - assert( (gpu_matrix.size2() == 0 || static_cast<vcl_size_t>(cpu_matrix.num_cols()) == gpu_matrix.size2()) && bool("Size mismatch") ); - - typedef mtl::compressed2D<NumericT> MatrixType; - - std::vector< std::map<unsigned int, NumericT> > stl_matrix(cpu_matrix.num_rows()); - - using mtl::traits::range_generator; - using mtl::traits::range::min; - - // Choose between row and column traversal - typedef typename min<range_generator<mtl::tag::row, MatrixType>, - range_generator<mtl::tag::col, MatrixType> >::type range_type; - range_type my_range; - - // Type of outer cursor - typedef typename range_type::type c_type; - // Type of inner cursor - typedef typename mtl::traits::range_generator<mtl::tag::nz, c_type>::type ic_type; - - // Define the property maps - typename mtl::traits::row<MatrixType>::type row(cpu_matrix); - typename mtl::traits::col<MatrixType>::type col(cpu_matrix); - typename mtl::traits::const_value<MatrixType>::type value(cpu_matrix); - - // Now iterate over the matrix - for (c_type cursor(my_range.begin(cpu_matrix)), cend(my_range.end(cpu_matrix)); cursor != cend; ++cursor) - for (ic_type icursor(mtl::begin<mtl::tag::nz>(cursor)), icend(mtl::end<mtl::tag::nz>(cursor)); icursor != icend; ++icursor) - stl_matrix[row(*icursor)][col(*icursor)] = value(*icursor); - - copy(tools::const_sparse_matrix_adapter<NumericT>(stl_matrix, cpu_matrix.num_rows(), cpu_matrix.num_cols()), gpu_matrix); -} -#endif - - - - - - - -// -// device to host: -// -/** @brief Copies a sparse matrix from the OpenCL device (either GPU or multi-core CPU) to the host. - * - * There are two type requirements on the CPUMatrixT type (fulfilled by e.g. boost::numeric::ublas): - * - resize(rows, cols) A resize function to bring the matrix into the correct size - * - operator(i,j) Write new entries via the parenthesis operator - * - * @param gpu_matrix A compressed_matrix from ViennaCL - * @param cpu_matrix A sparse matrix on the host. - */ -template<typename CPUMatrixT, typename NumericT, unsigned int AlignmentV> -void copy(const compressed_matrix<NumericT, AlignmentV> & gpu_matrix, - CPUMatrixT & cpu_matrix ) -{ - assert( (viennacl::traits::size1(cpu_matrix) == gpu_matrix.size1()) && bool("Size mismatch") ); - assert( (viennacl::traits::size2(cpu_matrix) == gpu_matrix.size2()) && bool("Size mismatch") ); - - if ( gpu_matrix.size1() > 0 && gpu_matrix.size2() > 0 ) - { - //get raw data from memory: - viennacl::backend::typesafe_host_array<unsigned int> row_buffer(gpu_matrix.handle1(), cpu_matrix.size1() + 1); - viennacl::backend::typesafe_host_array<unsigned int> col_buffer(gpu_matrix.handle2(), gpu_matrix.nnz()); - std::vector<NumericT> elements(gpu_matrix.nnz()); - - //std::cout << "GPU->CPU, nonzeros: " << gpu_matrix.nnz() << std::endl; - - viennacl::backend::memory_read(gpu_matrix.handle1(), 0, row_buffer.raw_size(), row_buffer.get()); - viennacl::backend::memory_read(gpu_matrix.handle2(), 0, col_buffer.raw_size(), col_buffer.get()); - viennacl::backend::memory_read(gpu_matrix.handle(), 0, sizeof(NumericT)* gpu_matrix.nnz(), &(elements[0])); - - //fill the cpu_matrix: - vcl_size_t data_index = 0; - for (vcl_size_t row = 1; row <= gpu_matrix.size1(); ++row) - { - while (data_index < row_buffer[row]) - { - if (col_buffer[data_index] >= gpu_matrix.size2()) - { - std::cerr << "ViennaCL encountered invalid data at colbuffer[" << data_index << "]: " << col_buffer[data_index] << std::endl; - return; - } - - if (std::fabs(elements[data_index]) > static_cast<NumericT>(0)) - cpu_matrix(row-1, static_cast<vcl_size_t>(col_buffer[data_index])) = elements[data_index]; - ++data_index; - } - } - } -} - - -/** @brief Copies a sparse matrix from an OpenCL device to the host. The host type is the std::vector< std::map < > > format . - * - * @param gpu_matrix A compressed_matrix from ViennaCL - * @param cpu_matrix A sparse matrix on the host. - */ -template<typename NumericT, unsigned int AlignmentV> -void copy(const compressed_matrix<NumericT, AlignmentV> & gpu_matrix, - std::vector< std::map<unsigned int, NumericT> > & cpu_matrix) -{ - assert( (cpu_matrix.size() == gpu_matrix.size1()) && bool("Size mismatch") ); - - tools::sparse_matrix_adapter<NumericT> temp(cpu_matrix, gpu_matrix.size1(), gpu_matrix.size2()); - copy(gpu_matrix, temp); -} - -#ifdef VIENNACL_WITH_UBLAS -/** @brief Convenience routine for copying a ViennaCL sparse matrix back to a sparse uBLAS matrix - * - * Directly populates the internal buffer of the uBLAS matrix, thus avoiding a temporary STL matrix. - */ -template<typename ScalarType, unsigned int AlignmentV, typename F, vcl_size_t IB, typename IA, typename TA> -void copy(viennacl::compressed_matrix<ScalarType, AlignmentV> const & gpu_matrix, - boost::numeric::ublas::compressed_matrix<ScalarType> & ublas_matrix) -{ - assert( (viennacl::traits::size1(ublas_matrix) == gpu_matrix.size1()) && bool("Size mismatch") ); - assert( (viennacl::traits::size2(ublas_matrix) == gpu_matrix.size2()) && bool("Size mismatch") ); - - viennacl::backend::typesafe_host_array<unsigned int> row_buffer(gpu_matrix.handle1(), gpu_matrix.size1() + 1); - viennacl::backend::typesafe_host_array<unsigned int> col_buffer(gpu_matrix.handle2(), gpu_matrix.nnz()); - - viennacl::backend::memory_read(gpu_matrix.handle1(), 0, row_buffer.raw_size(), row_buffer.get()); - viennacl::backend::memory_read(gpu_matrix.handle2(), 0, col_buffer.raw_size(), col_buffer.get()); - - ublas_matrix.clear(); - ublas_matrix.reserve(gpu_matrix.nnz()); - - ublas_matrix.set_filled(gpu_matrix.size1() + 1, gpu_matrix.nnz()); - - for (vcl_size_t i=0; i<ublas_matrix.size1() + 1; ++i) - ublas_matrix.index1_data()[i] = row_buffer[i]; - - for (vcl_size_t i=0; i<ublas_matrix.nnz(); ++i) - ublas_matrix.index2_data()[i] = col_buffer[i]; - - viennacl::backend::memory_read(gpu_matrix.handle(), 0, sizeof(ScalarType) * gpu_matrix.nnz(), &(ublas_matrix.value_data()[0])); - -} -#endif - -#ifdef VIENNACL_WITH_ARMADILLO -/** @brief Convenience routine for copying a ViennaCL sparse matrix back to a sparse Armadillo matrix. - * - * Performance notice: Inserting the row-major data from the ViennaCL matrix to the column-major Armadillo-matrix is likely to be slow. - * However, since this operation is unlikely to be performance-critical, further optimizations are postponed. -*/ -template<typename NumericT, unsigned int AlignmentV> -void copy(viennacl::compressed_matrix<NumericT, AlignmentV> & vcl_matrix, - arma::SpMat<NumericT> & arma_matrix) -{ - assert( (static_cast<vcl_size_t>(arma_matrix.n_rows) == vcl_matrix.size1()) && bool("Size mismatch") ); - assert( (static_cast<vcl_size_t>(arma_matrix.n_cols) == vcl_matrix.size2()) && bool("Size mismatch") ); - - if ( vcl_matrix.size1() > 0 && vcl_matrix.size2() > 0 ) - { - //get raw data from memory: - viennacl::backend::typesafe_host_array<unsigned int> row_buffer(vcl_matrix.handle1(), vcl_matrix.size1() + 1); - viennacl::backend::typesafe_host_array<unsigned int> col_buffer(vcl_matrix.handle2(), vcl_matrix.nnz()); - viennacl::backend::typesafe_host_array<NumericT> elements (vcl_matrix.handle(), vcl_matrix.nnz()); - - viennacl::backend::memory_read(vcl_matrix.handle1(), 0, row_buffer.raw_size(), row_buffer.get()); - viennacl::backend::memory_read(vcl_matrix.handle2(), 0, col_buffer.raw_size(), col_buffer.get()); - viennacl::backend::memory_read(vcl_matrix.handle(), 0, elements.raw_size(), elements.get()); - - arma_matrix.zeros(); - vcl_size_t data_index = 0; - for (vcl_size_t row = 1; row <= vcl_matrix.size1(); ++row) - { - while (data_index < row_buffer[row]) - { - assert(col_buffer[data_index] < vcl_matrix.size2() && bool("ViennaCL encountered invalid data at col_buffer")); - if (elements[data_index] != static_cast<NumericT>(0.0)) - arma_matrix(row-1, col_buffer[data_index]) = elements[data_index]; - ++data_index; - } - } - } -} -#endif - -#ifdef VIENNACL_WITH_EIGEN -/** @brief Convenience routine for copying a ViennaCL sparse matrix back to a sparse Eigen matrix */ -template<typename NumericT, int flags, unsigned int AlignmentV> -void copy(compressed_matrix<NumericT, AlignmentV> & gpu_matrix, - Eigen::SparseMatrix<NumericT, flags> & eigen_matrix) -{ - assert( (static_cast<vcl_size_t>(eigen_matrix.rows()) == gpu_matrix.size1()) && bool("Size mismatch") ); - assert( (static_cast<vcl_size_t>(eigen_matrix.cols()) == gpu_matrix.size2()) && bool("Size mismatch") ); - - if ( gpu_matrix.size1() > 0 && gpu_matrix.size2() > 0 ) - { - //get raw data from memory: - viennacl::backend::typesafe_host_array<unsigned int> row_buffer(gpu_matrix.handle1(), gpu_matrix.size1() + 1); - viennacl::backend::typesafe_host_array<unsigned int> col_buffer(gpu_matrix.handle2(), gpu_matrix.nnz()); - std::vector<NumericT> elements(gpu_matrix.nnz()); - - viennacl::backend::memory_read(gpu_matrix.handle1(), 0, row_buffer.raw_size(), row_buffer.get()); - viennacl::backend::memory_read(gpu_matrix.handle2(), 0, col_buffer.raw_size(), col_buffer.get()); - viennacl::backend::memory_read(gpu_matrix.handle(), 0, sizeof(NumericT)* gpu_matrix.nnz(), &(elements[0])); - - eigen_matrix.setZero(); - vcl_size_t data_index = 0; - for (vcl_size_t row = 1; row <= gpu_matrix.size1(); ++row) - { - while (data_index < row_buffer[row]) - { - assert(col_buffer[data_index] < gpu_matrix.size2() && bool("ViennaCL encountered invalid data at col_buffer")); - if (elements[data_index] != static_cast<NumericT>(0.0)) - eigen_matrix.insert(row-1, col_buffer[data_index]) = elements[data_index]; - ++data_index; - } - } - } -} -#endif - - - -#ifdef VIENNACL_WITH_MTL4 -/** @brief Convenience routine for copying a ViennaCL sparse matrix back to a sparse MTL4 matrix */ -template<typename NumericT, unsigned int AlignmentV> -void copy(compressed_matrix<NumericT, AlignmentV> & gpu_matrix, - mtl::compressed2D<NumericT> & mtl4_matrix) -{ - assert( (static_cast<vcl_size_t>(mtl4_matrix.num_rows()) == gpu_matrix.size1()) && bool("Size mismatch") ); - assert( (static_cast<vcl_size_t>(mtl4_matrix.num_cols()) == gpu_matrix.size2()) && bool("Size mismatch") ); - - if ( gpu_matrix.size1() > 0 && gpu_matrix.size2() > 0 ) - { - - //get raw data from memory: - viennacl::backend::typesafe_host_array<unsigned int> row_buffer(gpu_matrix.handle1(), gpu_matrix.size1() + 1); - viennacl::backend::typesafe_host_array<unsigned int> col_buffer(gpu_matrix.handle2(), gpu_matrix.nnz()); - std::vector<NumericT> elements(gpu_matrix.nnz()); - - viennacl::backend::memory_read(gpu_matrix.handle1(), 0, row_buffer.raw_size(), row_buffer.get()); - viennacl::backend::memory_read(gpu_matrix.handle2(), 0, col_buffer.raw_size(), col_buffer.get()); - viennacl::backend::memory_read(gpu_matrix.handle(), 0, sizeof(NumericT)* gpu_matrix.nnz(), &(elements[0])); - - //set_to_zero(mtl4_matrix); - //mtl4_matrix.change_dim(gpu_matrix.size1(), gpu_matrix.size2()); - - mtl::matrix::inserter< mtl::compressed2D<NumericT> > ins(mtl4_matrix); - vcl_size_t data_index = 0; - for (vcl_size_t row = 1; row <= gpu_matrix.size1(); ++row) - { - while (data_index < row_buffer[row]) - { - assert(col_buffer[data_index] < gpu_matrix.size2() && bool("ViennaCL encountered invalid data at col_buffer")); - if (elements[data_index] != static_cast<NumericT>(0.0)) - ins(row-1, col_buffer[data_index]) << typename mtl::Collection< mtl::compressed2D<NumericT> >::value_type(elements[data_index]); - ++data_index; - } - } - } -} -#endif - - - - - -//////////////////////// compressed_matrix ////////////////////////// -/** @brief A sparse square matrix in compressed sparse rows format. - * - * @tparam NumericT The floating point type (either float or double, checked at compile time) - * @tparam AlignmentV The internal memory size for the entries in each row is given by (size()/AlignmentV + 1) * AlignmentV. AlignmentV must be a power of two. Best values or usually 4, 8 or 16, higher values are usually a waste of memory. - */ -template<class NumericT, unsigned int AlignmentV /* see VCLForwards.h */> -class compressed_matrix -{ -public: - typedef viennacl::backend::mem_handle handle_type; - typedef scalar<typename viennacl::tools::CHECK_SCALAR_TEMPLATE_ARGUMENT<NumericT>::ResultType> value_type; - typedef vcl_size_t size_type; - - /** @brief Default construction of a compressed matrix. No memory is allocated */ - compressed_matrix() : rows_(0), cols_(0), nonzeros_(0), row_block_num_(0) {} - - /** @brief Construction of a compressed matrix with the supplied number of rows and columns. If the number of nonzeros is positive, memory is allocated - * - * @param rows Number of rows - * @param cols Number of columns - * @param nonzeros Optional number of nonzeros for memory preallocation - * @param ctx Optional context in which the matrix is created (one out of multiple OpenCL contexts, CUDA, host) - */ - explicit compressed_matrix(vcl_size_t rows, vcl_size_t cols, vcl_size_t nonzeros = 0, viennacl::context ctx = viennacl::context()) - : rows_(rows), cols_(cols), nonzeros_(nonzeros), row_block_num_(0) - { - row_buffer_.switch_active_handle_id(ctx.memory_type()); - col_buffer_.switch_active_handle_id(ctx.memory_type()); - elements_.switch_active_handle_id(ctx.memory_type()); - row_blocks_.switch_active_handle_id(ctx.memory_type()); - -#ifdef VIENNACL_WITH_OPENCL - if (ctx.memory_type() == OPENCL_MEMORY) - { - row_buffer_.opencl_handle().context(ctx.opencl_context()); - col_buffer_.opencl_handle().context(ctx.opencl_context()); - elements_.opencl_handle().context(ctx.opencl_context()); - row_blocks_.opencl_handle().context(ctx.opencl_context()); - } -#endif - if (rows > 0) - { - viennacl::backend::memory_create(row_buffer_, viennacl::backend::typesafe_host_array<unsigned int>().element_size() * (rows + 1), ctx); - viennacl::vector_base<unsigned int> init_temporary(row_buffer_, size_type(rows+1), 0, 1); - init_temporary = viennacl::zero_vector<unsigned int>(size_type(rows+1), ctx); - } - if (nonzeros > 0) - { - viennacl::backend::memory_create(col_buffer_, viennacl::backend::typesafe_host_array<unsigned int>().element_size() * nonzeros, ctx); - viennacl::backend::memory_create(elements_, sizeof(NumericT) * nonzeros, ctx); - } - } - - /** @brief Construction of a compressed matrix with the supplied number of rows and columns. If the number of nonzeros is positive, memory is allocated - * - * @param rows Number of rows - * @param cols Number of columns - * @param ctx Context in which to create the matrix - */ - explicit compressed_matrix(vcl_size_t rows, vcl_size_t cols, viennacl::context ctx) - : rows_(rows), cols_(cols), nonzeros_(0), row_block_num_(0) - { - row_buffer_.switch_active_handle_id(ctx.memory_type()); - col_buffer_.switch_active_handle_id(ctx.memory_type()); - elements_.switch_active_handle_id(ctx.memory_type()); - row_blocks_.switch_active_handle_id(ctx.memory_type()); - -#ifdef VIENNACL_WITH_OPENCL - if (ctx.memory_type() == OPENCL_MEMORY) - { - row_buffer_.opencl_handle().context(ctx.opencl_context()); - col_buffer_.opencl_handle().context(ctx.opencl_context()); - elements_.opencl_handle().context(ctx.opencl_context()); - row_blocks_.opencl_handle().context(ctx.opencl_context()); - } -#endif - if (rows > 0) - { - viennacl::backend::memory_create(row_buffer_, viennacl::backend::typesafe_host_array<unsigned int>().element_size() * (rows + 1), ctx); - viennacl::vector_base<unsigned int> init_temporary(row_buffer_, size_type(rows+1), 0, 1); - init_temporary = viennacl::zero_vector<unsigned int>(size_type(rows+1), ctx); - } - } - - /** @brief Creates an empty compressed_matrix, but sets the respective context information. - * - * This is useful if you want to want to populate e.g. a viennacl::compressed_matrix<> on the host with copy(), but the default backend is OpenCL. - */ - explicit compressed_matrix(viennacl::context ctx) : rows_(0), cols_(0), nonzeros_(0), row_block_num_(0) - { - row_buffer_.switch_active_handle_id(ctx.memory_type()); - col_buffer_.switch_active_handle_id(ctx.memory_type()); - elements_.switch_active_handle_id(ctx.memory_type()); - row_blocks_.switch_active_handle_id(ctx.memory_type()); - -#ifdef VIENNACL_WITH_OPENCL - if (ctx.memory_type() == OPENCL_MEMORY) - { - row_buffer_.opencl_handle().context(ctx.opencl_context()); - col_buffer_.opencl_handle().context(ctx.opencl_context()); - elements_.opencl_handle().context(ctx.opencl_context()); - row_blocks_.opencl_handle().context(ctx.opencl_context()); - } -#endif - } - - -#ifdef VIENNACL_WITH_OPENCL - /** @brief Wraps existing OpenCL buffers holding the compressed sparse row information. - * - * @param mem_row_buffer A buffer consisting of unsigned integers (cl_uint) holding the entry points for each row (0-based indexing). (rows+1) elements, the last element being 'nonzeros'. - * @param mem_col_buffer A buffer consisting of unsigned integers (cl_uint) holding the column index for each nonzero entry as stored in 'mem_elements'. - * @param mem_elements A buffer holding the floating point numbers for nonzeros. OpenCL type of elements must match the template 'NumericT'. - * @param rows Number of rows in the matrix to be wrapped. - * @param cols Number of columns to be wrapped. - * @param nonzeros Number of nonzero entries in the matrix. - */ - explicit compressed_matrix(cl_mem mem_row_buffer, cl_mem mem_col_buffer, cl_mem mem_elements, - vcl_size_t rows, vcl_size_t cols, vcl_size_t nonzeros) : - rows_(rows), cols_(cols), nonzeros_(nonzeros), row_block_num_(0) - { - row_buffer_.switch_active_handle_id(viennacl::OPENCL_MEMORY); - row_buffer_.opencl_handle() = mem_row_buffer; - row_buffer_.opencl_handle().inc(); //prevents that the user-provided memory is deleted once the matrix object is destroyed. - row_buffer_.raw_size(sizeof(cl_uint) * (rows + 1)); - - col_buffer_.switch_active_handle_id(viennacl::OPENCL_MEMORY); - col_buffer_.opencl_handle() = mem_col_buffer; - col_buffer_.opencl_handle().inc(); //prevents that the user-provided memory is deleted once the matrix object is destroyed. - col_buffer_.raw_size(sizeof(cl_uint) * nonzeros); - - elements_.switch_active_handle_id(viennacl::OPENCL_MEMORY); - elements_.opencl_handle() = mem_elements; - elements_.opencl_handle().inc(); //prevents that the user-provided memory is deleted once the matrix object is destroyed. - elements_.raw_size(sizeof(NumericT) * nonzeros); - - //generate block information for CSR-adaptive: - generate_row_block_information(); - } -#endif - - /** @brief Assignment a compressed matrix from the product of two compressed_matrix objects (C = A * B). */ - compressed_matrix(matrix_expression<const compressed_matrix, const compressed_matrix, op_prod> const & proxy) - : rows_(0), cols_(0), nonzeros_(0), row_block_num_(0) - { - viennacl::context ctx = viennacl::traits::context(proxy.lhs()); - - row_buffer_.switch_active_handle_id(ctx.memory_type()); - col_buffer_.switch_active_handle_id(ctx.memory_type()); - elements_.switch_active_handle_id(ctx.memory_type()); - row_blocks_.switch_active_handle_id(ctx.memory_type()); - -#ifdef VIENNACL_WITH_OPENCL - if (ctx.memory_type() == OPENCL_MEMORY) - { - row_buffer_.opencl_handle().context(ctx.opencl_context()); - col_buffer_.opencl_handle().context(ctx.opencl_context()); - elements_.opencl_handle().context(ctx.opencl_context()); - row_blocks_.opencl_handle().context(ctx.opencl_context()); - } -#endif - - viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), *this); - generate_row_block_information(); - } - - /** @brief Assignment a compressed matrix from possibly another memory domain. */ - compressed_matrix & operator=(compressed_matrix const & other) - { - assert( (rows_ == 0 || rows_ == other.size1()) && bool("Size mismatch") ); - assert( (cols_ == 0 || cols_ == other.size2()) && bool("Size mismatch") ); - - rows_ = other.size1(); - cols_ = other.size2(); - nonzeros_ = other.nnz(); - row_block_num_ = other.row_block_num_; - - viennacl::backend::typesafe_memory_copy<unsigned int>(other.row_buffer_, row_buffer_); - viennacl::backend::typesafe_memory_copy<unsigned int>(other.col_buffer_, col_buffer_); - viennacl::backend::typesafe_memory_copy<unsigned int>(other.row_blocks_, row_blocks_); - viennacl::backend::typesafe_memory_copy<NumericT>(other.elements_, elements_); - - return *this; - } - - /** @brief Assignment a compressed matrix from the product of two compressed_matrix objects (C = A * B). */ - compressed_matrix & operator=(matrix_expression<const compressed_matrix, const compressed_matrix, op_prod> const & proxy) - { - assert( (rows_ == 0 || rows_ == proxy.lhs().size1()) && bool("Size mismatch") ); - assert( (cols_ == 0 || cols_ == proxy.rhs().size2()) && bool("Size mismatch") ); - - viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), *this); - generate_row_block_information(); - - return *this; - } - - - /** @brief Sets the row, column and value arrays of the compressed matrix - * - * Type of row_jumper and col_buffer is 'unsigned int' for CUDA and OpenMP (host) backend, but *must* be cl_uint for OpenCL. - * The reason is that 'unsigned int' might have a different bit representation on the host than 'unsigned int' on the OpenCL device. - * cl_uint is guaranteed to have the correct bit representation for OpenCL devices. - * - * @param row_jumper Pointer to an array holding the indices of the first element of each row (starting with zero). E.g. row_jumper[10] returns the index of the first entry of the 11th row. The array length is 'cols + 1' - * @param col_buffer Pointer to an array holding the column index of each entry. The array length is 'nonzeros' - * @param elements Pointer to an array holding the entries of the sparse matrix. The array length is 'elements' - * @param rows Number of rows of the sparse matrix - * @param cols Number of columns of the sparse matrix - * @param nonzeros Number of nonzeros - */ - void set(const void * row_jumper, - const void * col_buffer, - const NumericT * elements, - vcl_size_t rows, - vcl_size_t cols, - vcl_size_t nonzeros) - { - assert( (rows > 0) && bool("Error in compressed_matrix::set(): Number of rows must be larger than zero!")); - assert( (cols > 0) && bool("Error in compressed_matrix::set(): Number of columns must be larger than zero!")); - assert( (nonzeros > 0) && bool("Error in compressed_matrix::set(): Number of nonzeros must be larger than zero!")); - //std::cout << "Setting memory: " << cols + 1 << ", " << nonzeros << std::endl; - - //row_buffer_.switch_active_handle_id(viennacl::backend::OPENCL_MEMORY); - viennacl::backend::memory_create(row_buffer_, viennacl::backend::typesafe_host_array<unsigned int>(row_buffer_).element_size() * (rows + 1), viennacl::traits::context(row_buffer_), row_jumper); - - //col_buffer_.switch_active_handle_id(viennacl::backend::OPENCL_MEMORY); - viennacl::backend::memory_create(col_buffer_, viennacl::backend::typesafe_host_array<unsigned int>(col_buffer_).element_size() * nonzeros, viennacl::traits::context(col_buffer_), col_buffer); - - //elements_.switch_active_handle_id(viennacl::backend::OPENCL_MEMORY); - viennacl::backend::memory_create(elements_, sizeof(NumericT) * nonzeros, viennacl::traits::context(elements_), elements); - - nonzeros_ = nonzeros; - rows_ = rows; - cols_ = cols; - - //generate block information for CSR-adaptive: - generate_row_block_information(); - } - - /** @brief Allocate memory for the supplied number of nonzeros in the matrix. Old values are preserved. */ - void reserve(vcl_size_t new_nonzeros, bool preserve = true) - { - if (new_nonzeros > nonzeros_) - { - if (preserve) - { - handle_type col_buffer_old; - handle_type elements_old; - viennacl::backend::memory_shallow_copy(col_buffer_, col_buffer_old); - viennacl::backend::memory_shallow_copy(elements_, elements_old); - - viennacl::backend::typesafe_host_array<unsigned int> size_deducer(col_buffer_); - viennacl::backend::memory_create(col_buffer_, size_deducer.element_size() * new_nonzeros, viennacl::traits::context(col_buffer_)); - viennacl::backend::memory_create(elements_, sizeof(NumericT) * new_nonzeros, viennacl::traits::context(elements_)); - - viennacl::backend::memory_copy(col_buffer_old, col_buffer_, 0, 0, size_deducer.element_size() * nonzeros_); - viennacl::backend::memory_copy(elements_old, elements_, 0, 0, sizeof(NumericT)* nonzeros_); - } - else - { - viennacl::backend::typesafe_host_array<unsigned int> size_deducer(col_buffer_); - viennacl::backend::memory_create(col_buffer_, size_deducer.element_size() * new_nonzeros, viennacl::traits::context(col_buffer_)); - viennacl::backend::memory_create(elements_, sizeof(NumericT) * new_nonzeros, viennacl::traits::context(elements_)); - } - - nonzeros_ = new_nonzeros; - } - } - - /** @brief Resize the matrix. - * - * @param new_size1 New number of rows - * @param new_size2 New number of columns - * @param preserve If true, the old values are preserved. At present, old values are always discarded. - */ - void resize(vcl_size_t new_size1, vcl_size_t new_size2, bool preserve = true) - { - assert(new_size1 > 0 && new_size2 > 0 && bool("Cannot resize to zero size!")); - - if (new_size1 != rows_ || new_size2 != cols_) - { - if (!preserve) - { - viennacl::backend::typesafe_host_array<unsigned int> host_row_buffer(row_buffer_, new_size1 + 1); - viennacl::backend::memory_create(row_buffer_, viennacl::backend::typesafe_host_array<unsigned int>().element_size() * (new_size1 + 1), viennacl::traits::context(row_buffer_), host_row_buffer.get()); - // faster version without initializing memory: - //viennacl::backend::memory_create(row_buffer_, viennacl::backend::typesafe_host_array<unsigned int>().element_size() * (new_size1 + 1), viennacl::traits::context(row_buffer_)); - nonzeros_ = 0; - } - else - { - std::vector<std::map<unsigned int, NumericT> > stl_sparse_matrix; - if (rows_ > 0) - { - stl_sparse_matrix.resize(rows_); - viennacl::copy(*this, stl_sparse_matrix); - } else { - stl_sparse_matrix.resize(new_size1); - stl_sparse_matrix[0][0] = 0; //enforces nonzero array sizes if matrix was initially empty - } - - stl_sparse_matrix.resize(new_size1); - - //discard entries with column index larger than new_size2 - if (new_size2 < cols_ && rows_ > 0) - { - for (vcl_size_t i=0; i<stl_sparse_matrix.size(); ++i) - { - std::list<unsigned int> to_delete; - for (typename std::map<unsigned int, NumericT>::iterator it = stl_sparse_matrix[i].begin(); - it != stl_sparse_matrix[i].end(); - ++it) - { - if (it->first >= new_size2) - to_delete.push_back(it->first); - } - - for (std::list<unsigned int>::iterator it = to_delete.begin(); it != to_delete.end(); ++it) - stl_sparse_matrix[i].erase(*it); - } - } - - viennacl::tools::sparse_matrix_adapter<NumericT> adapted_matrix(stl_sparse_matrix, new_size1, new_size2); - rows_ = new_size1; - cols_ = new_size2; - viennacl::copy(adapted_matrix, *this); - } - - rows_ = new_size1; - cols_ = new_size2; - } - } - - /** @brief Resets all entries in the matrix back to zero without changing the matrix size. Resets the sparsity pattern. */ - void clear() - { - viennacl::backend::typesafe_host_array<unsigned int> host_row_buffer(row_buffer_, rows_ + 1); - viennacl::backend::typesafe_host_array<unsigned int> host_col_buffer(col_buffer_, 1); - std::vector<NumericT> host_elements(1); - - viennacl::backend::memory_create(row_buffer_, host_row_buffer.element_size() * (rows_ + 1), viennacl::traits::context(row_buffer_), host_row_buffer.get()); - viennacl::backend::memory_create(col_buffer_, host_col_buffer.element_size() * 1, viennacl::traits::context(col_buffer_), host_col_buffer.get()); - viennacl::backend::memory_create(elements_, sizeof(NumericT) * 1, viennacl::traits::context(elements_), &(host_elements[0])); - - nonzeros_ = 0; - } - - /** @brief Returns a reference to the (i,j)-th entry of the sparse matrix. If (i,j) does not exist (zero), it is inserted (slow!) */ - entry_proxy<NumericT> operator()(vcl_size_t i, vcl_size_t j) - { - assert( (i < rows_) && (j < cols_) && bool("compressed_matrix access out of bounds!")); - - vcl_size_t index = element_index(i, j); - - // check for element in sparsity pattern - if (index < nonzeros_) - return entry_proxy<NumericT>(index, elements_); - - // Element not found. Copying required. Very slow, but direct entry manipulation is painful anyway... - std::vector< std::map<unsigned int, NumericT> > cpu_backup(rows_); - tools::sparse_matrix_adapter<NumericT> adapted_cpu_backup(cpu_backup, rows_, cols_); - viennacl::copy(*this, adapted_cpu_backup); - cpu_backup[i][static_cast<unsigned int>(j)] = 0.0; - viennacl::copy(adapted_cpu_backup, *this); - - index = element_index(i, j); - - assert(index < nonzeros_); - - return entry_proxy<NumericT>(index, elements_); - } - - /** @brief Returns the number of rows */ - const vcl_size_t & size1() const { return rows_; } - /** @brief Returns the number of columns */ - const vcl_size_t & size2() const { return cols_; } - /** @brief Returns the number of nonzero entries */ - const vcl_size_t & nnz() const { return nonzeros_; } - /** @brief Returns the internal number of row blocks for an adaptive SpMV */ - const vcl_size_t & blocks1() const { return row_block_num_; } - - /** @brief Returns the OpenCL handle to the row index array */ - const handle_type & handle1() const { return row_buffer_; } - /** @brief Returns the OpenCL handle to the column index array */ - const handle_type & handle2() const { return col_buffer_; } - /** @brief Returns the OpenCL handle to the row block array */ - const handle_type & handle3() const { return row_blocks_; } - /** @brief Returns the OpenCL handle to the matrix entry array */ - const handle_type & handle() const { return elements_; } - - /** @brief Returns the OpenCL handle to the row index array */ - handle_type & handle1() { return row_buffer_; } - /** @brief Returns the OpenCL handle to the column index array */ - handle_type & handle2() { return col_buffer_; } - /** @brief Returns the OpenCL handle to the row block array */ - handle_type & handle3() { return row_blocks_; } - /** @brief Returns the OpenCL handle to the matrix entry array */ - handle_type & handle() { return elements_; } - - /** @brief Switches the memory context of the matrix. - * - * Allows for e.g. an migration of the full matrix from OpenCL memory to host memory for e.g. computing a preconditioner. - */ - void switch_memory_context(viennacl::context new_ctx) - { - viennacl::backend::switch_memory_context<unsigned int>(row_buffer_, new_ctx); - viennacl::backend::switch_memory_context<unsigned int>(col_buffer_, new_ctx); - viennacl::backend::switch_memory_context<unsigned int>(row_blocks_, new_ctx); - viennacl::backend::switch_memory_context<NumericT>(elements_, new_ctx); - } - - /** @brief Returns the current memory context to determine whether the matrix is set up for OpenMP, OpenCL, or CUDA. */ - viennacl::memory_types memory_context() const - { - return row_buffer_.get_active_handle_id(); - } - -private: - - /** @brief Helper function for accessing the element (i,j) of the matrix. */ - vcl_size_t element_index(vcl_size_t i, vcl_size_t j) - { - //read row indices - viennacl::backend::typesafe_host_array<unsigned int> row_indices(row_buffer_, 2); - viennacl::backend::memory_read(row_buffer_, row_indices.element_size()*i, row_indices.element_size()*2, row_indices.get()); - - //get column indices for row i: - viennacl::backend::typesafe_host_array<unsigned int> col_indices(col_buffer_, row_indices[1] - row_indices[0]); - viennacl::backend::memory_read(col_buffer_, col_indices.element_size()*row_indices[0], row_indices.element_size()*col_indices.size(), col_indices.get()); - - for (vcl_size_t k=0; k<col_indices.size(); ++k) - { - if (col_indices[k] == j) - return row_indices[0] + k; - } - - // if not found, return index past the end of the matrix (cf. matrix.end() in the spirit of the STL) - return nonzeros_; - } - -public: - /** @brief Builds the row block information needed for fast sparse matrix-vector multiplications. - * - * Required when manually populating the memory buffers with values. Not necessary when using viennacl::copy() or .set() - */ - void generate_row_block_information() - { - viennacl::backend::typesafe_host_array<unsigned int> row_buffer(row_buffer_, rows_ + 1); - viennacl::backend::memory_read(row_buffer_, 0, row_buffer.raw_size(), row_buffer.get()); - - viennacl::backend::typesafe_host_array<unsigned int> row_blocks(row_buffer_, rows_ + 1); - - vcl_size_t num_entries_in_current_batch = 0; - - const vcl_size_t shared_mem_size = 1024; // number of column indices loaded to shared memory, number of floating point values loaded to shared memory - - row_block_num_ = 0; - row_blocks.set(0, 0); - for (vcl_size_t i=0; i<rows_; ++i) - { - vcl_size_t entries_in_row = vcl_size_t(row_buffer[i+1]) - vcl_size_t(row_buffer[i]); - num_entries_in_current_batch += entries_in_row; - - if (num_entries_in_current_batch > shared_mem_size) - { - vcl_size_t rows_in_batch = i - row_blocks[row_block_num_]; - if (rows_in_batch > 0) // at least one full row is in the batch. Use current row in next batch. - row_blocks.set(++row_block_num_, i--); - else // row is larger than buffer in shared memory - row_blocks.set(++row_block_num_, i+1); - num_entries_in_current_batch = 0; - } - } - if (num_entries_in_current_batch > 0) - row_blocks.set(++row_block_num_, rows_); - - if (row_block_num_ > 0) //matrix might be empty... - viennacl::backend::memory_create(row_blocks_, - row_blocks.element_size() * (row_block_num_ + 1), - viennacl::traits::context(row_buffer_), row_blocks.get()); - - } - -private: - // /** @brief Copy constructor is by now not available. */ - //compressed_matrix(compressed_matrix const &); - -private: - - vcl_size_t rows_; - vcl_size_t cols_; - vcl_size_t nonzeros_; - vcl_size_t row_block_num_; - handle_type row_buffer_; - handle_type row_blocks_; - handle_type col_buffer_; - handle_type elements_; -}; - -/** @brief Output stream support for compressed_matrix. Output format is same as MATLAB, Octave, or SciPy - * - * @param os STL output stream - * @param A The compressed matrix to be printed. -*/ -template<typename NumericT, unsigned int AlignmentV> -std::ostream & operator<<(std::ostream & os, compressed_matrix<NumericT, AlignmentV> const & A) -{ - std::vector<std::map<unsigned int, NumericT> > tmp(A.size1()); - viennacl::copy(A, tmp); - os << "compressed_matrix of size (" << A.size1() << ", " << A.size2() << ") with " << A.nnz() << " nonzeros:" << std::endl; - - for (vcl_size_t i=0; i<A.size1(); ++i) - { - for (typename std::map<unsigned int, NumericT>::const_iterator it = tmp[i].begin(); it != tmp[i].end(); ++it) - os << " (" << i << ", " << it->first << ")\t" << it->second << std::endl; - } - return os; -} - -// -// Specify available operations: -// - -/** \cond */ - -namespace linalg -{ -namespace detail -{ - // x = A * y - template<typename T, unsigned int A> - struct op_executor<vector_base<T>, op_assign, vector_expression<const compressed_matrix<T, A>, const vector_base<T>, op_prod> > - { - static void apply(vector_base<T> & lhs, vector_expression<const compressed_matrix<T, A>, const vector_base<T>, op_prod> const & rhs) - { - // check for the special case x = A * x - if (viennacl::traits::handle(lhs) == viennacl::traits::handle(rhs.rhs())) - { - viennacl::vector<T> temp(lhs); - viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), T(1), temp, T(0)); - lhs = temp; - } - else - viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), T(1), lhs, T(0)); - } - }; - - template<typename T, unsigned int A> - struct op_executor<vector_base<T>, op_inplace_add, vector_expression<const compressed_matrix<T, A>, const vector_base<T>, op_prod> > - { - static void apply(vector_base<T> & lhs, vector_expression<const compressed_matrix<T, A>, const vector_base<T>, op_prod> const & rhs) - { - // check for the special case x += A * x - if (viennacl::traits::handle(lhs) == viennacl::traits::handle(rhs.rhs())) - { - viennacl::vector<T> temp(lhs); - viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), T(1), temp, T(0)); - lhs += temp; - } - else - viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), T(1), lhs, T(1)); - } - }; - - template<typename T, unsigned int A> - struct op_executor<vector_base<T>, op_inplace_sub, vector_expression<const compressed_matrix<T, A>, const vector_base<T>, op_prod> > - { - static void apply(vector_base<T> & lhs, vector_expression<const compressed_matrix<T, A>, const vector_base<T>, op_prod> const & rhs) - { - // check for the special case x -= A * x - if (viennacl::traits::handle(lhs) == viennacl::traits::handle(rhs.rhs())) - { - viennacl::vector<T> temp(lhs); - viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), T(1), temp, T(0)); - lhs -= temp; - } - else - viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), T(-1), lhs, T(1)); - } - }; - - - // x = A * vec_op - template<typename T, unsigned int A, typename LHS, typename RHS, typename OP> - struct op_executor<vector_base<T>, op_assign, vector_expression<const compressed_matrix<T, A>, const vector_expression<const LHS, const RHS, OP>, op_prod> > - { - static void apply(vector_base<T> & lhs, vector_expression<const compressed_matrix<T, A>, const vector_expression<const LHS, const RHS, OP>, op_prod> const & rhs) - { - viennacl::vector<T> temp(rhs.rhs(), viennacl::traits::context(rhs)); - viennacl::linalg::prod_impl(rhs.lhs(), temp, lhs); - } - }; - - // x = A * vec_op - template<typename T, unsigned int A, typename LHS, typename RHS, typename OP> - struct op_executor<vector_base<T>, op_inplace_add, vector_expression<const compressed_matrix<T, A>, vector_expression<const LHS, const RHS, OP>, op_prod> > - { - static void apply(vector_base<T> & lhs, vector_expression<const compressed_matrix<T, A>, vector_expression<const LHS, const RHS, OP>, op_prod> const & rhs) - { - viennacl::vector<T> temp(rhs.rhs(), viennacl::traits::context(rhs)); - viennacl::vector<T> temp_result(lhs); - viennacl::linalg::prod_impl(rhs.lhs(), temp, temp_result); - lhs += temp_result; - } - }; - - // x = A * vec_op - template<typename T, unsigned int A, typename LHS, typename RHS, typename OP> - struct op_executor<vector_base<T>, op_inplace_sub, vector_expression<const compressed_matrix<T, A>, const vector_expression<const LHS, const RHS, OP>, op_prod> > - { - static void apply(vector_base<T> & lhs, vector_expression<const compressed_matrix<T, A>, const vector_expression<const LHS, const RHS, OP>, op_prod> const & rhs) - { - viennacl::vector<T> temp(rhs.rhs(), viennacl::traits::context(rhs)); - viennacl::vector<T> temp_result(lhs); - viennacl::linalg::prod_impl(rhs.lhs(), temp, temp_result); - lhs -= temp_result; - } - }; - -} // namespace detail -} // namespace linalg - /** \endcond */ -} - -#endif http://git-wip-us.apache.org/repos/asf/mahout/blob/7ae549fa/native-viennaCL/src/main/cpp/viennacl/context.hpp ---------------------------------------------------------------------- diff --git a/native-viennaCL/src/main/cpp/viennacl/context.hpp b/native-viennaCL/src/main/cpp/viennacl/context.hpp deleted file mode 100644 index ed00c39..0000000 --- a/native-viennaCL/src/main/cpp/viennacl/context.hpp +++ /dev/null @@ -1,88 +0,0 @@ -#ifndef VIENNACL_CONTEXT_HPP_ -#define VIENNACL_CONTEXT_HPP_ - -/* ========================================================================= - Copyright (c) 2010-2016, Institute for Microelectronics, - Institute for Analysis and Scientific Computing, - TU Wien. - Portions of this software are copyright by UChicago Argonne, LLC. - - ----------------- - ViennaCL - The Vienna Computing Library - ----------------- - - Project Head: Karl Rupp [email protected] - - (A list of authors and contributors can be found in the manual) - - License: MIT (X11), see file LICENSE in the base directory -============================================================================= */ - -/** @file viennacl/context.hpp - @brief Implementation of a OpenCL-like context, which serves as a unification of {OpenMP, CUDA, OpenCL} at the user API. -*/ - -#include <vector> -#include <stddef.h> -#include <assert.h> -#include "viennacl/forwards.h" -#include "viennacl/ocl/forwards.h" -#include "viennacl/backend/mem_handle.hpp" - -namespace viennacl -{ -/** @brief Represents a generic 'context' similar to an OpenCL context, but is backend-agnostic and thus also suitable for CUDA and OpenMP - * - * Context objects are used to distinguish between different memory domains. One context may refer to an OpenCL device, another context may refer to a CUDA device, and a third context to main RAM. - * Thus, operations are only defined on objects residing on the same context. - */ -class context -{ -public: - context() : mem_type_(viennacl::backend::default_memory_type()) - { -#ifdef VIENNACL_WITH_OPENCL - if (mem_type_ == OPENCL_MEMORY) - ocl_context_ptr_ = &viennacl::ocl::current_context(); - else - ocl_context_ptr_ = NULL; -#endif - } - - explicit context(viennacl::memory_types mtype) : mem_type_(mtype) - { - if (mem_type_ == MEMORY_NOT_INITIALIZED) - mem_type_ = viennacl::backend::default_memory_type(); -#ifdef VIENNACL_WITH_OPENCL - if (mem_type_ == OPENCL_MEMORY) - ocl_context_ptr_ = &viennacl::ocl::current_context(); - else - ocl_context_ptr_ = NULL; -#endif - } - -#ifdef VIENNACL_WITH_OPENCL - context(viennacl::ocl::context const & ctx) : mem_type_(OPENCL_MEMORY), ocl_context_ptr_(&ctx) {} - - viennacl::ocl::context const & opencl_context() const - { - assert(mem_type_ == OPENCL_MEMORY && bool("Context type is not OpenCL")); - return *ocl_context_ptr_; - } -#endif - - // TODO: Add CUDA and OpenMP contexts - - viennacl::memory_types memory_type() const { return mem_type_; } - -private: - viennacl::memory_types mem_type_; -#ifdef VIENNACL_WITH_OPENCL - viennacl::ocl::context const * ocl_context_ptr_; -#endif -}; - - -} - -#endif
