http://git-wip-us.apache.org/repos/asf/mahout/blob/f7c1f802/native-viennaCL/src/main/cpp/viennacl/compressed_compressed_matrix.hpp ---------------------------------------------------------------------- diff --git a/native-viennaCL/src/main/cpp/viennacl/compressed_compressed_matrix.hpp b/native-viennaCL/src/main/cpp/viennacl/compressed_compressed_matrix.hpp new file mode 100644 index 0000000..f1719a2 --- /dev/null +++ b/native-viennaCL/src/main/cpp/viennacl/compressed_compressed_matrix.hpp @@ -0,0 +1,619 @@ +#ifndef VIENNACL_COMPRESSED_compressed_compressed_matrix_HPP_ +#define VIENNACL_COMPRESSED_compressed_compressed_matrix_HPP_ + +/* ========================================================================= + Copyright (c) 2010-2016, Institute for Microelectronics, + Institute for Analysis and Scientific Computing, + TU Wien. + Portions of this software are copyright by UChicago Argonne, LLC. + + ----------------- + ViennaCL - The Vienna Computing Library + ----------------- + + Project Head: Karl Rupp [email protected] + + (A list of authors and contributors can be found in the manual) + + License: MIT (X11), see file LICENSE in the base directory +============================================================================= */ + +/** @file viennacl/compressed_compressed_matrix.hpp + @brief Implementation of the compressed_compressed_matrix class (CSR format with a relatively small number of nonzero rows) +*/ + +#include <vector> +#include <list> +#include <map> +#include "viennacl/forwards.h" +#include "viennacl/vector.hpp" + +#include "viennacl/linalg/sparse_matrix_operations.hpp" + +#include "viennacl/tools/tools.hpp" +#include "viennacl/tools/entry_proxy.hpp" + +namespace viennacl +{ +namespace detail +{ + template<typename CPUMatrixT, typename NumericT> + void copy_impl(const CPUMatrixT & cpu_matrix, + compressed_compressed_matrix<NumericT> & gpu_matrix, + vcl_size_t nonzero_rows, + vcl_size_t nonzeros) + { + assert( (gpu_matrix.size1() == 0 || 
viennacl::traits::size1(cpu_matrix) == gpu_matrix.size1()) && bool("Size mismatch") ); + assert( (gpu_matrix.size2() == 0 || viennacl::traits::size2(cpu_matrix) == gpu_matrix.size2()) && bool("Size mismatch") ); + + viennacl::backend::typesafe_host_array<unsigned int> row_buffer(gpu_matrix.handle1(), nonzero_rows + 1); + viennacl::backend::typesafe_host_array<unsigned int> row_indices(gpu_matrix.handle3(), nonzero_rows); + viennacl::backend::typesafe_host_array<unsigned int> col_buffer(gpu_matrix.handle2(), nonzeros); + std::vector<NumericT> elements(nonzeros); + + vcl_size_t row_index = 0; + vcl_size_t data_index = 0; + + for (typename CPUMatrixT::const_iterator1 row_it = cpu_matrix.begin1(); + row_it != cpu_matrix.end1(); + ++row_it) + { + bool row_empty = true; + + for (typename CPUMatrixT::const_iterator2 col_it = row_it.begin(); + col_it != row_it.end(); + ++col_it) + { + NumericT entry = *col_it; + if (entry < 0 || entry > 0) // entry != 0 without compiler warnings + { + if (row_empty) + { + assert(row_index < nonzero_rows && bool("Provided count of nonzero rows exceeded!")); + + row_empty = false; + row_buffer.set(row_index, data_index); + row_indices.set(row_index, col_it.index1()); + ++row_index; + } + + col_buffer.set(data_index, col_it.index2()); + elements[data_index] = entry; + ++data_index; + } + } + } + row_buffer.set(row_index, data_index); + + gpu_matrix.set(row_buffer.get(), + row_indices.get(), + col_buffer.get(), + &elements[0], + cpu_matrix.size1(), + cpu_matrix.size2(), + nonzero_rows, + nonzeros); + } +} + +//provide copy-operation: +/** @brief Copies a sparse matrix from the host to the OpenCL device (either GPU or multi-core CPU) + * + * There are some type requirements on the CPUMatrixT type (fulfilled by e.g. 
boost::numeric::ublas): + * - .size1() returns the number of rows + * - .size2() returns the number of columns + * - const_iterator1 is a type definition for an iterator along increasing row indices + * - const_iterator2 is a type definition for an iterator along increasing columns indices + * - The const_iterator1 type provides an iterator of type const_iterator2 via members .begin() and .end() that iterates along column indices in the current row. + * - The types const_iterator1 and const_iterator2 provide members functions .index1() and .index2() that return the current row and column indices respectively. + * - Dereferenciation of an object of type const_iterator2 returns the entry. + * + * @param cpu_matrix A sparse matrix on the host. + * @param gpu_matrix A compressed_compressed_matrix from ViennaCL + */ +template<typename CPUMatrixT, typename NumericT> +void copy(const CPUMatrixT & cpu_matrix, + compressed_compressed_matrix<NumericT> & gpu_matrix ) +{ + //std::cout << "copy for (" << cpu_matrix.size1() << ", " << cpu_matrix.size2() << ", " << cpu_matrix.nnz() << ")" << std::endl; + + if ( cpu_matrix.size1() > 0 && cpu_matrix.size2() > 0 ) + { + //determine nonzero rows and total nonzeros: + vcl_size_t num_entries = 0; + vcl_size_t nonzero_rows = 0; + for (typename CPUMatrixT::const_iterator1 row_it = cpu_matrix.begin1(); + row_it != cpu_matrix.end1(); + ++row_it) + { + bool row_empty = true; + for (typename CPUMatrixT::const_iterator2 col_it = row_it.begin(); + col_it != row_it.end(); + ++col_it) + { + NumericT val = *col_it; + if (val < 0 || val > 0) // val != 0 without compiler warnings + { + ++num_entries; + + if (row_empty) + { + row_empty = false; + ++nonzero_rows; + } + } + } + } + + if (num_entries == 0) //we copy an empty matrix + num_entries = 1; + + //set up matrix entries: + viennacl::detail::copy_impl(cpu_matrix, gpu_matrix, nonzero_rows, num_entries); + } +} + + +//adapted for std::vector< std::map < > > argument: +/** @brief Copies a sparse 
square matrix in the std::vector< std::map < > > format to an OpenCL device. Use viennacl::tools::sparse_matrix_adapter for non-square matrices. + * + * @param cpu_matrix A sparse square matrix on the host using STL types + * @param gpu_matrix A compressed_compressed_matrix from ViennaCL + */ +template<typename SizeT, typename NumericT> +void copy(const std::vector< std::map<SizeT, NumericT> > & cpu_matrix, + compressed_compressed_matrix<NumericT> & gpu_matrix ) +{ + vcl_size_t nonzero_rows = 0; + vcl_size_t nonzeros = 0; + vcl_size_t max_col = 0; + for (vcl_size_t i=0; i<cpu_matrix.size(); ++i) + { + if (cpu_matrix[i].size() > 0) + ++nonzero_rows; + nonzeros += cpu_matrix[i].size(); + if (cpu_matrix[i].size() > 0) + max_col = std::max<vcl_size_t>(max_col, (cpu_matrix[i].rbegin())->first); + } + + viennacl::detail::copy_impl(tools::const_sparse_matrix_adapter<NumericT, SizeT>(cpu_matrix, cpu_matrix.size(), max_col + 1), + gpu_matrix, + nonzero_rows, + nonzeros); +} + + +// +// gpu to cpu: +// +/** @brief Copies a sparse matrix from the OpenCL device (either GPU or multi-core CPU) to the host. + * + * There are two type requirements on the CPUMatrixT type (fulfilled by e.g. boost::numeric::ublas): + * - resize(rows, cols) A resize function to bring the matrix into the correct size + * - operator(i,j) Write new entries via the parenthesis operator + * + * @param gpu_matrix A compressed_compressed_matrix from ViennaCL + * @param cpu_matrix A sparse matrix on the host. 
+ */ +template<typename CPUMatrixT, typename NumericT> +void copy(const compressed_compressed_matrix<NumericT> & gpu_matrix, + CPUMatrixT & cpu_matrix ) +{ + assert( (cpu_matrix.size1() == gpu_matrix.size1()) && bool("Size mismatch") ); + assert( (cpu_matrix.size2() == gpu_matrix.size2()) && bool("Size mismatch") ); + + if ( gpu_matrix.size1() > 0 && gpu_matrix.size2() > 0 ) + { + //get raw data from memory: + viennacl::backend::typesafe_host_array<unsigned int> row_buffer(gpu_matrix.handle1(), gpu_matrix.nnz1() + 1); + viennacl::backend::typesafe_host_array<unsigned int> row_indices(gpu_matrix.handle1(), gpu_matrix.nnz1()); + viennacl::backend::typesafe_host_array<unsigned int> col_buffer(gpu_matrix.handle2(), gpu_matrix.nnz()); + std::vector<NumericT> elements(gpu_matrix.nnz()); + + //std::cout << "GPU->CPU, nonzeros: " << gpu_matrix.nnz() << std::endl; + + viennacl::backend::memory_read(gpu_matrix.handle1(), 0, row_buffer.raw_size(), row_buffer.get()); + viennacl::backend::memory_read(gpu_matrix.handle3(), 0, row_indices.raw_size(), row_indices.get()); + viennacl::backend::memory_read(gpu_matrix.handle2(), 0, col_buffer.raw_size(), col_buffer.get()); + viennacl::backend::memory_read(gpu_matrix.handle(), 0, sizeof(NumericT)* gpu_matrix.nnz(), &(elements[0])); + + //fill the cpu_matrix: + vcl_size_t data_index = 0; + for (vcl_size_t i = 1; i < row_buffer.size(); ++i) + { + while (data_index < row_buffer[i]) + { + if (col_buffer[data_index] >= gpu_matrix.size2()) + { + std::cerr << "ViennaCL encountered invalid data at colbuffer[" << data_index << "]: " << col_buffer[data_index] << std::endl; + return; + } + + NumericT val = elements[data_index]; + if (val < 0 || val > 0) // val != 0 without compiler warning + cpu_matrix(row_indices[i-1], col_buffer[data_index]) = val; + ++data_index; + } + } + } +} + + +/** @brief Copies a sparse matrix from an OpenCL device to the host. The host type is the std::vector< std::map < > > format . 
+ * + * @param gpu_matrix A compressed_compressed_matrix from ViennaCL + * @param cpu_matrix A sparse matrix on the host. + */ +template<typename NumericT> +void copy(const compressed_compressed_matrix<NumericT> & gpu_matrix, + std::vector< std::map<unsigned int, NumericT> > & cpu_matrix) +{ + tools::sparse_matrix_adapter<NumericT> temp(cpu_matrix, cpu_matrix.size(), cpu_matrix.size()); + copy(gpu_matrix, temp); +} + + +//////////////////////// compressed_compressed_matrix ////////////////////////// +/** @brief A sparse square matrix in compressed sparse rows format optimized for the case that only a few rows carry nonzero entries. + * + * The difference to the 'standard' CSR format is that there is an additional array 'row_indices' so that the i-th set of indices in the CSR-layout refers to row_indices[i]. + * + * @tparam NumericT The floating point type (either float or double, checked at compile time) + * @tparam AlignmentV The internal memory size for the entries in each row is given by (size()/AlignmentV + 1) * AlignmentV. AlignmentV must be a power of two. Best values or usually 4, 8 or 16, higher values are usually a waste of memory. + */ +template<class NumericT> +class compressed_compressed_matrix +{ +public: + typedef viennacl::backend::mem_handle handle_type; + typedef scalar<typename viennacl::tools::CHECK_SCALAR_TEMPLATE_ARGUMENT<NumericT>::ResultType> value_type; + typedef vcl_size_t size_type; + + /** @brief Default construction of a compressed matrix. No memory is allocated */ + compressed_compressed_matrix() : rows_(0), cols_(0), nonzero_rows_(0), nonzeros_(0) {} + + /** @brief Construction of a compressed matrix with the supplied number of rows and columns. 
If the number of nonzeros is positive, memory is allocated + * + * @param rows Number of rows + * @param cols Number of columns + * @param nonzero_rows Optional number of nonzero rows for memory preallocation + * @param nonzeros Optional number of nonzeros for memory preallocation + * @param ctx Context in which to create the matrix. Uses the default context if omitted + */ + explicit compressed_compressed_matrix(vcl_size_t rows, vcl_size_t cols, vcl_size_t nonzero_rows = 0, vcl_size_t nonzeros = 0, viennacl::context ctx = viennacl::context()) + : rows_(rows), cols_(cols), nonzero_rows_(nonzero_rows), nonzeros_(nonzeros) + { + row_buffer_.switch_active_handle_id(ctx.memory_type()); + row_indices_.switch_active_handle_id(ctx.memory_type()); + col_buffer_.switch_active_handle_id(ctx.memory_type()); + elements_.switch_active_handle_id(ctx.memory_type()); + +#ifdef VIENNACL_WITH_OPENCL + if (ctx.memory_type() == OPENCL_MEMORY) + { + row_buffer_.opencl_handle().context(ctx.opencl_context()); + row_indices_.opencl_handle().context(ctx.opencl_context()); + col_buffer_.opencl_handle().context(ctx.opencl_context()); + elements_.opencl_handle().context(ctx.opencl_context()); + } +#endif + if (rows > 0) + { + viennacl::backend::memory_create(row_buffer_, viennacl::backend::typesafe_host_array<unsigned int>().element_size() * (rows + 1), ctx); + } + if (nonzeros > 0) + { + viennacl::backend::memory_create(col_buffer_, viennacl::backend::typesafe_host_array<unsigned int>().element_size() * nonzeros, ctx); + viennacl::backend::memory_create(elements_, sizeof(NumericT) * nonzeros, ctx); + } + } + + /** @brief Construction of a compressed matrix with the supplied number of rows and columns. 
If the number of nonzeros is positive, memory is allocated + * + * @param rows Number of rows + * @param cols Number of columns + * @param ctx Context in which to create the matrix + */ + explicit compressed_compressed_matrix(vcl_size_t rows, vcl_size_t cols, viennacl::context ctx) + : rows_(rows), cols_(cols), nonzeros_(0) + { + row_buffer_.switch_active_handle_id(ctx.memory_type()); + col_buffer_.switch_active_handle_id(ctx.memory_type()); + elements_.switch_active_handle_id(ctx.memory_type()); + +#ifdef VIENNACL_WITH_OPENCL + if (ctx.memory_type() == OPENCL_MEMORY) + { + row_buffer_.opencl_handle().context(ctx.opencl_context()); + col_buffer_.opencl_handle().context(ctx.opencl_context()); + elements_.opencl_handle().context(ctx.opencl_context()); + } +#endif + if (rows > 0) + { + viennacl::backend::memory_create(row_buffer_, viennacl::backend::typesafe_host_array<unsigned int>().element_size() * (rows + 1), ctx); + } + } + + explicit compressed_compressed_matrix(viennacl::context ctx) : rows_(0), cols_(0), nonzero_rows_(0), nonzeros_(0) + { + row_buffer_.switch_active_handle_id(ctx.memory_type()); + row_indices_.switch_active_handle_id(ctx.memory_type()); + col_buffer_.switch_active_handle_id(ctx.memory_type()); + elements_.switch_active_handle_id(ctx.memory_type()); + +#ifdef VIENNACL_WITH_OPENCL + if (ctx.memory_type() == OPENCL_MEMORY) + { + row_buffer_.opencl_handle().context(ctx.opencl_context()); + row_indices_.opencl_handle().context(ctx.opencl_context()); + col_buffer_.opencl_handle().context(ctx.opencl_context()); + elements_.opencl_handle().context(ctx.opencl_context()); + } +#endif + } + + +#ifdef VIENNACL_WITH_OPENCL + explicit compressed_compressed_matrix(cl_mem mem_row_buffer, cl_mem mem_row_indices, cl_mem mem_col_buffer, cl_mem mem_elements, + vcl_size_t rows, vcl_size_t cols, vcl_size_t nonzero_rows, vcl_size_t nonzeros) : + rows_(rows), cols_(cols), nonzero_rows_(nonzero_rows), nonzeros_(nonzeros) + { + 
row_buffer_.switch_active_handle_id(viennacl::OPENCL_MEMORY); + row_buffer_.opencl_handle() = mem_row_buffer; + row_buffer_.opencl_handle().inc(); //prevents that the user-provided memory is deleted once the matrix object is destroyed. + row_buffer_.raw_size(sizeof(cl_uint) * (nonzero_rows + 1)); + + row_indices_.switch_active_handle_id(viennacl::OPENCL_MEMORY); + row_indices_.opencl_handle() = mem_row_indices; + row_indices_.opencl_handle().inc(); //prevents that the user-provided memory is deleted once the matrix object is destroyed. + row_indices_.raw_size(sizeof(cl_uint) * nonzero_rows); + + col_buffer_.switch_active_handle_id(viennacl::OPENCL_MEMORY); + col_buffer_.opencl_handle() = mem_col_buffer; + col_buffer_.opencl_handle().inc(); //prevents that the user-provided memory is deleted once the matrix object is destroyed. + col_buffer_.raw_size(sizeof(cl_uint) * nonzeros); + + elements_.switch_active_handle_id(viennacl::OPENCL_MEMORY); + elements_.opencl_handle() = mem_elements; + elements_.opencl_handle().inc(); //prevents that the user-provided memory is deleted once the matrix object is destroyed. + elements_.raw_size(sizeof(NumericT) * nonzeros); + } +#endif + + + /** @brief Assignment a compressed matrix from possibly another memory domain. 
*/ + compressed_compressed_matrix & operator=(compressed_compressed_matrix const & other) + { + assert( (rows_ == 0 || rows_ == other.size1()) && bool("Size mismatch") ); + assert( (cols_ == 0 || cols_ == other.size2()) && bool("Size mismatch") ); + + rows_ = other.size1(); + cols_ = other.size2(); + nonzero_rows_ = other.nnz1(); + nonzeros_ = other.nnz(); + + viennacl::backend::typesafe_memory_copy<unsigned int>(other.row_buffer_, row_buffer_); + viennacl::backend::typesafe_memory_copy<unsigned int>(other.row_indices_, row_indices_); + viennacl::backend::typesafe_memory_copy<unsigned int>(other.col_buffer_, col_buffer_); + viennacl::backend::typesafe_memory_copy<NumericT>(other.elements_, elements_); + + return *this; + } + + + /** @brief Sets the row, column and value arrays of the compressed matrix + * + * @param row_jumper Pointer to an array holding the indices of the first element of each row (starting with zero). E.g. row_jumper[10] returns the index of the first entry of the 11th row. The array length is 'cols + 1' + * @param row_indices Array holding the indices of the nonzero rows + * @param col_buffer Pointer to an array holding the column index of each entry. The array length is 'nonzeros' + * @param elements Pointer to an array holding the entries of the sparse matrix. 
The array length is 'elements' + * @param rows Number of rows of the sparse matrix + * @param cols Number of columns of the sparse matrix + * @param nonzero_rows Number of nonzero rows + * @param nonzeros Total number of nonzero entries + */ + void set(const void * row_jumper, + const void * row_indices, + const void * col_buffer, + const NumericT * elements, + vcl_size_t rows, + vcl_size_t cols, + vcl_size_t nonzero_rows, + vcl_size_t nonzeros) + { + assert( (rows > 0) && bool("Error in compressed_compressed_matrix::set(): Number of rows must be larger than zero!")); + assert( (cols > 0) && bool("Error in compressed_compressed_matrix::set(): Number of columns must be larger than zero!")); + assert( (nonzero_rows > 0) && bool("Error in compressed_compressed_matrix::set(): Number of nonzero rows must be larger than zero!")); + assert( (nonzeros > 0) && bool("Error in compressed_compressed_matrix::set(): Number of nonzeros must be larger than zero!")); + //std::cout << "Setting memory: " << cols + 1 << ", " << nonzeros << std::endl; + + viennacl::backend::memory_create(row_buffer_, viennacl::backend::typesafe_host_array<unsigned int>(row_buffer_).element_size() * (nonzero_rows + 1), viennacl::traits::context(row_buffer_), row_jumper); + viennacl::backend::memory_create(row_indices_, viennacl::backend::typesafe_host_array<unsigned int>(row_indices_).element_size() * nonzero_rows, viennacl::traits::context(row_indices_), row_indices); + viennacl::backend::memory_create(col_buffer_, viennacl::backend::typesafe_host_array<unsigned int>(col_buffer_).element_size() * nonzeros, viennacl::traits::context(col_buffer_), col_buffer); + viennacl::backend::memory_create(elements_, sizeof(NumericT) * nonzeros, viennacl::traits::context(elements_), elements); + + nonzeros_ = nonzeros; + nonzero_rows_ = nonzero_rows; + rows_ = rows; + cols_ = cols; + } + + /** @brief Resets all entries in the matrix back to zero without changing the matrix size. Resets the sparsity pattern. 
*/ + void clear() + { + viennacl::backend::typesafe_host_array<unsigned int> host_row_buffer(row_buffer_, rows_ + 1); + viennacl::backend::typesafe_host_array<unsigned int> host_row_indices(row_indices_, rows_ + 1); + viennacl::backend::typesafe_host_array<unsigned int> host_col_buffer(col_buffer_, 1); + std::vector<NumericT> host_elements(1); + + viennacl::backend::memory_create(row_buffer_, host_row_buffer.element_size() * (rows_ + 1), viennacl::traits::context(row_buffer_), host_row_buffer.get()); + viennacl::backend::memory_create(row_indices_, host_row_indices.element_size() * (rows_ + 1), viennacl::traits::context(row_indices_), host_row_indices.get()); + viennacl::backend::memory_create(col_buffer_, host_col_buffer.element_size() * 1, viennacl::traits::context(col_buffer_), host_col_buffer.get()); + viennacl::backend::memory_create(elements_, sizeof(NumericT) * 1, viennacl::traits::context(elements_), &(host_elements[0])); + + nonzeros_ = 0; + nonzero_rows_ = 0; + } + + /** @brief Returns the number of rows */ + const vcl_size_t & size1() const { return rows_; } + /** @brief Returns the number of columns */ + const vcl_size_t & size2() const { return cols_; } + /** @brief Returns the number of nonzero entries */ + const vcl_size_t & nnz1() const { return nonzero_rows_; } + /** @brief Returns the number of nonzero entries */ + const vcl_size_t & nnz() const { return nonzeros_; } + + /** @brief Returns the OpenCL handle to the row index array */ + const handle_type & handle1() const { return row_buffer_; } + /** @brief Returns the OpenCL handle to the column index array */ + const handle_type & handle2() const { return col_buffer_; } + /** @brief Returns the OpenCL handle to the row index array */ + const handle_type & handle3() const { return row_indices_; } + /** @brief Returns the OpenCL handle to the matrix entry array */ + const handle_type & handle() const { return elements_; } + + /** @brief Returns the OpenCL handle to the row index array */ + 
handle_type & handle1() { return row_buffer_; } + /** @brief Returns the OpenCL handle to the column index array */ + handle_type & handle2() { return col_buffer_; } + /** @brief Returns the OpenCL handle to the row index array */ + handle_type & handle3() { return row_indices_; } + /** @brief Returns the OpenCL handle to the matrix entry array */ + handle_type & handle() { return elements_; } + + void switch_memory_context(viennacl::context new_ctx) + { + viennacl::backend::switch_memory_context<unsigned int>(row_buffer_, new_ctx); + viennacl::backend::switch_memory_context<unsigned int>(row_indices_, new_ctx); + viennacl::backend::switch_memory_context<unsigned int>(col_buffer_, new_ctx); + viennacl::backend::switch_memory_context<NumericT>(elements_, new_ctx); + } + + viennacl::memory_types memory_context() const + { + return row_buffer_.get_active_handle_id(); + } + +private: + + vcl_size_t rows_; + vcl_size_t cols_; + vcl_size_t nonzero_rows_; + vcl_size_t nonzeros_; + handle_type row_buffer_; + handle_type row_indices_; + handle_type col_buffer_; + handle_type elements_; +}; + + + +// +// Specify available operations: +// + +/** \cond */ + +namespace linalg +{ +namespace detail +{ + // x = A * y + template<typename T> + struct op_executor<vector_base<T>, op_assign, vector_expression<const compressed_compressed_matrix<T>, const vector_base<T>, op_prod> > + { + static void apply(vector_base<T> & lhs, vector_expression<const compressed_compressed_matrix<T>, const vector_base<T>, op_prod> const & rhs) + { + // check for the special case x = A * x + if (viennacl::traits::handle(lhs) == viennacl::traits::handle(rhs.rhs())) + { + viennacl::vector<T> temp(lhs); + viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), T(1), temp, T(0)); + lhs = temp; + } + else + viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), T(1), lhs, T(0)); + } + }; + + template<typename T> + struct op_executor<vector_base<T>, op_inplace_add, vector_expression<const compressed_compressed_matrix<T>, 
const vector_base<T>, op_prod> > + { + static void apply(vector_base<T> & lhs, vector_expression<const compressed_compressed_matrix<T>, const vector_base<T>, op_prod> const & rhs) + { + // check for the special case x += A * x + if (viennacl::traits::handle(lhs) == viennacl::traits::handle(rhs.rhs())) + { + viennacl::vector<T> temp(lhs); + viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), T(1), temp, T(0)); + lhs += temp; + } + else + viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), T(1), lhs, T(1)); + } + }; + + template<typename T> + struct op_executor<vector_base<T>, op_inplace_sub, vector_expression<const compressed_compressed_matrix<T>, const vector_base<T>, op_prod> > + { + static void apply(vector_base<T> & lhs, vector_expression<const compressed_compressed_matrix<T>, const vector_base<T>, op_prod> const & rhs) + { + // check for the special case x -= A * x + if (viennacl::traits::handle(lhs) == viennacl::traits::handle(rhs.rhs())) + { + viennacl::vector<T> temp(lhs); + viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), T(1), temp, T(0)); + lhs -= temp; + } + else + viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), T(-1), lhs, T(1)); + } + }; + + + // x = A * vec_op + template<typename T, typename LHS, typename RHS, typename OP> + struct op_executor<vector_base<T>, op_assign, vector_expression<const compressed_compressed_matrix<T>, const vector_expression<const LHS, const RHS, OP>, op_prod> > + { + static void apply(vector_base<T> & lhs, vector_expression<const compressed_compressed_matrix<T>, const vector_expression<const LHS, const RHS, OP>, op_prod> const & rhs) + { + viennacl::vector<T> temp(rhs.rhs()); + viennacl::linalg::prod_impl(rhs.lhs(), temp, lhs); + } + }; + + // x = A * vec_op + template<typename T, typename LHS, typename RHS, typename OP> + struct op_executor<vector_base<T>, op_inplace_add, vector_expression<const compressed_compressed_matrix<T>, vector_expression<const LHS, const RHS, OP>, op_prod> > + { + static void apply(vector_base<T> & 
lhs, vector_expression<const compressed_compressed_matrix<T>, vector_expression<const LHS, const RHS, OP>, op_prod> const & rhs) + { + viennacl::vector<T> temp(rhs.rhs(), viennacl::traits::context(rhs)); + viennacl::vector<T> temp_result(lhs); + viennacl::linalg::prod_impl(rhs.lhs(), temp, temp_result); + lhs += temp_result; + } + }; + + // x = A * vec_op + template<typename T, typename LHS, typename RHS, typename OP> + struct op_executor<vector_base<T>, op_inplace_sub, vector_expression<const compressed_compressed_matrix<T>, const vector_expression<const LHS, const RHS, OP>, op_prod> > + { + static void apply(vector_base<T> & lhs, vector_expression<const compressed_compressed_matrix<T>, const vector_expression<const LHS, const RHS, OP>, op_prod> const & rhs) + { + viennacl::vector<T> temp(rhs.rhs(), viennacl::traits::context(rhs)); + viennacl::vector<T> temp_result(lhs); + viennacl::linalg::prod_impl(rhs.lhs(), temp, temp_result); + lhs -= temp_result; + } + }; + +} // namespace detail +} // namespace linalg + +/** \endcond */ +} + +#endif
http://git-wip-us.apache.org/repos/asf/mahout/blob/f7c1f802/native-viennaCL/src/main/cpp/viennacl/compressed_matrix.hpp ---------------------------------------------------------------------- diff --git a/native-viennaCL/src/main/cpp/viennacl/compressed_matrix.hpp b/native-viennaCL/src/main/cpp/viennacl/compressed_matrix.hpp new file mode 100644 index 0000000..e42f552 --- /dev/null +++ b/native-viennaCL/src/main/cpp/viennacl/compressed_matrix.hpp @@ -0,0 +1,1178 @@ +#ifndef VIENNACL_COMPRESSED_MATRIX_HPP_ +#define VIENNACL_COMPRESSED_MATRIX_HPP_ + +/* ========================================================================= + Copyright (c) 2010-2016, Institute for Microelectronics, + Institute for Analysis and Scientific Computing, + TU Wien. + Portions of this software are copyright by UChicago Argonne, LLC. + + ----------------- + ViennaCL - The Vienna Computing Library + ----------------- + + Project Head: Karl Rupp [email protected] + + (A list of authors and contributors can be found in the manual) + + License: MIT (X11), see file LICENSE in the base directory +============================================================================= */ + +/** @file viennacl/compressed_matrix.hpp + @brief Implementation of the compressed_matrix class +*/ + +#include <vector> +#include <list> +#include <map> +#include "viennacl/forwards.h" +#include "viennacl/vector.hpp" + +#include "viennacl/linalg/sparse_matrix_operations.hpp" + +#include "viennacl/tools/tools.hpp" +#include "viennacl/tools/entry_proxy.hpp" + +#ifdef VIENNACL_WITH_UBLAS +#include <boost/numeric/ublas/matrix_sparse.hpp> +#endif + +namespace viennacl +{ +namespace detail +{ + + /** @brief Implementation of the copy of a host-based sparse matrix to the device. 
+ * + * See convenience copy() routines for type requirements of CPUMatrixT + */ + template<typename CPUMatrixT, typename NumericT, unsigned int AlignmentV> + void copy_impl(const CPUMatrixT & cpu_matrix, + compressed_matrix<NumericT, AlignmentV> & gpu_matrix, + vcl_size_t nonzeros) + { + assert( (gpu_matrix.size1() == 0 || viennacl::traits::size1(cpu_matrix) == gpu_matrix.size1()) && bool("Size mismatch") ); + assert( (gpu_matrix.size2() == 0 || viennacl::traits::size2(cpu_matrix) == gpu_matrix.size2()) && bool("Size mismatch") ); + + viennacl::backend::typesafe_host_array<unsigned int> row_buffer(gpu_matrix.handle1(), cpu_matrix.size1() + 1); + viennacl::backend::typesafe_host_array<unsigned int> col_buffer(gpu_matrix.handle2(), nonzeros); + std::vector<NumericT> elements(nonzeros); + + vcl_size_t row_index = 0; + vcl_size_t data_index = 0; + + for (typename CPUMatrixT::const_iterator1 row_it = cpu_matrix.begin1(); + row_it != cpu_matrix.end1(); + ++row_it) + { + row_buffer.set(row_index, data_index); + ++row_index; + + for (typename CPUMatrixT::const_iterator2 col_it = row_it.begin(); + col_it != row_it.end(); + ++col_it) + { + col_buffer.set(data_index, col_it.index2()); + elements[data_index] = *col_it; + ++data_index; + } + data_index = viennacl::tools::align_to_multiple<vcl_size_t>(data_index, AlignmentV); //take care of alignment + } + row_buffer.set(row_index, data_index); + + gpu_matrix.set(row_buffer.get(), + col_buffer.get(), + &elements[0], + cpu_matrix.size1(), + cpu_matrix.size2(), + nonzeros); + } +} + +// +// host to device: +// + +//provide copy-operation: +/** @brief Copies a sparse matrix from the host to the OpenCL device (either GPU or multi-core CPU) + * + * There are some type requirements on the CPUMatrixT type (fulfilled by e.g. 
boost::numeric::ublas): + * - .size1() returns the number of rows + * - .size2() returns the number of columns + * - const_iterator1 is a type definition for an iterator along increasing row indices + * - const_iterator2 is a type definition for an iterator along increasing columns indices + * - The const_iterator1 type provides an iterator of type const_iterator2 via members .begin() and .end() that iterates along column indices in the current row. + * - The types const_iterator1 and const_iterator2 provide members functions .index1() and .index2() that return the current row and column indices respectively. + * - Dereferenciation of an object of type const_iterator2 returns the entry. + * + * @param cpu_matrix A sparse matrix on the host. + * @param gpu_matrix A compressed_matrix from ViennaCL + */ +template<typename CPUMatrixT, typename NumericT, unsigned int AlignmentV> +void copy(const CPUMatrixT & cpu_matrix, + compressed_matrix<NumericT, AlignmentV> & gpu_matrix ) +{ + if ( cpu_matrix.size1() > 0 && cpu_matrix.size2() > 0 ) + { + //determine nonzeros: + vcl_size_t num_entries = 0; + for (typename CPUMatrixT::const_iterator1 row_it = cpu_matrix.begin1(); + row_it != cpu_matrix.end1(); + ++row_it) + { + vcl_size_t entries_per_row = 0; + for (typename CPUMatrixT::const_iterator2 col_it = row_it.begin(); + col_it != row_it.end(); + ++col_it) + { + ++entries_per_row; + } + num_entries += viennacl::tools::align_to_multiple<vcl_size_t>(entries_per_row, AlignmentV); + } + + if (num_entries == 0) //we copy an empty matrix + num_entries = 1; + + //set up matrix entries: + viennacl::detail::copy_impl(cpu_matrix, gpu_matrix, num_entries); + } +} + + +//adapted for std::vector< std::map < > > argument: +/** @brief Copies a sparse square matrix in the std::vector< std::map < > > format to an OpenCL device. Use viennacl::tools::sparse_matrix_adapter for non-square matrices. 
+ * + * @param cpu_matrix A sparse square matrix on the host using STL types + * @param gpu_matrix A compressed_matrix from ViennaCL + */ +template<typename SizeT, typename NumericT, unsigned int AlignmentV> +void copy(const std::vector< std::map<SizeT, NumericT> > & cpu_matrix, + compressed_matrix<NumericT, AlignmentV> & gpu_matrix ) +{ + vcl_size_t nonzeros = 0; + vcl_size_t max_col = 0; + for (vcl_size_t i=0; i<cpu_matrix.size(); ++i) + { + if (cpu_matrix[i].size() > 0) + nonzeros += ((cpu_matrix[i].size() - 1) / AlignmentV + 1) * AlignmentV; + if (cpu_matrix[i].size() > 0) + max_col = std::max<vcl_size_t>(max_col, (cpu_matrix[i].rbegin())->first); + } + + viennacl::detail::copy_impl(tools::const_sparse_matrix_adapter<NumericT, SizeT>(cpu_matrix, cpu_matrix.size(), max_col + 1), + gpu_matrix, + nonzeros); +} + +#ifdef VIENNACL_WITH_UBLAS +/** @brief Convenience routine for copying a sparse uBLAS matrix to a ViennaCL matrix. + * + * Optimization which copies the data directly from the internal uBLAS buffers. 
+ */ +template<typename ScalarType, typename F, vcl_size_t IB, typename IA, typename TA> +void copy(const boost::numeric::ublas::compressed_matrix<ScalarType, F, IB, IA, TA> & ublas_matrix, + viennacl::compressed_matrix<ScalarType, 1> & gpu_matrix) +{ + assert( (gpu_matrix.size1() == 0 || viennacl::traits::size1(ublas_matrix) == gpu_matrix.size1()) && bool("Size mismatch") ); + assert( (gpu_matrix.size2() == 0 || viennacl::traits::size2(ublas_matrix) == gpu_matrix.size2()) && bool("Size mismatch") ); + + //we just need to copy the CSR arrays: + viennacl::backend::typesafe_host_array<unsigned int> row_buffer(gpu_matrix.handle1(), ublas_matrix.size1() + 1); + for (vcl_size_t i=0; i<=ublas_matrix.size1(); ++i) + row_buffer.set(i, ublas_matrix.index1_data()[i]); + + viennacl::backend::typesafe_host_array<unsigned int> col_buffer(gpu_matrix.handle2(), ublas_matrix.nnz()); + for (vcl_size_t i=0; i<ublas_matrix.nnz(); ++i) + col_buffer.set(i, ublas_matrix.index2_data()[i]); + + gpu_matrix.set(row_buffer.get(), + col_buffer.get(), + &(ublas_matrix.value_data()[0]), + ublas_matrix.size1(), + ublas_matrix.size2(), + ublas_matrix.nnz()); + +} +#endif + +#ifdef VIENNACL_WITH_ARMADILLO +/** @brief Convenience routine for copying a sparse Armadillo matrix to a ViennaCL matrix. + * + * Since Armadillo uses a column-major format, while ViennaCL uses row-major, we need to transpose. + * This is done fairly efficiently working on the CSR arrays directly, rather than (slowly) building an STL matrix. 
+ */ +template<typename NumericT, unsigned int AlignmentV> +void copy(arma::SpMat<NumericT> const & arma_matrix, + viennacl::compressed_matrix<NumericT, AlignmentV> & vcl_matrix) +{ + assert( (vcl_matrix.size1() == 0 || static_cast<vcl_size_t>(arma_matrix.n_rows) == vcl_matrix.size1()) && bool("Size mismatch") ); + assert( (vcl_matrix.size2() == 0 || static_cast<vcl_size_t>(arma_matrix.n_cols) == vcl_matrix.size2()) && bool("Size mismatch") ); + + viennacl::backend::typesafe_host_array<unsigned int> row_buffer(vcl_matrix.handle1(), arma_matrix.n_rows + 1); + viennacl::backend::typesafe_host_array<unsigned int> col_buffer(vcl_matrix.handle2(), arma_matrix.n_nonzero); + viennacl::backend::typesafe_host_array<NumericT > value_buffer(vcl_matrix.handle(), arma_matrix.n_nonzero); + + // Step 1: Count number of nonzeros in each row + for (vcl_size_t col=0; col < static_cast<vcl_size_t>(arma_matrix.n_cols); ++col) + { + vcl_size_t col_begin = static_cast<vcl_size_t>(arma_matrix.col_ptrs[col]); + vcl_size_t col_end = static_cast<vcl_size_t>(arma_matrix.col_ptrs[col+1]); + for (vcl_size_t i = col_begin; i < col_end; ++i) + { + unsigned int row = arma_matrix.row_indices[i]; + row_buffer.set(row, row_buffer[row] + 1); + } + } + + // Step 2: Exclusive scan on row_buffer to obtain offsets + unsigned int offset = 0; + for (vcl_size_t i=0; i<row_buffer.size(); ++i) + { + unsigned int tmp = row_buffer[i]; + row_buffer.set(i, offset); + offset += tmp; + } + + // Step 3: Fill data + std::vector<unsigned int> row_offsets(arma_matrix.n_rows); + for (vcl_size_t col=0; col < static_cast<vcl_size_t>(arma_matrix.n_cols); ++col) + { + vcl_size_t col_begin = static_cast<vcl_size_t>(arma_matrix.col_ptrs[col]); + vcl_size_t col_end = static_cast<vcl_size_t>(arma_matrix.col_ptrs[col+1]); + for (vcl_size_t i = col_begin; i < col_end; ++i) + { + unsigned int row = arma_matrix.row_indices[i]; + col_buffer.set(row_buffer[row] + row_offsets[row], col); + value_buffer.set(row_buffer[row] + 
row_offsets[row], arma_matrix.values[i]); + row_offsets[row] += 1; + } + } + + vcl_matrix.set(row_buffer.get(), col_buffer.get(), reinterpret_cast<NumericT*>(value_buffer.get()), + arma_matrix.n_rows, arma_matrix.n_cols, arma_matrix.n_nonzero); +} +#endif + +#ifdef VIENNACL_WITH_EIGEN +/** @brief Convenience routine for copying a sparse Eigen matrix to a ViennaCL matrix. + * + * Builds a temporary STL matrix. Patches for avoiding the temporary matrix welcome. + */ +template<typename NumericT, int flags, unsigned int AlignmentV> +void copy(const Eigen::SparseMatrix<NumericT, flags> & eigen_matrix, + compressed_matrix<NumericT, AlignmentV> & gpu_matrix) +{ + assert( (gpu_matrix.size1() == 0 || static_cast<vcl_size_t>(eigen_matrix.rows()) == gpu_matrix.size1()) && bool("Size mismatch") ); + assert( (gpu_matrix.size2() == 0 || static_cast<vcl_size_t>(eigen_matrix.cols()) == gpu_matrix.size2()) && bool("Size mismatch") ); + + std::vector< std::map<unsigned int, NumericT> > stl_matrix(eigen_matrix.rows()); + + for (int k=0; k < eigen_matrix.outerSize(); ++k) + for (typename Eigen::SparseMatrix<NumericT, flags>::InnerIterator it(eigen_matrix, k); it; ++it) + stl_matrix[it.row()][it.col()] = it.value(); + + copy(tools::const_sparse_matrix_adapter<NumericT>(stl_matrix, eigen_matrix.rows(), eigen_matrix.cols()), gpu_matrix); +} +#endif + + +#ifdef VIENNACL_WITH_MTL4 +/** @brief Convenience routine for copying a sparse MTL4 matrix to a ViennaCL matrix. + * + * Builds a temporary STL matrix for the copy. Patches for avoiding the temporary matrix welcome. 
+ */ +template<typename NumericT, unsigned int AlignmentV> +void copy(const mtl::compressed2D<NumericT> & cpu_matrix, + compressed_matrix<NumericT, AlignmentV> & gpu_matrix) +{ + assert( (gpu_matrix.size1() == 0 || static_cast<vcl_size_t>(cpu_matrix.num_rows()) == gpu_matrix.size1()) && bool("Size mismatch") ); + assert( (gpu_matrix.size2() == 0 || static_cast<vcl_size_t>(cpu_matrix.num_cols()) == gpu_matrix.size2()) && bool("Size mismatch") ); + + typedef mtl::compressed2D<NumericT> MatrixType; + + std::vector< std::map<unsigned int, NumericT> > stl_matrix(cpu_matrix.num_rows()); + + using mtl::traits::range_generator; + using mtl::traits::range::min; + + // Choose between row and column traversal + typedef typename min<range_generator<mtl::tag::row, MatrixType>, + range_generator<mtl::tag::col, MatrixType> >::type range_type; + range_type my_range; + + // Type of outer cursor + typedef typename range_type::type c_type; + // Type of inner cursor + typedef typename mtl::traits::range_generator<mtl::tag::nz, c_type>::type ic_type; + + // Define the property maps + typename mtl::traits::row<MatrixType>::type row(cpu_matrix); + typename mtl::traits::col<MatrixType>::type col(cpu_matrix); + typename mtl::traits::const_value<MatrixType>::type value(cpu_matrix); + + // Now iterate over the matrix + for (c_type cursor(my_range.begin(cpu_matrix)), cend(my_range.end(cpu_matrix)); cursor != cend; ++cursor) + for (ic_type icursor(mtl::begin<mtl::tag::nz>(cursor)), icend(mtl::end<mtl::tag::nz>(cursor)); icursor != icend; ++icursor) + stl_matrix[row(*icursor)][col(*icursor)] = value(*icursor); + + copy(tools::const_sparse_matrix_adapter<NumericT>(stl_matrix, cpu_matrix.num_rows(), cpu_matrix.num_cols()), gpu_matrix); +} +#endif + + + + + + + +// +// device to host: +// +/** @brief Copies a sparse matrix from the OpenCL device (either GPU or multi-core CPU) to the host. + * + * There are two type requirements on the CPUMatrixT type (fulfilled by e.g. 
boost::numeric::ublas): + * - resize(rows, cols) A resize function to bring the matrix into the correct size + * - operator(i,j) Write new entries via the parenthesis operator + * + * @param gpu_matrix A compressed_matrix from ViennaCL + * @param cpu_matrix A sparse matrix on the host. + */ +template<typename CPUMatrixT, typename NumericT, unsigned int AlignmentV> +void copy(const compressed_matrix<NumericT, AlignmentV> & gpu_matrix, + CPUMatrixT & cpu_matrix ) +{ + assert( (viennacl::traits::size1(cpu_matrix) == gpu_matrix.size1()) && bool("Size mismatch") ); + assert( (viennacl::traits::size2(cpu_matrix) == gpu_matrix.size2()) && bool("Size mismatch") ); + + if ( gpu_matrix.size1() > 0 && gpu_matrix.size2() > 0 ) + { + //get raw data from memory: + viennacl::backend::typesafe_host_array<unsigned int> row_buffer(gpu_matrix.handle1(), cpu_matrix.size1() + 1); + viennacl::backend::typesafe_host_array<unsigned int> col_buffer(gpu_matrix.handle2(), gpu_matrix.nnz()); + std::vector<NumericT> elements(gpu_matrix.nnz()); + + //std::cout << "GPU->CPU, nonzeros: " << gpu_matrix.nnz() << std::endl; + + viennacl::backend::memory_read(gpu_matrix.handle1(), 0, row_buffer.raw_size(), row_buffer.get()); + viennacl::backend::memory_read(gpu_matrix.handle2(), 0, col_buffer.raw_size(), col_buffer.get()); + viennacl::backend::memory_read(gpu_matrix.handle(), 0, sizeof(NumericT)* gpu_matrix.nnz(), &(elements[0])); + + //fill the cpu_matrix: + vcl_size_t data_index = 0; + for (vcl_size_t row = 1; row <= gpu_matrix.size1(); ++row) + { + while (data_index < row_buffer[row]) + { + if (col_buffer[data_index] >= gpu_matrix.size2()) + { + std::cerr << "ViennaCL encountered invalid data at colbuffer[" << data_index << "]: " << col_buffer[data_index] << std::endl; + return; + } + + if (std::fabs(elements[data_index]) > static_cast<NumericT>(0)) + cpu_matrix(row-1, static_cast<vcl_size_t>(col_buffer[data_index])) = elements[data_index]; + ++data_index; + } + } + } +} + + +/** @brief Copies a 
sparse matrix from an OpenCL device to the host. The host type is the std::vector< std::map < > > format . + * + * @param gpu_matrix A compressed_matrix from ViennaCL + * @param cpu_matrix A sparse matrix on the host. + */ +template<typename NumericT, unsigned int AlignmentV> +void copy(const compressed_matrix<NumericT, AlignmentV> & gpu_matrix, + std::vector< std::map<unsigned int, NumericT> > & cpu_matrix) +{ + assert( (cpu_matrix.size() == gpu_matrix.size1()) && bool("Size mismatch") ); + + tools::sparse_matrix_adapter<NumericT> temp(cpu_matrix, gpu_matrix.size1(), gpu_matrix.size2()); + copy(gpu_matrix, temp); +} + +#ifdef VIENNACL_WITH_UBLAS +/** @brief Convenience routine for copying a ViennaCL sparse matrix back to a sparse uBLAS matrix + * + * Directly populates the internal buffer of the uBLAS matrix, thus avoiding a temporary STL matrix. + */ +template<typename ScalarType, unsigned int AlignmentV, typename F, vcl_size_t IB, typename IA, typename TA> +void copy(viennacl::compressed_matrix<ScalarType, AlignmentV> const & gpu_matrix, + boost::numeric::ublas::compressed_matrix<ScalarType> & ublas_matrix) +{ + assert( (viennacl::traits::size1(ublas_matrix) == gpu_matrix.size1()) && bool("Size mismatch") ); + assert( (viennacl::traits::size2(ublas_matrix) == gpu_matrix.size2()) && bool("Size mismatch") ); + + viennacl::backend::typesafe_host_array<unsigned int> row_buffer(gpu_matrix.handle1(), gpu_matrix.size1() + 1); + viennacl::backend::typesafe_host_array<unsigned int> col_buffer(gpu_matrix.handle2(), gpu_matrix.nnz()); + + viennacl::backend::memory_read(gpu_matrix.handle1(), 0, row_buffer.raw_size(), row_buffer.get()); + viennacl::backend::memory_read(gpu_matrix.handle2(), 0, col_buffer.raw_size(), col_buffer.get()); + + ublas_matrix.clear(); + ublas_matrix.reserve(gpu_matrix.nnz()); + + ublas_matrix.set_filled(gpu_matrix.size1() + 1, gpu_matrix.nnz()); + + for (vcl_size_t i=0; i<ublas_matrix.size1() + 1; ++i) + ublas_matrix.index1_data()[i] = 
row_buffer[i]; + + for (vcl_size_t i=0; i<ublas_matrix.nnz(); ++i) + ublas_matrix.index2_data()[i] = col_buffer[i]; + + viennacl::backend::memory_read(gpu_matrix.handle(), 0, sizeof(ScalarType) * gpu_matrix.nnz(), &(ublas_matrix.value_data()[0])); + +} +#endif + +#ifdef VIENNACL_WITH_ARMADILLO +/** @brief Convenience routine for copying a ViennaCL sparse matrix back to a sparse Armadillo matrix. + * + * Performance notice: Inserting the row-major data from the ViennaCL matrix to the column-major Armadillo-matrix is likely to be slow. + * However, since this operation is unlikely to be performance-critical, further optimizations are postponed. +*/ +template<typename NumericT, unsigned int AlignmentV> +void copy(viennacl::compressed_matrix<NumericT, AlignmentV> & vcl_matrix, + arma::SpMat<NumericT> & arma_matrix) +{ + assert( (static_cast<vcl_size_t>(arma_matrix.n_rows) == vcl_matrix.size1()) && bool("Size mismatch") ); + assert( (static_cast<vcl_size_t>(arma_matrix.n_cols) == vcl_matrix.size2()) && bool("Size mismatch") ); + + if ( vcl_matrix.size1() > 0 && vcl_matrix.size2() > 0 ) + { + //get raw data from memory: + viennacl::backend::typesafe_host_array<unsigned int> row_buffer(vcl_matrix.handle1(), vcl_matrix.size1() + 1); + viennacl::backend::typesafe_host_array<unsigned int> col_buffer(vcl_matrix.handle2(), vcl_matrix.nnz()); + viennacl::backend::typesafe_host_array<NumericT> elements (vcl_matrix.handle(), vcl_matrix.nnz()); + + viennacl::backend::memory_read(vcl_matrix.handle1(), 0, row_buffer.raw_size(), row_buffer.get()); + viennacl::backend::memory_read(vcl_matrix.handle2(), 0, col_buffer.raw_size(), col_buffer.get()); + viennacl::backend::memory_read(vcl_matrix.handle(), 0, elements.raw_size(), elements.get()); + + arma_matrix.zeros(); + vcl_size_t data_index = 0; + for (vcl_size_t row = 1; row <= vcl_matrix.size1(); ++row) + { + while (data_index < row_buffer[row]) + { + assert(col_buffer[data_index] < vcl_matrix.size2() && bool("ViennaCL encountered 
invalid data at col_buffer")); + if (elements[data_index] != static_cast<NumericT>(0.0)) + arma_matrix(row-1, col_buffer[data_index]) = elements[data_index]; + ++data_index; + } + } + } +} +#endif + +#ifdef VIENNACL_WITH_EIGEN +/** @brief Convenience routine for copying a ViennaCL sparse matrix back to a sparse Eigen matrix */ +template<typename NumericT, int flags, unsigned int AlignmentV> +void copy(compressed_matrix<NumericT, AlignmentV> & gpu_matrix, + Eigen::SparseMatrix<NumericT, flags> & eigen_matrix) +{ + assert( (static_cast<vcl_size_t>(eigen_matrix.rows()) == gpu_matrix.size1()) && bool("Size mismatch") ); + assert( (static_cast<vcl_size_t>(eigen_matrix.cols()) == gpu_matrix.size2()) && bool("Size mismatch") ); + + if ( gpu_matrix.size1() > 0 && gpu_matrix.size2() > 0 ) + { + //get raw data from memory: + viennacl::backend::typesafe_host_array<unsigned int> row_buffer(gpu_matrix.handle1(), gpu_matrix.size1() + 1); + viennacl::backend::typesafe_host_array<unsigned int> col_buffer(gpu_matrix.handle2(), gpu_matrix.nnz()); + std::vector<NumericT> elements(gpu_matrix.nnz()); + + viennacl::backend::memory_read(gpu_matrix.handle1(), 0, row_buffer.raw_size(), row_buffer.get()); + viennacl::backend::memory_read(gpu_matrix.handle2(), 0, col_buffer.raw_size(), col_buffer.get()); + viennacl::backend::memory_read(gpu_matrix.handle(), 0, sizeof(NumericT)* gpu_matrix.nnz(), &(elements[0])); + + eigen_matrix.setZero(); + vcl_size_t data_index = 0; + for (vcl_size_t row = 1; row <= gpu_matrix.size1(); ++row) + { + while (data_index < row_buffer[row]) + { + assert(col_buffer[data_index] < gpu_matrix.size2() && bool("ViennaCL encountered invalid data at col_buffer")); + if (elements[data_index] != static_cast<NumericT>(0.0)) + eigen_matrix.insert(row-1, col_buffer[data_index]) = elements[data_index]; + ++data_index; + } + } + } +} +#endif + + + +#ifdef VIENNACL_WITH_MTL4 +/** @brief Convenience routine for copying a ViennaCL sparse matrix back to a sparse MTL4 matrix */ 
+template<typename NumericT, unsigned int AlignmentV> +void copy(compressed_matrix<NumericT, AlignmentV> & gpu_matrix, + mtl::compressed2D<NumericT> & mtl4_matrix) +{ + assert( (static_cast<vcl_size_t>(mtl4_matrix.num_rows()) == gpu_matrix.size1()) && bool("Size mismatch") ); + assert( (static_cast<vcl_size_t>(mtl4_matrix.num_cols()) == gpu_matrix.size2()) && bool("Size mismatch") ); + + if ( gpu_matrix.size1() > 0 && gpu_matrix.size2() > 0 ) + { + + //get raw data from memory: + viennacl::backend::typesafe_host_array<unsigned int> row_buffer(gpu_matrix.handle1(), gpu_matrix.size1() + 1); + viennacl::backend::typesafe_host_array<unsigned int> col_buffer(gpu_matrix.handle2(), gpu_matrix.nnz()); + std::vector<NumericT> elements(gpu_matrix.nnz()); + + viennacl::backend::memory_read(gpu_matrix.handle1(), 0, row_buffer.raw_size(), row_buffer.get()); + viennacl::backend::memory_read(gpu_matrix.handle2(), 0, col_buffer.raw_size(), col_buffer.get()); + viennacl::backend::memory_read(gpu_matrix.handle(), 0, sizeof(NumericT)* gpu_matrix.nnz(), &(elements[0])); + + //set_to_zero(mtl4_matrix); + //mtl4_matrix.change_dim(gpu_matrix.size1(), gpu_matrix.size2()); + + mtl::matrix::inserter< mtl::compressed2D<NumericT> > ins(mtl4_matrix); + vcl_size_t data_index = 0; + for (vcl_size_t row = 1; row <= gpu_matrix.size1(); ++row) + { + while (data_index < row_buffer[row]) + { + assert(col_buffer[data_index] < gpu_matrix.size2() && bool("ViennaCL encountered invalid data at col_buffer")); + if (elements[data_index] != static_cast<NumericT>(0.0)) + ins(row-1, col_buffer[data_index]) << typename mtl::Collection< mtl::compressed2D<NumericT> >::value_type(elements[data_index]); + ++data_index; + } + } + } +} +#endif + + + + + +//////////////////////// compressed_matrix ////////////////////////// +/** @brief A sparse square matrix in compressed sparse rows format. 
+ * + * @tparam NumericT The floating point type (either float or double, checked at compile time) + * @tparam AlignmentV The internal memory size for the entries in each row is given by (size()/AlignmentV + 1) * AlignmentV. AlignmentV must be a power of two. Best values or usually 4, 8 or 16, higher values are usually a waste of memory. + */ +template<class NumericT, unsigned int AlignmentV /* see VCLForwards.h */> +class compressed_matrix +{ +public: + typedef viennacl::backend::mem_handle handle_type; + typedef scalar<typename viennacl::tools::CHECK_SCALAR_TEMPLATE_ARGUMENT<NumericT>::ResultType> value_type; + typedef vcl_size_t size_type; + + /** @brief Default construction of a compressed matrix. No memory is allocated */ + compressed_matrix() : rows_(0), cols_(0), nonzeros_(0), row_block_num_(0) {} + + /** @brief Construction of a compressed matrix with the supplied number of rows and columns. If the number of nonzeros is positive, memory is allocated + * + * @param rows Number of rows + * @param cols Number of columns + * @param nonzeros Optional number of nonzeros for memory preallocation + * @param ctx Optional context in which the matrix is created (one out of multiple OpenCL contexts, CUDA, host) + */ + explicit compressed_matrix(vcl_size_t rows, vcl_size_t cols, vcl_size_t nonzeros = 0, viennacl::context ctx = viennacl::context()) + : rows_(rows), cols_(cols), nonzeros_(nonzeros), row_block_num_(0) + { + row_buffer_.switch_active_handle_id(ctx.memory_type()); + col_buffer_.switch_active_handle_id(ctx.memory_type()); + elements_.switch_active_handle_id(ctx.memory_type()); + row_blocks_.switch_active_handle_id(ctx.memory_type()); + +#ifdef VIENNACL_WITH_OPENCL + if (ctx.memory_type() == OPENCL_MEMORY) + { + row_buffer_.opencl_handle().context(ctx.opencl_context()); + col_buffer_.opencl_handle().context(ctx.opencl_context()); + elements_.opencl_handle().context(ctx.opencl_context()); + row_blocks_.opencl_handle().context(ctx.opencl_context()); + } +#endif 
+ if (rows > 0) + { + viennacl::backend::memory_create(row_buffer_, viennacl::backend::typesafe_host_array<unsigned int>().element_size() * (rows + 1), ctx); + viennacl::vector_base<unsigned int> init_temporary(row_buffer_, size_type(rows+1), 0, 1); + init_temporary = viennacl::zero_vector<unsigned int>(size_type(rows+1), ctx); + } + if (nonzeros > 0) + { + viennacl::backend::memory_create(col_buffer_, viennacl::backend::typesafe_host_array<unsigned int>().element_size() * nonzeros, ctx); + viennacl::backend::memory_create(elements_, sizeof(NumericT) * nonzeros, ctx); + } + } + + /** @brief Construction of a compressed matrix with the supplied number of rows and columns. If the number of nonzeros is positive, memory is allocated + * + * @param rows Number of rows + * @param cols Number of columns + * @param ctx Context in which to create the matrix + */ + explicit compressed_matrix(vcl_size_t rows, vcl_size_t cols, viennacl::context ctx) + : rows_(rows), cols_(cols), nonzeros_(0), row_block_num_(0) + { + row_buffer_.switch_active_handle_id(ctx.memory_type()); + col_buffer_.switch_active_handle_id(ctx.memory_type()); + elements_.switch_active_handle_id(ctx.memory_type()); + row_blocks_.switch_active_handle_id(ctx.memory_type()); + +#ifdef VIENNACL_WITH_OPENCL + if (ctx.memory_type() == OPENCL_MEMORY) + { + row_buffer_.opencl_handle().context(ctx.opencl_context()); + col_buffer_.opencl_handle().context(ctx.opencl_context()); + elements_.opencl_handle().context(ctx.opencl_context()); + row_blocks_.opencl_handle().context(ctx.opencl_context()); + } +#endif + if (rows > 0) + { + viennacl::backend::memory_create(row_buffer_, viennacl::backend::typesafe_host_array<unsigned int>().element_size() * (rows + 1), ctx); + viennacl::vector_base<unsigned int> init_temporary(row_buffer_, size_type(rows+1), 0, 1); + init_temporary = viennacl::zero_vector<unsigned int>(size_type(rows+1), ctx); + } + } + + /** @brief Creates an empty compressed_matrix, but sets the respective context 
information. + * + * This is useful if you want to want to populate e.g. a viennacl::compressed_matrix<> on the host with copy(), but the default backend is OpenCL. + */ + explicit compressed_matrix(viennacl::context ctx) : rows_(0), cols_(0), nonzeros_(0), row_block_num_(0) + { + row_buffer_.switch_active_handle_id(ctx.memory_type()); + col_buffer_.switch_active_handle_id(ctx.memory_type()); + elements_.switch_active_handle_id(ctx.memory_type()); + row_blocks_.switch_active_handle_id(ctx.memory_type()); + +#ifdef VIENNACL_WITH_OPENCL + if (ctx.memory_type() == OPENCL_MEMORY) + { + row_buffer_.opencl_handle().context(ctx.opencl_context()); + col_buffer_.opencl_handle().context(ctx.opencl_context()); + elements_.opencl_handle().context(ctx.opencl_context()); + row_blocks_.opencl_handle().context(ctx.opencl_context()); + } +#endif + } + + +#ifdef VIENNACL_WITH_OPENCL + /** @brief Wraps existing OpenCL buffers holding the compressed sparse row information. + * + * @param mem_row_buffer A buffer consisting of unsigned integers (cl_uint) holding the entry points for each row (0-based indexing). (rows+1) elements, the last element being 'nonzeros'. + * @param mem_col_buffer A buffer consisting of unsigned integers (cl_uint) holding the column index for each nonzero entry as stored in 'mem_elements'. + * @param mem_elements A buffer holding the floating point numbers for nonzeros. OpenCL type of elements must match the template 'NumericT'. + * @param rows Number of rows in the matrix to be wrapped. + * @param cols Number of columns to be wrapped. + * @param nonzeros Number of nonzero entries in the matrix. 
+ */ + explicit compressed_matrix(cl_mem mem_row_buffer, cl_mem mem_col_buffer, cl_mem mem_elements, + vcl_size_t rows, vcl_size_t cols, vcl_size_t nonzeros) : + rows_(rows), cols_(cols), nonzeros_(nonzeros), row_block_num_(0) + { + row_buffer_.switch_active_handle_id(viennacl::OPENCL_MEMORY); + row_buffer_.opencl_handle() = mem_row_buffer; + row_buffer_.opencl_handle().inc(); //prevents that the user-provided memory is deleted once the matrix object is destroyed. + row_buffer_.raw_size(sizeof(cl_uint) * (rows + 1)); + + col_buffer_.switch_active_handle_id(viennacl::OPENCL_MEMORY); + col_buffer_.opencl_handle() = mem_col_buffer; + col_buffer_.opencl_handle().inc(); //prevents that the user-provided memory is deleted once the matrix object is destroyed. + col_buffer_.raw_size(sizeof(cl_uint) * nonzeros); + + elements_.switch_active_handle_id(viennacl::OPENCL_MEMORY); + elements_.opencl_handle() = mem_elements; + elements_.opencl_handle().inc(); //prevents that the user-provided memory is deleted once the matrix object is destroyed. + elements_.raw_size(sizeof(NumericT) * nonzeros); + + //generate block information for CSR-adaptive: + generate_row_block_information(); + } +#endif + + /** @brief Assignment a compressed matrix from the product of two compressed_matrix objects (C = A * B). 
*/ + compressed_matrix(matrix_expression<const compressed_matrix, const compressed_matrix, op_prod> const & proxy) + : rows_(0), cols_(0), nonzeros_(0), row_block_num_(0) + { + viennacl::context ctx = viennacl::traits::context(proxy.lhs()); + + row_buffer_.switch_active_handle_id(ctx.memory_type()); + col_buffer_.switch_active_handle_id(ctx.memory_type()); + elements_.switch_active_handle_id(ctx.memory_type()); + row_blocks_.switch_active_handle_id(ctx.memory_type()); + +#ifdef VIENNACL_WITH_OPENCL + if (ctx.memory_type() == OPENCL_MEMORY) + { + row_buffer_.opencl_handle().context(ctx.opencl_context()); + col_buffer_.opencl_handle().context(ctx.opencl_context()); + elements_.opencl_handle().context(ctx.opencl_context()); + row_blocks_.opencl_handle().context(ctx.opencl_context()); + } +#endif + + viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), *this); + generate_row_block_information(); + } + + /** @brief Assignment a compressed matrix from possibly another memory domain. */ + compressed_matrix & operator=(compressed_matrix const & other) + { + assert( (rows_ == 0 || rows_ == other.size1()) && bool("Size mismatch") ); + assert( (cols_ == 0 || cols_ == other.size2()) && bool("Size mismatch") ); + + rows_ = other.size1(); + cols_ = other.size2(); + nonzeros_ = other.nnz(); + row_block_num_ = other.row_block_num_; + + viennacl::backend::typesafe_memory_copy<unsigned int>(other.row_buffer_, row_buffer_); + viennacl::backend::typesafe_memory_copy<unsigned int>(other.col_buffer_, col_buffer_); + viennacl::backend::typesafe_memory_copy<unsigned int>(other.row_blocks_, row_blocks_); + viennacl::backend::typesafe_memory_copy<NumericT>(other.elements_, elements_); + + return *this; + } + + /** @brief Assignment a compressed matrix from the product of two compressed_matrix objects (C = A * B). 
*/ + compressed_matrix & operator=(matrix_expression<const compressed_matrix, const compressed_matrix, op_prod> const & proxy) + { + assert( (rows_ == 0 || rows_ == proxy.lhs().size1()) && bool("Size mismatch") ); + assert( (cols_ == 0 || cols_ == proxy.rhs().size2()) && bool("Size mismatch") ); + + viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), *this); + generate_row_block_information(); + + return *this; + } + + + /** @brief Sets the row, column and value arrays of the compressed matrix + * + * Type of row_jumper and col_buffer is 'unsigned int' for CUDA and OpenMP (host) backend, but *must* be cl_uint for OpenCL. + * The reason is that 'unsigned int' might have a different bit representation on the host than 'unsigned int' on the OpenCL device. + * cl_uint is guaranteed to have the correct bit representation for OpenCL devices. + * + * @param row_jumper Pointer to an array holding the indices of the first element of each row (starting with zero). E.g. row_jumper[10] returns the index of the first entry of the 11th row. The array length is 'cols + 1' + * @param col_buffer Pointer to an array holding the column index of each entry. The array length is 'nonzeros' + * @param elements Pointer to an array holding the entries of the sparse matrix. 
The array length is 'elements' + * @param rows Number of rows of the sparse matrix + * @param cols Number of columns of the sparse matrix + * @param nonzeros Number of nonzeros + */ + void set(const void * row_jumper, + const void * col_buffer, + const NumericT * elements, + vcl_size_t rows, + vcl_size_t cols, + vcl_size_t nonzeros) + { + assert( (rows > 0) && bool("Error in compressed_matrix::set(): Number of rows must be larger than zero!")); + assert( (cols > 0) && bool("Error in compressed_matrix::set(): Number of columns must be larger than zero!")); + assert( (nonzeros > 0) && bool("Error in compressed_matrix::set(): Number of nonzeros must be larger than zero!")); + //std::cout << "Setting memory: " << cols + 1 << ", " << nonzeros << std::endl; + + //row_buffer_.switch_active_handle_id(viennacl::backend::OPENCL_MEMORY); + viennacl::backend::memory_create(row_buffer_, viennacl::backend::typesafe_host_array<unsigned int>(row_buffer_).element_size() * (rows + 1), viennacl::traits::context(row_buffer_), row_jumper); + + //col_buffer_.switch_active_handle_id(viennacl::backend::OPENCL_MEMORY); + viennacl::backend::memory_create(col_buffer_, viennacl::backend::typesafe_host_array<unsigned int>(col_buffer_).element_size() * nonzeros, viennacl::traits::context(col_buffer_), col_buffer); + + //elements_.switch_active_handle_id(viennacl::backend::OPENCL_MEMORY); + viennacl::backend::memory_create(elements_, sizeof(NumericT) * nonzeros, viennacl::traits::context(elements_), elements); + + nonzeros_ = nonzeros; + rows_ = rows; + cols_ = cols; + + //generate block information for CSR-adaptive: + generate_row_block_information(); + } + + /** @brief Allocate memory for the supplied number of nonzeros in the matrix. Old values are preserved. 
*/ + void reserve(vcl_size_t new_nonzeros, bool preserve = true) + { + if (new_nonzeros > nonzeros_) + { + if (preserve) + { + handle_type col_buffer_old; + handle_type elements_old; + viennacl::backend::memory_shallow_copy(col_buffer_, col_buffer_old); + viennacl::backend::memory_shallow_copy(elements_, elements_old); + + viennacl::backend::typesafe_host_array<unsigned int> size_deducer(col_buffer_); + viennacl::backend::memory_create(col_buffer_, size_deducer.element_size() * new_nonzeros, viennacl::traits::context(col_buffer_)); + viennacl::backend::memory_create(elements_, sizeof(NumericT) * new_nonzeros, viennacl::traits::context(elements_)); + + viennacl::backend::memory_copy(col_buffer_old, col_buffer_, 0, 0, size_deducer.element_size() * nonzeros_); + viennacl::backend::memory_copy(elements_old, elements_, 0, 0, sizeof(NumericT)* nonzeros_); + } + else + { + viennacl::backend::typesafe_host_array<unsigned int> size_deducer(col_buffer_); + viennacl::backend::memory_create(col_buffer_, size_deducer.element_size() * new_nonzeros, viennacl::traits::context(col_buffer_)); + viennacl::backend::memory_create(elements_, sizeof(NumericT) * new_nonzeros, viennacl::traits::context(elements_)); + } + + nonzeros_ = new_nonzeros; + } + } + + /** @brief Resize the matrix. + * + * @param new_size1 New number of rows + * @param new_size2 New number of columns + * @param preserve If true, the old values are preserved. At present, old values are always discarded. 
+ */ + void resize(vcl_size_t new_size1, vcl_size_t new_size2, bool preserve = true) + { + assert(new_size1 > 0 && new_size2 > 0 && bool("Cannot resize to zero size!")); + + if (new_size1 != rows_ || new_size2 != cols_) + { + if (!preserve) + { + viennacl::backend::typesafe_host_array<unsigned int> host_row_buffer(row_buffer_, new_size1 + 1); + viennacl::backend::memory_create(row_buffer_, viennacl::backend::typesafe_host_array<unsigned int>().element_size() * (new_size1 + 1), viennacl::traits::context(row_buffer_), host_row_buffer.get()); + // faster version without initializing memory: + //viennacl::backend::memory_create(row_buffer_, viennacl::backend::typesafe_host_array<unsigned int>().element_size() * (new_size1 + 1), viennacl::traits::context(row_buffer_)); + nonzeros_ = 0; + } + else + { + std::vector<std::map<unsigned int, NumericT> > stl_sparse_matrix; + if (rows_ > 0) + { + stl_sparse_matrix.resize(rows_); + viennacl::copy(*this, stl_sparse_matrix); + } else { + stl_sparse_matrix.resize(new_size1); + stl_sparse_matrix[0][0] = 0; //enforces nonzero array sizes if matrix was initially empty + } + + stl_sparse_matrix.resize(new_size1); + + //discard entries with column index larger than new_size2 + if (new_size2 < cols_ && rows_ > 0) + { + for (vcl_size_t i=0; i<stl_sparse_matrix.size(); ++i) + { + std::list<unsigned int> to_delete; + for (typename std::map<unsigned int, NumericT>::iterator it = stl_sparse_matrix[i].begin(); + it != stl_sparse_matrix[i].end(); + ++it) + { + if (it->first >= new_size2) + to_delete.push_back(it->first); + } + + for (std::list<unsigned int>::iterator it = to_delete.begin(); it != to_delete.end(); ++it) + stl_sparse_matrix[i].erase(*it); + } + } + + viennacl::tools::sparse_matrix_adapter<NumericT> adapted_matrix(stl_sparse_matrix, new_size1, new_size2); + rows_ = new_size1; + cols_ = new_size2; + viennacl::copy(adapted_matrix, *this); + } + + rows_ = new_size1; + cols_ = new_size2; + } + } + + /** @brief Resets all entries in 
the matrix back to zero without changing the matrix size. Resets the sparsity pattern. */ + void clear() + { + viennacl::backend::typesafe_host_array<unsigned int> host_row_buffer(row_buffer_, rows_ + 1); + viennacl::backend::typesafe_host_array<unsigned int> host_col_buffer(col_buffer_, 1); + std::vector<NumericT> host_elements(1); + + viennacl::backend::memory_create(row_buffer_, host_row_buffer.element_size() * (rows_ + 1), viennacl::traits::context(row_buffer_), host_row_buffer.get()); + viennacl::backend::memory_create(col_buffer_, host_col_buffer.element_size() * 1, viennacl::traits::context(col_buffer_), host_col_buffer.get()); + viennacl::backend::memory_create(elements_, sizeof(NumericT) * 1, viennacl::traits::context(elements_), &(host_elements[0])); + + nonzeros_ = 0; + } + + /** @brief Returns a reference to the (i,j)-th entry of the sparse matrix. If (i,j) does not exist (zero), it is inserted (slow!) */ + entry_proxy<NumericT> operator()(vcl_size_t i, vcl_size_t j) + { + assert( (i < rows_) && (j < cols_) && bool("compressed_matrix access out of bounds!")); + + vcl_size_t index = element_index(i, j); + + // check for element in sparsity pattern + if (index < nonzeros_) + return entry_proxy<NumericT>(index, elements_); + + // Element not found. Copying required. Very slow, but direct entry manipulation is painful anyway... 
+ std::vector< std::map<unsigned int, NumericT> > cpu_backup(rows_); + tools::sparse_matrix_adapter<NumericT> adapted_cpu_backup(cpu_backup, rows_, cols_); + viennacl::copy(*this, adapted_cpu_backup); + cpu_backup[i][static_cast<unsigned int>(j)] = 0.0; + viennacl::copy(adapted_cpu_backup, *this); + + index = element_index(i, j); + + assert(index < nonzeros_); + + return entry_proxy<NumericT>(index, elements_); + } + + /** @brief Returns the number of rows */ + const vcl_size_t & size1() const { return rows_; } + /** @brief Returns the number of columns */ + const vcl_size_t & size2() const { return cols_; } + /** @brief Returns the number of nonzero entries */ + const vcl_size_t & nnz() const { return nonzeros_; } + /** @brief Returns the internal number of row blocks for an adaptive SpMV */ + const vcl_size_t & blocks1() const { return row_block_num_; } + + /** @brief Returns the OpenCL handle to the row index array */ + const handle_type & handle1() const { return row_buffer_; } + /** @brief Returns the OpenCL handle to the column index array */ + const handle_type & handle2() const { return col_buffer_; } + /** @brief Returns the OpenCL handle to the row block array */ + const handle_type & handle3() const { return row_blocks_; } + /** @brief Returns the OpenCL handle to the matrix entry array */ + const handle_type & handle() const { return elements_; } + + /** @brief Returns the OpenCL handle to the row index array */ + handle_type & handle1() { return row_buffer_; } + /** @brief Returns the OpenCL handle to the column index array */ + handle_type & handle2() { return col_buffer_; } + /** @brief Returns the OpenCL handle to the row block array */ + handle_type & handle3() { return row_blocks_; } + /** @brief Returns the OpenCL handle to the matrix entry array */ + handle_type & handle() { return elements_; } + + /** @brief Switches the memory context of the matrix. + * + * Allows for e.g. 
an migration of the full matrix from OpenCL memory to host memory for e.g. computing a preconditioner. + */ + void switch_memory_context(viennacl::context new_ctx) + { + viennacl::backend::switch_memory_context<unsigned int>(row_buffer_, new_ctx); + viennacl::backend::switch_memory_context<unsigned int>(col_buffer_, new_ctx); + viennacl::backend::switch_memory_context<unsigned int>(row_blocks_, new_ctx); + viennacl::backend::switch_memory_context<NumericT>(elements_, new_ctx); + } + + /** @brief Returns the current memory context to determine whether the matrix is set up for OpenMP, OpenCL, or CUDA. */ + viennacl::memory_types memory_context() const + { + return row_buffer_.get_active_handle_id(); + } + +private: + + /** @brief Helper function for accessing the element (i,j) of the matrix. */ + vcl_size_t element_index(vcl_size_t i, vcl_size_t j) + { + //read row indices + viennacl::backend::typesafe_host_array<unsigned int> row_indices(row_buffer_, 2); + viennacl::backend::memory_read(row_buffer_, row_indices.element_size()*i, row_indices.element_size()*2, row_indices.get()); + + //get column indices for row i: + viennacl::backend::typesafe_host_array<unsigned int> col_indices(col_buffer_, row_indices[1] - row_indices[0]); + viennacl::backend::memory_read(col_buffer_, col_indices.element_size()*row_indices[0], row_indices.element_size()*col_indices.size(), col_indices.get()); + + for (vcl_size_t k=0; k<col_indices.size(); ++k) + { + if (col_indices[k] == j) + return row_indices[0] + k; + } + + // if not found, return index past the end of the matrix (cf. matrix.end() in the spirit of the STL) + return nonzeros_; + } + +public: + /** @brief Builds the row block information needed for fast sparse matrix-vector multiplications. + * + * Required when manually populating the memory buffers with values. 
Not necessary when using viennacl::copy() or .set()
   */
  void generate_row_block_information()
  {
    // Read the full CSR row pointer array back to the host.
    viennacl::backend::typesafe_host_array<unsigned int> row_buffer(row_buffer_, rows_ + 1);
    viennacl::backend::memory_read(row_buffer_, 0, row_buffer.raw_size(), row_buffer.get());

    // Worst case is one block per row, so rows_ + 1 boundary entries always suffice.
    viennacl::backend::typesafe_host_array<unsigned int> row_blocks(row_buffer_, rows_ + 1);

    vcl_size_t num_entries_in_current_batch = 0;

    const vcl_size_t shared_mem_size = 1024; // number of column indices loaded to shared memory, number of floating point values loaded to shared memory

    row_block_num_ = 0;
    row_blocks.set(0, 0);
    // Greedily pack consecutive rows into blocks of at most shared_mem_size entries each.
    for (vcl_size_t i=0; i<rows_; ++i)
    {
      vcl_size_t entries_in_row = vcl_size_t(row_buffer[i+1]) - vcl_size_t(row_buffer[i]);
      num_entries_in_current_batch += entries_in_row;

      if (num_entries_in_current_batch > shared_mem_size)
      {
        vcl_size_t rows_in_batch = i - row_blocks[row_block_num_];
        if (rows_in_batch > 0) // at least one full row is in the batch. Use current row in next batch.
          row_blocks.set(++row_block_num_, i--); // i-- makes the loop revisit row i as the first row of the next block
        else // row is larger than buffer in shared memory
          row_blocks.set(++row_block_num_, i+1); // give the oversized row a block of its own
        num_entries_in_current_batch = 0;
      }
    }
    // Close the final, partially filled block (if any rows remain).
    if (num_entries_in_current_batch > 0)
      row_blocks.set(++row_block_num_, rows_);

    if (row_block_num_ > 0) //matrix might be empty...
      viennacl::backend::memory_create(row_blocks_,
                                       row_blocks.element_size() * (row_block_num_ + 1),
                                       viennacl::traits::context(row_buffer_), row_blocks.get());

  }

private:
  // /** @brief Copy constructor is by now not available. */
  //compressed_matrix(compressed_matrix const &);

private:

  vcl_size_t rows_;          // number of rows
  vcl_size_t cols_;          // number of columns
  vcl_size_t nonzeros_;      // number of stored (structural) nonzero entries
  vcl_size_t row_block_num_; // number of row blocks for the adaptive SpMV kernel
  handle_type row_buffer_;   // CSR row pointer array (rows_ + 1 entries)
  handle_type row_blocks_;   // row block boundaries for the adaptive SpMV kernel
  handle_type col_buffer_;   // column index of each stored entry
  handle_type elements_;     // value of each stored entry
};

/** @brief Output stream support for compressed_matrix.
Output format is same as MATLAB, Octave, or SciPy + * + * @param os STL output stream + * @param A The compressed matrix to be printed. +*/ +template<typename NumericT, unsigned int AlignmentV> +std::ostream & operator<<(std::ostream & os, compressed_matrix<NumericT, AlignmentV> const & A) +{ + std::vector<std::map<unsigned int, NumericT> > tmp(A.size1()); + viennacl::copy(A, tmp); + os << "compressed_matrix of size (" << A.size1() << ", " << A.size2() << ") with " << A.nnz() << " nonzeros:" << std::endl; + + for (vcl_size_t i=0; i<A.size1(); ++i) + { + for (typename std::map<unsigned int, NumericT>::const_iterator it = tmp[i].begin(); it != tmp[i].end(); ++it) + os << " (" << i << ", " << it->first << ")\t" << it->second << std::endl; + } + return os; +} + +// +// Specify available operations: +// + +/** \cond */ + +namespace linalg +{ +namespace detail +{ + // x = A * y + template<typename T, unsigned int A> + struct op_executor<vector_base<T>, op_assign, vector_expression<const compressed_matrix<T, A>, const vector_base<T>, op_prod> > + { + static void apply(vector_base<T> & lhs, vector_expression<const compressed_matrix<T, A>, const vector_base<T>, op_prod> const & rhs) + { + // check for the special case x = A * x + if (viennacl::traits::handle(lhs) == viennacl::traits::handle(rhs.rhs())) + { + viennacl::vector<T> temp(lhs); + viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), T(1), temp, T(0)); + lhs = temp; + } + else + viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), T(1), lhs, T(0)); + } + }; + + template<typename T, unsigned int A> + struct op_executor<vector_base<T>, op_inplace_add, vector_expression<const compressed_matrix<T, A>, const vector_base<T>, op_prod> > + { + static void apply(vector_base<T> & lhs, vector_expression<const compressed_matrix<T, A>, const vector_base<T>, op_prod> const & rhs) + { + // check for the special case x += A * x + if (viennacl::traits::handle(lhs) == viennacl::traits::handle(rhs.rhs())) + { + viennacl::vector<T> 
temp(lhs); + viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), T(1), temp, T(0)); + lhs += temp; + } + else + viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), T(1), lhs, T(1)); + } + }; + + template<typename T, unsigned int A> + struct op_executor<vector_base<T>, op_inplace_sub, vector_expression<const compressed_matrix<T, A>, const vector_base<T>, op_prod> > + { + static void apply(vector_base<T> & lhs, vector_expression<const compressed_matrix<T, A>, const vector_base<T>, op_prod> const & rhs) + { + // check for the special case x -= A * x + if (viennacl::traits::handle(lhs) == viennacl::traits::handle(rhs.rhs())) + { + viennacl::vector<T> temp(lhs); + viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), T(1), temp, T(0)); + lhs -= temp; + } + else + viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), T(-1), lhs, T(1)); + } + }; + + + // x = A * vec_op + template<typename T, unsigned int A, typename LHS, typename RHS, typename OP> + struct op_executor<vector_base<T>, op_assign, vector_expression<const compressed_matrix<T, A>, const vector_expression<const LHS, const RHS, OP>, op_prod> > + { + static void apply(vector_base<T> & lhs, vector_expression<const compressed_matrix<T, A>, const vector_expression<const LHS, const RHS, OP>, op_prod> const & rhs) + { + viennacl::vector<T> temp(rhs.rhs(), viennacl::traits::context(rhs)); + viennacl::linalg::prod_impl(rhs.lhs(), temp, lhs); + } + }; + + // x = A * vec_op + template<typename T, unsigned int A, typename LHS, typename RHS, typename OP> + struct op_executor<vector_base<T>, op_inplace_add, vector_expression<const compressed_matrix<T, A>, vector_expression<const LHS, const RHS, OP>, op_prod> > + { + static void apply(vector_base<T> & lhs, vector_expression<const compressed_matrix<T, A>, vector_expression<const LHS, const RHS, OP>, op_prod> const & rhs) + { + viennacl::vector<T> temp(rhs.rhs(), viennacl::traits::context(rhs)); + viennacl::vector<T> temp_result(lhs); + viennacl::linalg::prod_impl(rhs.lhs(), temp, 
temp_result); + lhs += temp_result; + } + }; + + // x = A * vec_op + template<typename T, unsigned int A, typename LHS, typename RHS, typename OP> + struct op_executor<vector_base<T>, op_inplace_sub, vector_expression<const compressed_matrix<T, A>, const vector_expression<const LHS, const RHS, OP>, op_prod> > + { + static void apply(vector_base<T> & lhs, vector_expression<const compressed_matrix<T, A>, const vector_expression<const LHS, const RHS, OP>, op_prod> const & rhs) + { + viennacl::vector<T> temp(rhs.rhs(), viennacl::traits::context(rhs)); + viennacl::vector<T> temp_result(lhs); + viennacl::linalg::prod_impl(rhs.lhs(), temp, temp_result); + lhs -= temp_result; + } + }; + +} // namespace detail +} // namespace linalg + /** \endcond */ +} + +#endif http://git-wip-us.apache.org/repos/asf/mahout/blob/f7c1f802/native-viennaCL/src/main/cpp/viennacl/context.hpp ---------------------------------------------------------------------- diff --git a/native-viennaCL/src/main/cpp/viennacl/context.hpp b/native-viennaCL/src/main/cpp/viennacl/context.hpp new file mode 100644 index 0000000..ed00c39 --- /dev/null +++ b/native-viennaCL/src/main/cpp/viennacl/context.hpp @@ -0,0 +1,88 @@ +#ifndef VIENNACL_CONTEXT_HPP_ +#define VIENNACL_CONTEXT_HPP_ + +/* ========================================================================= + Copyright (c) 2010-2016, Institute for Microelectronics, + Institute for Analysis and Scientific Computing, + TU Wien. + Portions of this software are copyright by UChicago Argonne, LLC. 
+ + ----------------- + ViennaCL - The Vienna Computing Library + ----------------- + + Project Head: Karl Rupp [email protected] + + (A list of authors and contributors can be found in the manual) + + License: MIT (X11), see file LICENSE in the base directory +============================================================================= */ + +/** @file viennacl/context.hpp + @brief Implementation of a OpenCL-like context, which serves as a unification of {OpenMP, CUDA, OpenCL} at the user API. +*/ + +#include <vector> +#include <stddef.h> +#include <assert.h> +#include "viennacl/forwards.h" +#include "viennacl/ocl/forwards.h" +#include "viennacl/backend/mem_handle.hpp" + +namespace viennacl +{ +/** @brief Represents a generic 'context' similar to an OpenCL context, but is backend-agnostic and thus also suitable for CUDA and OpenMP + * + * Context objects are used to distinguish between different memory domains. One context may refer to an OpenCL device, another context may refer to a CUDA device, and a third context to main RAM. + * Thus, operations are only defined on objects residing on the same context. 
+ */ +class context +{ +public: + context() : mem_type_(viennacl::backend::default_memory_type()) + { +#ifdef VIENNACL_WITH_OPENCL + if (mem_type_ == OPENCL_MEMORY) + ocl_context_ptr_ = &viennacl::ocl::current_context(); + else + ocl_context_ptr_ = NULL; +#endif + } + + explicit context(viennacl::memory_types mtype) : mem_type_(mtype) + { + if (mem_type_ == MEMORY_NOT_INITIALIZED) + mem_type_ = viennacl::backend::default_memory_type(); +#ifdef VIENNACL_WITH_OPENCL + if (mem_type_ == OPENCL_MEMORY) + ocl_context_ptr_ = &viennacl::ocl::current_context(); + else + ocl_context_ptr_ = NULL; +#endif + } + +#ifdef VIENNACL_WITH_OPENCL + context(viennacl::ocl::context const & ctx) : mem_type_(OPENCL_MEMORY), ocl_context_ptr_(&ctx) {} + + viennacl::ocl::context const & opencl_context() const + { + assert(mem_type_ == OPENCL_MEMORY && bool("Context type is not OpenCL")); + return *ocl_context_ptr_; + } +#endif + + // TODO: Add CUDA and OpenMP contexts + + viennacl::memory_types memory_type() const { return mem_type_; } + +private: + viennacl::memory_types mem_type_; +#ifdef VIENNACL_WITH_OPENCL + viennacl::ocl::context const * ocl_context_ptr_; +#endif +}; + + +} + +#endif
