http://git-wip-us.apache.org/repos/asf/mahout/blob/f7c1f802/native-viennaCL/src/main/cpp/viennacl/forwards.h ---------------------------------------------------------------------- diff --git a/native-viennaCL/src/main/cpp/viennacl/forwards.h b/native-viennaCL/src/main/cpp/viennacl/forwards.h new file mode 100644 index 0000000..23a4580 --- /dev/null +++ b/native-viennaCL/src/main/cpp/viennacl/forwards.h @@ -0,0 +1,1032 @@ +#ifndef VIENNACL_FORWARDS_H +#define VIENNACL_FORWARDS_H + +/* ========================================================================= + Copyright (c) 2010-2016, Institute for Microelectronics, + Institute for Analysis and Scientific Computing, + TU Wien. + Portions of this software are copyright by UChicago Argonne, LLC. + + ----------------- + ViennaCL - The Vienna Computing Library + ----------------- + + Project Head: Karl Rupp [email protected] + + (A list of authors and contributors can be found in the manual) + + License: MIT (X11), see file LICENSE in the base directory +============================================================================= */ + + +/** @file viennacl/forwards.h + @brief This file provides the forward declarations for the main types used within ViennaCL +*/ + +/** + @mainpage Main Page + + Here you can find all the documentation on how to use the GPU-accelerated linear algebra library ViennaCL. + The formerly separate \ref usermanual "user manual" is no longer available as a standalone PDF, but all integrated into the HTML-based documentation. + Please use the navigation panel on the left to access the desired information. 
+ + Quick links: + - \ref manual-installation "Installation and building the examples" + - \ref manual-types "Basic types" + - \ref manual-operations "Basic operations" + - \ref manual-algorithms "Algorithms" + + + ----------------------------------- + \htmlonly + <div style="align: right; width: 100%"> + <a href="http://www.tuwien.ac.at/"><img src="tuwien.png"></a> + <a href="http://www.iue.tuwien.ac.at/"><img src="iue.png"></a> + <a href="http://www.asc.tuwien.ac.at/"><img src="asc.png"></a> + </div> + \endhtmlonly +*/ + + +//compatibility defines: +#ifdef VIENNACL_HAVE_UBLAS + #define VIENNACL_WITH_UBLAS +#endif + +#ifdef VIENNACL_HAVE_EIGEN + #define VIENNACL_WITH_EIGEN +#endif + +#ifdef VIENNACL_HAVE_MTL4 + #define VIENNACL_WITH_MTL4 +#endif + +#include <cstddef> +#include <cassert> +#include <string> +#include <stdexcept> + +#include "viennacl/meta/enable_if.hpp" +#include "viennacl/version.hpp" + +/** @brief Main namespace in ViennaCL. Holds all the basic types such as vector, matrix, etc. and defines operations upon them. 
*/ +namespace viennacl +{ + typedef std::size_t vcl_size_t; + typedef std::ptrdiff_t vcl_ptrdiff_t; + + + + /** @brief A tag class representing assignment */ + struct op_assign {}; + /** @brief A tag class representing inplace addition */ + struct op_inplace_add {}; + /** @brief A tag class representing inplace subtraction */ + struct op_inplace_sub {}; + + /** @brief A tag class representing addition */ + struct op_add {}; + /** @brief A tag class representing subtraction */ + struct op_sub {}; + /** @brief A tag class representing multiplication by a scalar */ + struct op_mult {}; + /** @brief A tag class representing matrix-vector products and element-wise multiplications*/ + struct op_prod {}; + /** @brief A tag class representing matrix-matrix products */ + struct op_mat_mat_prod {}; + /** @brief A tag class representing division */ + struct op_div {}; + /** @brief A tag class representing the power function */ + struct op_pow {}; + + /** @brief A tag class representing equality */ + struct op_eq {}; + /** @brief A tag class representing inequality */ + struct op_neq {}; + /** @brief A tag class representing greater-than */ + struct op_greater {}; + /** @brief A tag class representing less-than */ + struct op_less {}; + /** @brief A tag class representing greater-than-or-equal-to */ + struct op_geq {}; + /** @brief A tag class representing less-than-or-equal-to */ + struct op_leq {}; + + /** @brief A tag class representing the summation of a vector */ + struct op_sum {}; + + /** @brief A tag class representing the summation of all rows of a matrix */ + struct op_row_sum {}; + + /** @brief A tag class representing the summation of all columns of a matrix */ + struct op_col_sum {}; + + /** @brief A tag class representing element-wise casting operations on vectors and matrices */ + template<typename OP> + struct op_element_cast {}; + + /** @brief A tag class representing element-wise binary operations (like multiplication) on vectors or matrices */ + 
template<typename OP> + struct op_element_binary {}; + + /** @brief A tag class representing element-wise unary operations (like sin()) on vectors or matrices */ + template<typename OP> + struct op_element_unary {}; + + /** @brief A tag class representing the modulus function for integers */ + struct op_abs {}; + /** @brief A tag class representing the acos() function */ + struct op_acos {}; + /** @brief A tag class representing the asin() function */ + struct op_asin {}; + /** @brief A tag class for representing the argmax() function */ + struct op_argmax {}; + /** @brief A tag class for representing the argmin() function */ + struct op_argmin {}; + /** @brief A tag class representing the atan() function */ + struct op_atan {}; + /** @brief A tag class representing the atan2() function */ + struct op_atan2 {}; + /** @brief A tag class representing the ceil() function */ + struct op_ceil {}; + /** @brief A tag class representing the cos() function */ + struct op_cos {}; + /** @brief A tag class representing the cosh() function */ + struct op_cosh {}; + /** @brief A tag class representing the exp() function */ + struct op_exp {}; + /** @brief A tag class representing the fabs() function */ + struct op_fabs {}; + /** @brief A tag class representing the fdim() function */ + struct op_fdim {}; + /** @brief A tag class representing the floor() function */ + struct op_floor {}; + /** @brief A tag class representing the fmax() function */ + struct op_fmax {}; + /** @brief A tag class representing the fmin() function */ + struct op_fmin {}; + /** @brief A tag class representing the fmod() function */ + struct op_fmod {}; + /** @brief A tag class representing the log() function */ + struct op_log {}; + /** @brief A tag class representing the log10() function */ + struct op_log10 {}; + /** @brief A tag class representing the sin() function */ + struct op_sin {}; + /** @brief A tag class representing the sinh() function */ + struct op_sinh {}; + /** @brief A tag class 
representing the sqrt() function */ + struct op_sqrt {}; + /** @brief A tag class representing the tan() function */ + struct op_tan {}; + /** @brief A tag class representing the tanh() function */ + struct op_tanh {}; + + /** @brief A tag class representing the (off-)diagonal of a matrix */ + struct op_matrix_diag {}; + + /** @brief A tag class representing a matrix given by a vector placed on a certain (off-)diagonal */ + struct op_vector_diag {}; + + /** @brief A tag class representing the extraction of a matrix row to a vector */ + struct op_row {}; + + /** @brief A tag class representing the extraction of a matrix column to a vector */ + struct op_column {}; + + /** @brief A tag class representing inner products of two vectors */ + struct op_inner_prod {}; + + /** @brief A tag class representing the 1-norm of a vector */ + struct op_norm_1 {}; + + /** @brief A tag class representing the 2-norm of a vector */ + struct op_norm_2 {}; + + /** @brief A tag class representing the inf-norm of a vector */ + struct op_norm_inf {}; + + /** @brief A tag class representing the maximum of a vector */ + struct op_max {}; + + /** @brief A tag class representing the minimum of a vector */ + struct op_min {}; + + + /** @brief A tag class representing the Frobenius-norm of a matrix */ + struct op_norm_frobenius {}; + + /** @brief A tag class representing transposed matrices */ + struct op_trans {}; + + /** @brief A tag class representing sign flips (for scalars only. 
Vectors and matrices use the standard multiplication by the scalar -1.0) */ + struct op_flip_sign {}; + + //forward declaration of basic types: + template<class TYPE> + class scalar; + + template<typename LHS, typename RHS, typename OP> + class scalar_expression; + + template<typename SCALARTYPE> + class entry_proxy; + + template<typename SCALARTYPE> + class const_entry_proxy; + + template<typename LHS, typename RHS, typename OP> + class vector_expression; + + template<class SCALARTYPE, unsigned int ALIGNMENT> + class vector_iterator; + + template<class SCALARTYPE, unsigned int ALIGNMENT> + class const_vector_iterator; + + template<typename SCALARTYPE> + class implicit_vector_base; + + template<typename SCALARTYPE> + struct zero_vector; + + template<typename SCALARTYPE> + struct unit_vector; + + template<typename SCALARTYPE> + struct one_vector; + + template<typename SCALARTYPE> + struct scalar_vector; + + template<class SCALARTYPE, typename SizeType = vcl_size_t, typename DistanceType = vcl_ptrdiff_t> + class vector_base; + + template<class SCALARTYPE, unsigned int ALIGNMENT = 1> + class vector; + + template<typename ScalarT> + class vector_tuple; + + //the following forwards are needed for GMRES + template<typename SCALARTYPE, unsigned int ALIGNMENT, typename CPU_ITERATOR> + void copy(CPU_ITERATOR const & cpu_begin, + CPU_ITERATOR const & cpu_end, + vector_iterator<SCALARTYPE, ALIGNMENT> gpu_begin); + + template<typename SCALARTYPE, unsigned int ALIGNMENT_SRC, unsigned int ALIGNMENT_DEST> + void copy(const_vector_iterator<SCALARTYPE, ALIGNMENT_SRC> const & gpu_src_begin, + const_vector_iterator<SCALARTYPE, ALIGNMENT_SRC> const & gpu_src_end, + vector_iterator<SCALARTYPE, ALIGNMENT_DEST> gpu_dest_begin); + + template<typename SCALARTYPE, unsigned int ALIGNMENT_SRC, unsigned int ALIGNMENT_DEST> + void copy(const_vector_iterator<SCALARTYPE, ALIGNMENT_SRC> const & gpu_src_begin, + const_vector_iterator<SCALARTYPE, ALIGNMENT_SRC> const & gpu_src_end, + 
const_vector_iterator<SCALARTYPE, ALIGNMENT_DEST> gpu_dest_begin); + + template<typename SCALARTYPE, unsigned int ALIGNMENT, typename CPU_ITERATOR> + void fast_copy(const const_vector_iterator<SCALARTYPE, ALIGNMENT> & gpu_begin, + const const_vector_iterator<SCALARTYPE, ALIGNMENT> & gpu_end, + CPU_ITERATOR cpu_begin ); + + template<typename CPU_ITERATOR, typename SCALARTYPE, unsigned int ALIGNMENT> + void fast_copy(CPU_ITERATOR const & cpu_begin, + CPU_ITERATOR const & cpu_end, + vector_iterator<SCALARTYPE, ALIGNMENT> gpu_begin); + + + /** @brief Tag class for indicating row-major layout of a matrix. Not passed to the matrix directly, see row_major type. */ + struct row_major_tag {}; + /** @brief Tag class for indicating column-major layout of a matrix. Not passed to the matrix directly, see row_major type. */ + struct column_major_tag {}; + + /** @brief A tag for row-major storage of a dense matrix. */ + struct row_major + { + typedef row_major_tag orientation_category; + + /** @brief Returns the memory offset for entry (i,j) of a dense matrix. + * + * @param i row index + * @param j column index + * @param num_cols number of entries per column (including alignment) + */ + static vcl_size_t mem_index(vcl_size_t i, vcl_size_t j, vcl_size_t /* num_rows */, vcl_size_t num_cols) + { + return i * num_cols + j; + } + }; + + /** @brief A tag for column-major storage of a dense matrix. */ + struct column_major + { + typedef column_major_tag orientation_category; + + /** @brief Returns the memory offset for entry (i,j) of a dense matrix. 
+ * + * @param i row index + * @param j column index + * @param num_rows number of entries per row (including alignment) + */ + static vcl_size_t mem_index(vcl_size_t i, vcl_size_t j, vcl_size_t num_rows, vcl_size_t /* num_cols */) + { + return i + j * num_rows; + } + }; + + struct row_iteration; + struct col_iteration; + + template<typename LHS, typename RHS, typename OP> + class matrix_expression; + + class context; + + enum memory_types + { + MEMORY_NOT_INITIALIZED + , MAIN_MEMORY + , OPENCL_MEMORY + , CUDA_MEMORY + }; + + namespace backend + { + class mem_handle; + } + + // + // Matrix types: + // + static const vcl_size_t dense_padding_size = 128; + + /** @brief A dense matrix class + * + * @tparam SCALARTYPE The underlying scalar type (either float or double) + * @tparam ALIGNMENT The internal memory size is given by (size()/ALIGNMENT + 1) * ALIGNMENT. ALIGNMENT must be a power of two. Best values or usually 4, 8 or 16, higher values are usually a waste of memory. + */ + template<typename ROWCOL, typename MATRIXTYPE> + class matrix_iterator; + + template<class SCALARTYPE, typename SizeType = vcl_size_t, typename DistanceType = vcl_ptrdiff_t> + class matrix_base; + + template<class SCALARTYPE, typename F = row_major, unsigned int ALIGNMENT = 1> + class matrix; + + template<typename SCALARTYPE> + class implicit_matrix_base; + + template<class SCALARTYPE> + class identity_matrix; + + template<class SCALARTYPE> + class zero_matrix; + + template<class SCALARTYPE> + class scalar_matrix; + + template<class SCALARTYPE, unsigned int ALIGNMENT = 1> + class compressed_matrix; + + template<class SCALARTYPE> + class compressed_compressed_matrix; + + + template<class SCALARTYPE, unsigned int ALIGNMENT = 128> + class coordinate_matrix; + + template<class SCALARTYPE, unsigned int ALIGNMENT = 1> + class ell_matrix; + + template<typename ScalarT, typename IndexT = unsigned int> + class sliced_ell_matrix; + + template<class SCALARTYPE, unsigned int ALIGNMENT = 1> + class 
hyb_matrix; + + template<class SCALARTYPE, unsigned int ALIGNMENT = 1> + class circulant_matrix; + + template<class SCALARTYPE, unsigned int ALIGNMENT = 1> + class hankel_matrix; + + template<class SCALARTYPE, unsigned int ALIGNMENT = 1> + class toeplitz_matrix; + + template<class SCALARTYPE, unsigned int ALIGNMENT = 1> + class vandermonde_matrix; + + // + // Proxies: + // + template<typename SizeType = vcl_size_t, typename DistanceType = std::ptrdiff_t> + class basic_range; + + typedef basic_range<> range; + + template<typename SizeType = vcl_size_t, typename DistanceType = std::ptrdiff_t> + class basic_slice; + + typedef basic_slice<> slice; + + template<typename VectorType> + class vector_range; + + template<typename VectorType> + class vector_slice; + + template<typename MatrixType> + class matrix_range; + + template<typename MatrixType> + class matrix_slice; + + + /** @brief Helper struct for checking whether a type is a host scalar type (e.g. float, double) */ + template<typename T> + struct is_cpu_scalar + { + enum { value = false }; + }; + + /** @brief Helper struct for checking whether a type is a viennacl::scalar<> */ + template<typename T> + struct is_scalar + { + enum { value = false }; + }; + + /** @brief Helper struct for checking whether a type represents a sign flip on a viennacl::scalar<> */ + template<typename T> + struct is_flip_sign_scalar + { + enum { value = false }; + }; + + /** @brief Helper struct for checking whether the provided type represents a scalar (either host, from ViennaCL, or a flip-sign proxy) */ + template<typename T> + struct is_any_scalar + { + enum { value = (is_scalar<T>::value || is_cpu_scalar<T>::value || is_flip_sign_scalar<T>::value )}; + }; + + /** @brief Checks for a type being either vector_base or implicit_vector_base */ + template<typename T> + struct is_any_vector { enum { value = 0 }; }; + + /** @brief Checks for either matrix_base or implicit_matrix_base */ + template<typename T> + struct is_any_dense_matrix { 
enum { value = 0 }; }; + + /** @brief Helper class for checking whether a matrix has a row-major layout. */ + template<typename T> + struct is_row_major + { + enum { value = false }; + }; + + /** @brief Helper class for checking whether a matrix is a compressed_matrix (CSR format) */ + template<typename T> + struct is_compressed_matrix + { + enum { value = false }; + }; + + /** @brief Helper class for checking whether a matrix is a coordinate_matrix (COO format) */ + template<typename T> + struct is_coordinate_matrix + { + enum { value = false }; + }; + + /** @brief Helper class for checking whether a matrix is an ell_matrix (ELL format) */ + template<typename T> + struct is_ell_matrix + { + enum { value = false }; + }; + + /** @brief Helper class for checking whether a matrix is a sliced_ell_matrix (SELL-C-\f$ \sigma \f$ format) */ + template<typename T> + struct is_sliced_ell_matrix + { + enum { value = false }; + }; + + + /** @brief Helper class for checking whether a matrix is a hyb_matrix (hybrid format: ELL plus CSR) */ + template<typename T> + struct is_hyb_matrix + { + enum { value = false }; + }; + + /** @brief Helper class for checking whether the provided type is one of the sparse matrix types (compressed_matrix, coordinate_matrix, etc.) 
*/ + template<typename T> + struct is_any_sparse_matrix + { + enum { value = false }; + }; + + + /** @brief Helper class for checking whether a matrix is a circulant matrix */ + template<typename T> + struct is_circulant_matrix + { + enum { value = false }; + }; + + /** @brief Helper class for checking whether a matrix is a Hankel matrix */ + template<typename T> + struct is_hankel_matrix + { + enum { value = false }; + }; + + /** @brief Helper class for checking whether a matrix is a Toeplitz matrix */ + template<typename T> + struct is_toeplitz_matrix + { + enum { value = false }; + }; + + /** @brief Helper class for checking whether a matrix is a Vandermonde matrix */ + template<typename T> + struct is_vandermonde_matrix + { + enum { value = false }; + }; + + /** @brief Helper class for checking whether the provided type is any of the dense structured matrix types (circulant, Hankel, etc.) */ + template<typename T> + struct is_any_dense_structured_matrix + { + enum { value = viennacl::is_circulant_matrix<T>::value || viennacl::is_hankel_matrix<T>::value || viennacl::is_toeplitz_matrix<T>::value || viennacl::is_vandermonde_matrix<T>::value }; + }; + + + + + /** @brief Exception class in case of memory errors */ + class memory_exception : public std::exception + { + public: + memory_exception() : message_() {} + memory_exception(std::string message) : message_("ViennaCL: Internal memory error: " + message) {} + + virtual const char* what() const throw() { return message_.c_str(); } + + virtual ~memory_exception() throw() {} + private: + std::string message_; + }; + + class cuda_not_available_exception : public std::exception + { + public: + cuda_not_available_exception() : message_("ViennaCL was compiled without CUDA support, but CUDA functionality required for this operation.") {} + + virtual const char* what() const throw() { return message_.c_str(); } + + virtual ~cuda_not_available_exception() throw() {} + private: + std::string message_; + }; + + class 
zero_on_diagonal_exception : public std::runtime_error + { + public: + zero_on_diagonal_exception(std::string const & what_arg) : std::runtime_error(what_arg) {} + }; + + class unknown_norm_exception : public std::runtime_error + { + public: + unknown_norm_exception(std::string const & what_arg) : std::runtime_error(what_arg) {} + }; + + + + namespace tools + { + //helper for matrix row/col iterators + //must be specialized for every viennacl matrix type + /** @brief Helper class for incrementing an iterator in a dense matrix. */ + template<typename ROWCOL, typename MATRIXTYPE> + struct MATRIX_ITERATOR_INCREMENTER + { + typedef typename MATRIXTYPE::ERROR_SPECIALIZATION_FOR_THIS_MATRIX_TYPE_MISSING ErrorIndicator; + + static void apply(const MATRIXTYPE & /*mat*/, unsigned int & /*row*/, unsigned int & /*col*/) {} + }; + } + + namespace linalg + { +#if !defined(_MSC_VER) || defined(__CUDACC__) + + template<class SCALARTYPE, unsigned int ALIGNMENT> + void convolve_i(viennacl::vector<SCALARTYPE, ALIGNMENT>& input1, + viennacl::vector<SCALARTYPE, ALIGNMENT>& input2, + viennacl::vector<SCALARTYPE, ALIGNMENT>& output); + + template<typename T> + viennacl::vector_expression<const vector_base<T>, const vector_base<T>, op_element_binary<op_prod> > + element_prod(vector_base<T> const & v1, vector_base<T> const & v2); + + template<typename T> + viennacl::vector_expression<const vector_base<T>, const vector_base<T>, op_element_binary<op_div> > + element_div(vector_base<T> const & v1, vector_base<T> const & v2); + + + + template<typename T> + void inner_prod_impl(vector_base<T> const & vec1, + vector_base<T> const & vec2, + scalar<T> & result); + + template<typename LHS, typename RHS, typename OP, typename T> + void inner_prod_impl(viennacl::vector_expression<LHS, RHS, OP> const & vec1, + vector_base<T> const & vec2, + scalar<T> & result); + + template<typename T, typename LHS, typename RHS, typename OP> + void inner_prod_impl(vector_base<T> const & vec1, + 
viennacl::vector_expression<LHS, RHS, OP> const & vec2, + scalar<T> & result); + + template<typename LHS1, typename RHS1, typename OP1, + typename LHS2, typename RHS2, typename OP2, typename T> + void inner_prod_impl(viennacl::vector_expression<LHS1, RHS1, OP1> const & vec1, + viennacl::vector_expression<LHS2, RHS2, OP2> const & vec2, + scalar<T> & result); + + /////////////////////////// + + template<typename T> + void inner_prod_cpu(vector_base<T> const & vec1, + vector_base<T> const & vec2, + T & result); + + template<typename LHS, typename RHS, typename OP, typename T> + void inner_prod_cpu(viennacl::vector_expression<LHS, RHS, OP> const & vec1, + vector_base<T> const & vec2, + T & result); + + template<typename T, typename LHS, typename RHS, typename OP> + void inner_prod_cpu(vector_base<T> const & vec1, + viennacl::vector_expression<LHS, RHS, OP> const & vec2, + T & result); + + template<typename LHS1, typename RHS1, typename OP1, + typename LHS2, typename RHS2, typename OP2, typename S3> + void inner_prod_cpu(viennacl::vector_expression<LHS1, RHS1, OP1> const & vec1, + viennacl::vector_expression<LHS2, RHS2, OP2> const & vec2, + S3 & result); + + + + //forward definition of norm_1_impl function + template<typename T> + void norm_1_impl(vector_base<T> const & vec, scalar<T> & result); + + template<typename LHS, typename RHS, typename OP, typename T> + void norm_1_impl(viennacl::vector_expression<LHS, RHS, OP> const & vec, + scalar<T> & result); + + + template<typename T> + void norm_1_cpu(vector_base<T> const & vec, + T & result); + + template<typename LHS, typename RHS, typename OP, typename S2> + void norm_1_cpu(viennacl::vector_expression<LHS, RHS, OP> const & vec, + S2 & result); + + //forward definition of norm_2_impl function + template<typename T> + void norm_2_impl(vector_base<T> const & vec, scalar<T> & result); + + template<typename LHS, typename RHS, typename OP, typename T> + void norm_2_impl(viennacl::vector_expression<LHS, RHS, OP> const & vec, 
+ scalar<T> & result); + + template<typename T> + void norm_2_cpu(vector_base<T> const & vec, T & result); + + template<typename LHS, typename RHS, typename OP, typename S2> + void norm_2_cpu(viennacl::vector_expression<LHS, RHS, OP> const & vec, + S2 & result); + + + //forward definition of norm_inf_impl function + template<typename T> + void norm_inf_impl(vector_base<T> const & vec, scalar<T> & result); + + template<typename LHS, typename RHS, typename OP, typename T> + void norm_inf_impl(viennacl::vector_expression<LHS, RHS, OP> const & vec, + scalar<T> & result); + + + template<typename T> + void norm_inf_cpu(vector_base<T> const & vec, T & result); + + template<typename LHS, typename RHS, typename OP, typename S2> + void norm_inf_cpu(viennacl::vector_expression<LHS, RHS, OP> const & vec, + S2 & result); + + //forward definition of max()-related functions + template<typename T> + void max_impl(vector_base<T> const & vec, scalar<T> & result); + + template<typename LHS, typename RHS, typename OP, typename T> + void max_impl(viennacl::vector_expression<LHS, RHS, OP> const & vec, + scalar<T> & result); + + + template<typename T> + void max_cpu(vector_base<T> const & vec, T & result); + + template<typename LHS, typename RHS, typename OP, typename S2> + void max_cpu(viennacl::vector_expression<LHS, RHS, OP> const & vec, + S2 & result); + + //forward definition of min()-related functions + template<typename T> + void min_impl(vector_base<T> const & vec, scalar<T> & result); + + template<typename LHS, typename RHS, typename OP, typename T> + void min_impl(viennacl::vector_expression<LHS, RHS, OP> const & vec, + scalar<T> & result); + + + template<typename T> + void min_cpu(vector_base<T> const & vec, T & result); + + template<typename LHS, typename RHS, typename OP, typename S2> + void min_cpu(viennacl::vector_expression<LHS, RHS, OP> const & vec, + S2 & result); + + //forward definition of sum()-related functions + template<typename T> + void sum_impl(vector_base<T> 
const & vec, scalar<T> & result); + + template<typename LHS, typename RHS, typename OP, typename T> + void sum_impl(viennacl::vector_expression<LHS, RHS, OP> const & vec, + scalar<T> & result); + + + template<typename T> + void sum_cpu(vector_base<T> const & vec, T & result); + + template<typename LHS, typename RHS, typename OP, typename S2> + void sum_cpu(viennacl::vector_expression<LHS, RHS, OP> const & vec, + S2 & result); + + + // forward definition of frobenius norm: + template<typename T> + void norm_frobenius_impl(matrix_base<T> const & vec, scalar<T> & result); + + template<typename T> + void norm_frobenius_cpu(matrix_base<T> const & vec, T & result); + + + template<typename T> + vcl_size_t index_norm_inf(vector_base<T> const & vec); + + template<typename LHS, typename RHS, typename OP> + vcl_size_t index_norm_inf(viennacl::vector_expression<LHS, RHS, OP> const & vec); + + //forward definition of prod_impl functions + + template<typename NumericT> + void prod_impl(const matrix_base<NumericT> & mat, + const vector_base<NumericT> & vec, + vector_base<NumericT> & result); + + template<typename NumericT> + void prod_impl(const matrix_expression< const matrix_base<NumericT>, const matrix_base<NumericT>, op_trans> & mat_trans, + const vector_base<NumericT> & vec, + vector_base<NumericT> & result); + + template<typename SparseMatrixType, class SCALARTYPE, unsigned int ALIGNMENT> + typename viennacl::enable_if< viennacl::is_any_sparse_matrix<SparseMatrixType>::value, + vector_expression<const SparseMatrixType, + const vector<SCALARTYPE, ALIGNMENT>, + op_prod > + >::type + prod_impl(const SparseMatrixType & mat, + const vector<SCALARTYPE, ALIGNMENT> & vec); + + // forward definition of summation routines for matrices: + + template<typename NumericT> + void row_sum_impl(const matrix_base<NumericT> & A, + vector_base<NumericT> & result); + + template<typename NumericT> + void column_sum_impl(const matrix_base<NumericT> & A, + vector_base<NumericT> & result); + +#endif 
+ + namespace detail + { + enum row_info_types + { + SPARSE_ROW_NORM_INF = 0, + SPARSE_ROW_NORM_1, + SPARSE_ROW_NORM_2, + SPARSE_ROW_DIAGONAL + }; + + } + + + /** @brief A tag class representing a lower triangular matrix */ + struct lower_tag + { + static const char * name() { return "lower"; } + }; //lower triangular matrix + /** @brief A tag class representing an upper triangular matrix */ + struct upper_tag + { + static const char * name() { return "upper"; } + }; //upper triangular matrix + /** @brief A tag class representing a lower triangular matrix with unit diagonal*/ + struct unit_lower_tag + { + static const char * name() { return "unit_lower"; } + }; //unit lower triangular matrix + /** @brief A tag class representing an upper triangular matrix with unit diagonal*/ + struct unit_upper_tag + { + static const char * name() { return "unit_upper"; } + }; //unit upper triangular matrix + + //preconditioner tags + class ilut_tag; + + /** @brief A tag class representing the use of no preconditioner */ + class no_precond + { + public: + template<typename VectorType> + void apply(VectorType &) const {} + }; + + + } //namespace linalg + + // + // More namespace comments to follow: + // + + /** @brief Namespace providing routines for handling the different memory domains. */ + namespace backend + { + /** @brief Provides implementations for handling memory buffers in CPU RAM. */ + namespace cpu_ram + { + /** @brief Holds implementation details for handling memory buffers in CPU RAM. Not intended for direct use by library users. */ + namespace detail {} + } + + /** @brief Provides implementations for handling CUDA memory buffers. */ + namespace cuda + { + /** @brief Holds implementation details for handling CUDA memory buffers. Not intended for direct use by library users. */ + namespace detail {} + } + + /** @brief Implementation details for the generic memory backend interface. 
*/ + namespace detail {} + + /** @brief Provides implementations for handling OpenCL memory buffers. */ + namespace opencl + { + /** @brief Holds implementation details for handling OpenCL memory buffers. Not intended for direct use by library users. */ + namespace detail {} + } + } + + + /** @brief Holds implementation details for functionality in the main viennacl-namespace. Not intended for direct use by library users. */ + namespace detail + { + /** @brief Helper namespace for fast Fourier transforms. Not to be used directly by library users. */ + namespace fft + { + /** @brief Helper namespace for fast-Fourier transformation. Deprecated. */ + namespace FFT_DATA_ORDER {} + } + } + + + /** @brief Provides an OpenCL kernel generator. */ + namespace device_specific + { + /** @brief Provides the implementation for tuning the kernels for a particular device. */ + namespace autotune {} + + /** @brief Contains implementation details of the kernel generator. */ + namespace detail {} + + /** @brief Namespace holding the various device-specific parameters for generating the best kernels. */ + namespace profiles {} + + /** @brief Contains various helper routines for kernel generation. */ + namespace utils {} + } + + /** @brief Provides basic input-output functionality. */ + namespace io + { + /** @brief Implementation details for IO functionality. Usually not of interest for a library user. */ + namespace detail {} + + /** @brief Namespace holding the various XML tag definitions for the kernel parameter tuning facility. */ + namespace tag {} + + /** @brief Namespace holding the various XML strings for the kernel parameter tuning facility. */ + namespace val {} + } + + /** @brief Provides all linear algebra operations which are not covered by operator overloads. */ + namespace linalg + { + /** @brief Holds all CUDA compute kernels used by ViennaCL. */ + namespace cuda + { + /** @brief Helper functions for the CUDA linear algebra backend. 
*/ + namespace detail {} + } + + /** @brief Namespace holding implementation details for linear algebra routines. Usually not of interest for a library user. */ + namespace detail + { + /** @brief Implementation namespace for algebraic multigrid preconditioner. */ + namespace amg {} + + /** @brief Implementation namespace for sparse approximate inverse preconditioner. */ + namespace spai {} + } + + /** @brief Holds all compute kernels with conventional host-based execution (buffers in CPU RAM). */ + namespace host_based + { + /** @brief Helper functions for the host-based linear algebra backend. */ + namespace detail {} + } + + /** @brief Namespace containing the OpenCL kernels. Deprecated, will be moved to viennacl::linalg::opencl in future releases. */ + namespace kernels {} + + /** @brief Holds all routines providing OpenCL linear algebra operations. */ + namespace opencl + { + /** @brief Helper functions for OpenCL-accelerated linear algebra operations. */ + namespace detail {} + + /** @brief Contains the OpenCL kernel generation functions for a predefined set of functionality. */ + namespace kernels + { + /** @brief Implementation details for the predefined OpenCL kernels. */ + namespace detail {} + } + } + } + + /** @brief OpenCL backend. Manages platforms, contexts, buffers, kernels, etc. */ + namespace ocl {} + + /** @brief Namespace containing many meta-functions. */ + namespace result_of {} + + /** @brief Namespace for various tools used within ViennaCL. */ + namespace tools + { + /** @brief Contains implementation details for the tools. Usually not of interest for the library user. */ + namespace detail {} + } + + /** @brief Namespace providing traits-information as well as generic wrappers to common routines for vectors and matrices such as size() or clear() */ + namespace traits {} + + /** @brief Contains the scheduling functionality which allows for dynamic kernel generation as well as the fusion of multiple statements into a single kernel. 
*/ + namespace scheduler + { + /** @brief Implementation details for the scheduler */ + namespace detail {} + + /** @brief Helper metafunctions used for the scheduler */ + namespace result_of {} + } + +} //namespace viennacl + +#endif + +/*@}*/
http://git-wip-us.apache.org/repos/asf/mahout/blob/f7c1f802/native-viennaCL/src/main/cpp/viennacl/hankel_matrix.hpp ---------------------------------------------------------------------- diff --git a/native-viennaCL/src/main/cpp/viennacl/hankel_matrix.hpp b/native-viennaCL/src/main/cpp/viennacl/hankel_matrix.hpp new file mode 100644 index 0000000..084e6c8 --- /dev/null +++ b/native-viennaCL/src/main/cpp/viennacl/hankel_matrix.hpp @@ -0,0 +1,343 @@ +#ifndef VIENNACL_HANKEL_MATRIX_HPP +#define VIENNACL_HANKEL_MATRIX_HPP + +/* ========================================================================= + Copyright (c) 2010-2016, Institute for Microelectronics, + Institute for Analysis and Scientific Computing, + TU Wien. + Portions of this software are copyright by UChicago Argonne, LLC. + + ----------------- + ViennaCL - The Vienna Computing Library + ----------------- + + Project Head: Karl Rupp [email protected] + + (A list of authors and contributors can be found in the manual) + + License: MIT (X11), see file LICENSE in the base directory +============================================================================= */ + + +/** @file hankel_matrix.hpp + @brief Implementation of the hankel_matrix class for efficient manipulation of Hankel matrices. Experimental. +*/ + +#include "viennacl/forwards.h" +#include "viennacl/vector.hpp" +#include "viennacl/ocl/backend.hpp" + +#include "viennacl/toeplitz_matrix.hpp" +#include "viennacl/fft.hpp" + +#include "viennacl/linalg/hankel_matrix_operations.hpp" + +namespace viennacl +{ +/** @brief A Hankel matrix class + * + * @tparam NumericT The underlying scalar type (either float or double) + * @tparam AlignmentV The internal memory size is given by (size()/AlignmentV + 1) * AlignmentV. AlignmentV must be a power of two. Best values or usually 4, 8 or 16, higher values are usually a waste of memory. 
+ */ +template<class NumericT, unsigned int AlignmentV> +class hankel_matrix +{ +public: + typedef viennacl::backend::mem_handle handle_type; + typedef scalar<typename viennacl::tools::CHECK_SCALAR_TEMPLATE_ARGUMENT<NumericT>::ResultType> value_type; + + /** + * @brief The default constructor. Does not allocate any memory. + * + */ + explicit hankel_matrix() {} + + /** + * @brief Creates the matrix with the given size + * + * @param rows Number of rows of the matrix + * @param cols Number of columns of the matrix + */ + explicit hankel_matrix(vcl_size_t rows, vcl_size_t cols) : elements_(rows, cols) + { + assert(rows == cols && bool("Hankel matrix must be square!")); + (void)cols; // avoid 'unused parameter' warning in optimized builds + } + + /** @brief Resizes the matrix. + * Existing entries can be preserved + * + * @param sz New size of matrix + * @param preserve If true, existing values are preserved. + */ + void resize(vcl_size_t sz, bool preserve = true) + { + elements_.resize(sz, preserve); + } + + /** @brief Returns the OpenCL handle + * + * @return OpenCL handle + */ + handle_type const & handle() const { return elements_.handle(); } + + /** + * @brief Returns an internal viennacl::toeplitz_matrix, which represents the Hankel matrix elements + * + */ + toeplitz_matrix<NumericT, AlignmentV> & elements() { return elements_; } + toeplitz_matrix<NumericT, AlignmentV> const & elements() const { return elements_; } + + /** + * @brief Returns the number of rows of the matrix + */ + vcl_size_t size1() const { return elements_.size1(); } + + /** + * @brief Returns the number of columns of the matrix + */ + vcl_size_t size2() const { return elements_.size2(); } + + /** @brief Returns the internal size of matrix representation. 
+ * Usually required for launching OpenCL kernels only + * + * @return Internal size of matrix representation + */ + vcl_size_t internal_size() const { return elements_.internal_size(); } + + /** + * @brief Read-write access to an element of the matrix + * + * @param row_index Row index of accessed element + * @param col_index Column index of accessed element + * @return Proxy for matrix entry + */ + entry_proxy<NumericT> operator()(unsigned int row_index, unsigned int col_index) + { + assert(row_index < size1() && col_index < size2() && bool("Invalid access")); + + return elements_(size1() - row_index - 1, col_index); + } + + /** + * @brief += operation for Hankel matrices + * + * @param that Matrix which will be added + * @return Result of addition + */ + hankel_matrix<NumericT, AlignmentV>& operator +=(hankel_matrix<NumericT, AlignmentV>& that) + { + elements_ += that.elements(); + return *this; + } + +private: + hankel_matrix(hankel_matrix const &) {} + hankel_matrix & operator=(hankel_matrix const & t); + + toeplitz_matrix<NumericT, AlignmentV> elements_; +}; + +/** @brief Copies a Hankel matrix from the std::vector to the OpenCL device (either GPU or multi-core CPU) + * + * + * @param cpu_vec A std::vector on the host. + * @param gpu_mat A hankel_matrix from ViennaCL + */ +template<typename NumericT, unsigned int AlignmentV> +void copy(std::vector<NumericT> const & cpu_vec, hankel_matrix<NumericT, AlignmentV> & gpu_mat) +{ + assert((gpu_mat.size1() * 2 - 1) == cpu_vec.size() && bool("Size mismatch")); + + copy(cpu_vec, gpu_mat.elements()); +} + +/** @brief Copies a Hankel matrix from the OpenCL device (either GPU or multi-core CPU) to the std::vector + * + * + * @param gpu_mat A hankel_matrix from ViennaCL + * @param cpu_vec A std::vector on the host. 
+ */ +template<typename NumericT, unsigned int AlignmentV> +void copy(hankel_matrix<NumericT, AlignmentV> const & gpu_mat, std::vector<NumericT> & cpu_vec) +{ + assert((gpu_mat.size1() * 2 - 1) == cpu_vec.size() && bool("Size mismatch")); + + copy(gpu_mat.elements(), cpu_vec); +} + +/** @brief Copies a Hankel matrix from the OpenCL device (either GPU or multi-core CPU) to the matrix-like object + * + * + * @param han_src A hankel_matrix from ViennaCL + * @param com_dst A matrix-like object + */ +template<typename NumericT, unsigned int AlignmentV, typename MatrixT> +void copy(hankel_matrix<NumericT, AlignmentV> const & han_src, MatrixT& com_dst) +{ + assert( (viennacl::traits::size1(com_dst) == han_src.size1()) && bool("Size mismatch") ); + assert( (viennacl::traits::size2(com_dst) == han_src.size2()) && bool("Size mismatch") ); + + vcl_size_t size = han_src.size1(); + std::vector<NumericT> tmp(size * 2 - 1); + copy(han_src, tmp); + + for (vcl_size_t i = 0; i < size; i++) + for (vcl_size_t j = 0; j < size; j++) + com_dst(i, j) = tmp[i + j]; +} + +/** @brief Copies the matrix-like object to the Hankel matrix on the OpenCL device (either GPU or multi-core CPU) + * + * + * @param com_src A matrix-like object on the host + * @param han_dst A hankel_matrix from ViennaCL + */ +template<typename NumericT, unsigned int AlignmentV, typename MatrixT> +void copy(MatrixT const & com_src, hankel_matrix<NumericT, AlignmentV>& han_dst) +{ + assert( (han_dst.size1() == 0 || viennacl::traits::size1(com_src) == han_dst.size1()) && bool("Size mismatch") ); + assert( (han_dst.size2() == 0 || viennacl::traits::size2(com_src) == han_dst.size2()) && bool("Size mismatch") ); + assert( viennacl::traits::size2(com_src) == viennacl::traits::size1(com_src) && bool("Logic error: non-square Hankel matrix!") ); + + vcl_size_t size = viennacl::traits::size1(com_src); + + std::vector<NumericT> tmp(2*size - 1); + + for (vcl_size_t i = 0; i < size; i++) + tmp[i] = com_src(0, i); + + for (vcl_size_t i = 
1; i < size; i++) + tmp[size + i - 1] = com_src(size - 1, i); + + viennacl::copy(tmp, han_dst); +} + +/*template<typename NumericT, unsigned int AlignmentV, unsigned int VECTOR_AlignmentV> + void prod_impl(hankel_matrix<NumericT, AlignmentV>& mat, + vector<NumericT, VECTOR_AlignmentV>& vec, + vector<NumericT, VECTOR_AlignmentV>& result) + { + prod_impl(mat.elements(), vec, result); + fft::reverse(result); + }*/ + +template<class NumericT, unsigned int AlignmentV> +std::ostream & operator<<(std::ostream & s, hankel_matrix<NumericT, AlignmentV>& gpu_matrix) +{ + vcl_size_t size = gpu_matrix.size1(); + std::vector<NumericT> tmp(2*size - 1); + copy(gpu_matrix, tmp); + s << "[" << size << "," << size << "]("; + + for (vcl_size_t i = 0; i < size; i++) + { + s << "("; + for (vcl_size_t j = 0; j < size; j++) + { + s << tmp[i + j]; + //s << (int)i - (int)j; + if (j < (size - 1)) s << ","; + } + s << ")"; + } + s << ")"; + return s; +} + +// +// Specify available operations: +// + +/** \cond */ + +namespace linalg +{ +namespace detail +{ + // x = A * y + template<typename T, unsigned int A> + struct op_executor<vector_base<T>, op_assign, vector_expression<const hankel_matrix<T, A>, const vector_base<T>, op_prod> > + { + static void apply(vector_base<T> & lhs, vector_expression<const hankel_matrix<T, A>, const vector_base<T>, op_prod> const & rhs) + { + // check for the special case x = A * x + if (viennacl::traits::handle(lhs) == viennacl::traits::handle(rhs.rhs())) + { + viennacl::vector<T> temp(lhs); + viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), temp); + lhs = temp; + } + else + viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs); + } + }; + + template<typename T, unsigned int A> + struct op_executor<vector_base<T>, op_inplace_add, vector_expression<const hankel_matrix<T, A>, const vector_base<T>, op_prod> > + { + static void apply(vector_base<T> & lhs, vector_expression<const hankel_matrix<T, A>, const vector_base<T>, op_prod> const & rhs) + { + 
viennacl::vector<T> temp(lhs); + viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), temp); + lhs += temp; + } + }; + + template<typename T, unsigned int A> + struct op_executor<vector_base<T>, op_inplace_sub, vector_expression<const hankel_matrix<T, A>, const vector_base<T>, op_prod> > + { + static void apply(vector_base<T> & lhs, vector_expression<const hankel_matrix<T, A>, const vector_base<T>, op_prod> const & rhs) + { + viennacl::vector<T> temp(lhs); + viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), temp); + lhs -= temp; + } + }; + + + // x = A * vec_op + template<typename T, unsigned int A, typename LHS, typename RHS, typename OP> + struct op_executor<vector_base<T>, op_assign, vector_expression<const hankel_matrix<T, A>, const vector_expression<const LHS, const RHS, OP>, op_prod> > + { + static void apply(vector_base<T> & lhs, vector_expression<const hankel_matrix<T, A>, const vector_expression<const LHS, const RHS, OP>, op_prod> const & rhs) + { + viennacl::vector<T> temp(rhs.rhs()); + viennacl::linalg::prod_impl(rhs.lhs(), temp, lhs); + } + }; + + // x = A * vec_op + template<typename T, unsigned int A, typename LHS, typename RHS, typename OP> + struct op_executor<vector_base<T>, op_inplace_add, vector_expression<const hankel_matrix<T, A>, vector_expression<const LHS, const RHS, OP>, op_prod> > + { + static void apply(vector_base<T> & lhs, vector_expression<const hankel_matrix<T, A>, vector_expression<const LHS, const RHS, OP>, op_prod> const & rhs) + { + viennacl::vector<T> temp(rhs.rhs()); + viennacl::vector<T> temp_result(lhs); + viennacl::linalg::prod_impl(rhs.lhs(), temp, temp_result); + lhs += temp_result; + } + }; + + // x = A * vec_op + template<typename T, unsigned int A, typename LHS, typename RHS, typename OP> + struct op_executor<vector_base<T>, op_inplace_sub, vector_expression<const hankel_matrix<T, A>, const vector_expression<const LHS, const RHS, OP>, op_prod> > + { + static void apply(vector_base<T> & lhs, vector_expression<const 
hankel_matrix<T, A>, const vector_expression<const LHS, const RHS, OP>, op_prod> const & rhs) + { + viennacl::vector<T> temp(rhs.rhs()); + viennacl::vector<T> temp_result(lhs); + viennacl::linalg::prod_impl(rhs.lhs(), temp, temp_result); + lhs -= temp_result; + } + }; + + + +} // namespace detail +} // namespace linalg + +/** \endcond */ +} +#endif // VIENNACL_HANKEL_MATRIX_HPP http://git-wip-us.apache.org/repos/asf/mahout/blob/f7c1f802/native-viennaCL/src/main/cpp/viennacl/hyb_matrix.hpp ---------------------------------------------------------------------- diff --git a/native-viennaCL/src/main/cpp/viennacl/hyb_matrix.hpp b/native-viennaCL/src/main/cpp/viennacl/hyb_matrix.hpp new file mode 100644 index 0000000..e93ede5 --- /dev/null +++ b/native-viennaCL/src/main/cpp/viennacl/hyb_matrix.hpp @@ -0,0 +1,442 @@ +#ifndef VIENNACL_HYB_MATRIX_HPP_ +#define VIENNACL_HYB_MATRIX_HPP_ + +/* ========================================================================= + Copyright (c) 2010-2016, Institute for Microelectronics, + Institute for Analysis and Scientific Computing, + TU Wien. + Portions of this software are copyright by UChicago Argonne, LLC. + + ----------------- + ViennaCL - The Vienna Computing Library + ----------------- + + Project Head: Karl Rupp [email protected] + + (A list of authors and contributors can be found in the manual) + + License: MIT (X11), see file LICENSE in the base directory +============================================================================= */ + +/** @file viennacl/hyb_matrix.hpp + @brief Implementation of the hyb_matrix class + + Contributed by Volodymyr Kysenko. +*/ + +#include "viennacl/forwards.h" +#include "viennacl/vector.hpp" + +#include "viennacl/tools/tools.hpp" + +#include "viennacl/linalg/sparse_matrix_operations.hpp" + +namespace viennacl +{ +/** @brief Sparse matrix class using a hybrid format composed of the ELL and CSR format for storing the nonzeros. 
*/ +template<typename NumericT, unsigned int AlignmentV /* see forwards.h for default argument */> +class hyb_matrix +{ +public: + typedef viennacl::backend::mem_handle handle_type; + typedef scalar<typename viennacl::tools::CHECK_SCALAR_TEMPLATE_ARGUMENT<NumericT>::ResultType> value_type; + + hyb_matrix() : csr_threshold_(NumericT(0.8)), rows_(0), cols_(0) {} + + hyb_matrix(viennacl::context ctx) : csr_threshold_(NumericT(0.8)), rows_(0), cols_(0) + { + ell_coords_.switch_active_handle_id(ctx.memory_type()); + ell_elements_.switch_active_handle_id(ctx.memory_type()); + + csr_rows_.switch_active_handle_id(ctx.memory_type()); + csr_cols_.switch_active_handle_id(ctx.memory_type()); + csr_elements_.switch_active_handle_id(ctx.memory_type()); + +#ifdef VIENNACL_WITH_OPENCL + if (ctx.memory_type() == OPENCL_MEMORY) + { + ell_coords_.opencl_handle().context(ctx.opencl_context()); + ell_elements_.opencl_handle().context(ctx.opencl_context()); + + csr_rows_.opencl_handle().context(ctx.opencl_context()); + csr_cols_.opencl_handle().context(ctx.opencl_context()); + csr_elements_.opencl_handle().context(ctx.opencl_context()); + } +#endif + } + + /** @brief Resets all entries in the matrix back to zero without changing the matrix size. Resets the sparsity pattern. 
*/ + void clear() + { + // ELL part: + ellnnz_ = 0; + + viennacl::backend::typesafe_host_array<unsigned int> host_coords_buffer(ell_coords_, internal_size1()); + std::vector<NumericT> host_elements(internal_size1()); + + viennacl::backend::memory_create(ell_coords_, host_coords_buffer.element_size() * internal_size1(), viennacl::traits::context(ell_coords_), host_coords_buffer.get()); + viennacl::backend::memory_create(ell_elements_, sizeof(NumericT) * internal_size1(), viennacl::traits::context(ell_elements_), &(host_elements[0])); + + // CSR part: + csrnnz_ = 0; + + viennacl::backend::typesafe_host_array<unsigned int> host_row_buffer(csr_rows_, rows_ + 1); + viennacl::backend::typesafe_host_array<unsigned int> host_col_buffer(csr_cols_, 1); + host_elements.resize(1); + + viennacl::backend::memory_create(csr_rows_, host_row_buffer.element_size() * (rows_ + 1), viennacl::traits::context(csr_rows_), host_row_buffer.get()); + viennacl::backend::memory_create(csr_cols_, host_col_buffer.element_size() * 1, viennacl::traits::context(csr_cols_), host_col_buffer.get()); + viennacl::backend::memory_create(csr_elements_, sizeof(NumericT) * 1, viennacl::traits::context(csr_elements_), &(host_elements[0])); + } + + NumericT csr_threshold() const { return csr_threshold_; } + void csr_threshold(NumericT thr) { csr_threshold_ = thr; } + + vcl_size_t internal_size1() const { return viennacl::tools::align_to_multiple<vcl_size_t>(rows_, AlignmentV); } + vcl_size_t internal_size2() const { return viennacl::tools::align_to_multiple<vcl_size_t>(cols_, AlignmentV); } + + vcl_size_t size1() const { return rows_; } + vcl_size_t size2() const { return cols_; } + + vcl_size_t internal_ellnnz() const {return viennacl::tools::align_to_multiple<vcl_size_t>(ellnnz_, AlignmentV); } + vcl_size_t ell_nnz() const { return ellnnz_; } + vcl_size_t csr_nnz() const { return csrnnz_; } + + const handle_type & handle() const { return ell_elements_; } + const handle_type & handle2() const { return 
ell_coords_; } + const handle_type & handle3() const { return csr_rows_; } + const handle_type & handle4() const { return csr_cols_; } + const handle_type & handle5() const { return csr_elements_; } + +public: +#if defined(_MSC_VER) && _MSC_VER < 1500 //Visual Studio 2005 needs special treatment + template<typename CPUMatrixT> + friend void copy(const CPUMatrixT & cpu_matrix, hyb_matrix & gpu_matrix ); +#else + template<typename CPUMatrixT, typename T, unsigned int ALIGN> + friend void copy(const CPUMatrixT & cpu_matrix, hyb_matrix<T, ALIGN> & gpu_matrix ); +#endif + +private: + NumericT csr_threshold_; + vcl_size_t rows_; + vcl_size_t cols_; + vcl_size_t ellnnz_; + vcl_size_t csrnnz_; + + handle_type ell_coords_; // ell coords + handle_type ell_elements_; // ell elements + + handle_type csr_rows_; + handle_type csr_cols_; + handle_type csr_elements_; +}; + +template<typename CPUMatrixT, typename NumericT, unsigned int AlignmentV> +void copy(const CPUMatrixT& cpu_matrix, hyb_matrix<NumericT, AlignmentV>& gpu_matrix ) +{ + assert( (gpu_matrix.size1() == 0 || viennacl::traits::size1(cpu_matrix) == gpu_matrix.size1()) && bool("Size mismatch") ); + assert( (gpu_matrix.size2() == 0 || viennacl::traits::size2(cpu_matrix) == gpu_matrix.size2()) && bool("Size mismatch") ); + + if (cpu_matrix.size1() > 0 && cpu_matrix.size2() > 0) + { + //determine max capacity for row + vcl_size_t max_entries_per_row = 0; + std::vector<vcl_size_t> hist_entries(cpu_matrix.size2() + 1, 0); + + for (typename CPUMatrixT::const_iterator1 row_it = cpu_matrix.begin1(); row_it != cpu_matrix.end1(); ++row_it) + { + vcl_size_t num_entries = 0; + for (typename CPUMatrixT::const_iterator2 col_it = row_it.begin(); col_it != row_it.end(); ++col_it) + { + ++num_entries; + } + + hist_entries[num_entries] += 1; + max_entries_per_row = std::max(max_entries_per_row, num_entries); + } + + vcl_size_t sum = 0; + for (vcl_size_t ind = 0; ind <= max_entries_per_row; ind++) + { + sum += hist_entries[ind]; + + if 
(NumericT(sum) >= NumericT(gpu_matrix.csr_threshold()) * NumericT(cpu_matrix.size1())) + { + max_entries_per_row = ind; + break; + } + } + + //setup GPU matrix + gpu_matrix.ellnnz_ = max_entries_per_row; + gpu_matrix.rows_ = cpu_matrix.size1(); + gpu_matrix.cols_ = cpu_matrix.size2(); + + vcl_size_t nnz = gpu_matrix.internal_size1() * gpu_matrix.internal_ellnnz(); + + viennacl::backend::typesafe_host_array<unsigned int> ell_coords(gpu_matrix.ell_coords_, nnz); + viennacl::backend::typesafe_host_array<unsigned int> csr_rows(gpu_matrix.csr_rows_, cpu_matrix.size1() + 1); + std::vector<unsigned int> csr_cols; + + std::vector<NumericT> ell_elements(nnz); + std::vector<NumericT> csr_elements; + + vcl_size_t csr_index = 0; + + for (typename CPUMatrixT::const_iterator1 row_it = cpu_matrix.begin1(); row_it != cpu_matrix.end1(); ++row_it) + { + vcl_size_t data_index = 0; + + csr_rows.set(row_it.index1(), csr_index); + + for (typename CPUMatrixT::const_iterator2 col_it = row_it.begin(); col_it != row_it.end(); ++col_it) + { + if (data_index < max_entries_per_row) + { + ell_coords.set(gpu_matrix.internal_size1() * data_index + col_it.index1(), col_it.index2()); + ell_elements[gpu_matrix.internal_size1() * data_index + col_it.index1()] = *col_it; + } + else + { + csr_cols.push_back(static_cast<unsigned int>(col_it.index2())); + csr_elements.push_back(*col_it); + + csr_index++; + } + + data_index++; + } + + } + + if (csr_cols.empty()) + { + csr_cols.push_back(0); + csr_elements.push_back(0); + } + + csr_rows.set(csr_rows.size() - 1, csr_index); + + gpu_matrix.csrnnz_ = csr_cols.size(); + + viennacl::backend::typesafe_host_array<unsigned int> csr_cols_for_gpu(gpu_matrix.csr_cols_, csr_cols.size()); + for (vcl_size_t i=0; i<csr_cols.size(); ++i) + csr_cols_for_gpu.set(i, csr_cols[i]); + + viennacl::backend::memory_create(gpu_matrix.ell_coords_, ell_coords.raw_size(), traits::context(gpu_matrix.ell_coords_), ell_coords.get()); + 
viennacl::backend::memory_create(gpu_matrix.ell_elements_, sizeof(NumericT) * ell_elements.size(), traits::context(gpu_matrix.ell_elements_), &(ell_elements[0])); + + viennacl::backend::memory_create(gpu_matrix.csr_rows_, csr_rows.raw_size(), traits::context(gpu_matrix.csr_rows_), csr_rows.get()); + viennacl::backend::memory_create(gpu_matrix.csr_cols_, csr_cols_for_gpu.raw_size(), traits::context(gpu_matrix.csr_cols_), csr_cols_for_gpu.get()); + viennacl::backend::memory_create(gpu_matrix.csr_elements_, sizeof(NumericT) * csr_elements.size(), traits::context(gpu_matrix.csr_elements_), &(csr_elements[0])); + } +} + + +/** @brief Copies a sparse matrix from the host to the compute device. The host type is the std::vector< std::map < > > format . + * + * @param cpu_matrix A sparse matrix on the host composed of an STL vector and an STL map. + * @param gpu_matrix The sparse hyb_matrix from ViennaCL + */ +template<typename IndexT, typename NumericT, unsigned int AlignmentV> +void copy(std::vector< std::map<IndexT, NumericT> > const & cpu_matrix, + hyb_matrix<NumericT, AlignmentV> & gpu_matrix) +{ + vcl_size_t max_col = 0; + for (vcl_size_t i=0; i<cpu_matrix.size(); ++i) + { + if (cpu_matrix[i].size() > 0) + max_col = std::max<vcl_size_t>(max_col, (cpu_matrix[i].rbegin())->first); + } + + viennacl::copy(tools::const_sparse_matrix_adapter<NumericT, IndexT>(cpu_matrix, cpu_matrix.size(), max_col + 1), gpu_matrix); +} + + + + +template<typename CPUMatrixT, typename NumericT, unsigned int AlignmentV> +void copy(const hyb_matrix<NumericT, AlignmentV>& gpu_matrix, CPUMatrixT& cpu_matrix) +{ + assert( (viennacl::traits::size1(cpu_matrix) == gpu_matrix.size1()) && bool("Size mismatch") ); + assert( (viennacl::traits::size2(cpu_matrix) == gpu_matrix.size2()) && bool("Size mismatch") ); + + if (gpu_matrix.size1() > 0 && gpu_matrix.size2() > 0) + { + std::vector<NumericT> ell_elements(gpu_matrix.internal_size1() * gpu_matrix.internal_ellnnz()); + 
viennacl::backend::typesafe_host_array<unsigned int> ell_coords(gpu_matrix.handle2(), gpu_matrix.internal_size1() * gpu_matrix.internal_ellnnz()); + + std::vector<NumericT> csr_elements(gpu_matrix.csr_nnz()); + viennacl::backend::typesafe_host_array<unsigned int> csr_rows(gpu_matrix.handle3(), gpu_matrix.size1() + 1); + viennacl::backend::typesafe_host_array<unsigned int> csr_cols(gpu_matrix.handle4(), gpu_matrix.csr_nnz()); + + viennacl::backend::memory_read(gpu_matrix.handle(), 0, sizeof(NumericT) * ell_elements.size(), &(ell_elements[0])); + viennacl::backend::memory_read(gpu_matrix.handle2(), 0, ell_coords.raw_size(), ell_coords.get()); + viennacl::backend::memory_read(gpu_matrix.handle3(), 0, csr_rows.raw_size(), csr_rows.get()); + viennacl::backend::memory_read(gpu_matrix.handle4(), 0, csr_cols.raw_size(), csr_cols.get()); + viennacl::backend::memory_read(gpu_matrix.handle5(), 0, sizeof(NumericT) * csr_elements.size(), &(csr_elements[0])); + + + for (vcl_size_t row = 0; row < gpu_matrix.size1(); row++) + { + for (vcl_size_t ind = 0; ind < gpu_matrix.internal_ellnnz(); ind++) + { + vcl_size_t offset = gpu_matrix.internal_size1() * ind + row; + + NumericT val = ell_elements[offset]; + if (val <= 0 && val >= 0) // val == 0 without compiler warnings + continue; + + if (ell_coords[offset] >= gpu_matrix.size2()) + { + std::cerr << "ViennaCL encountered invalid data " << offset << " " << ind << " " << row << " " << ell_coords[offset] << " " << gpu_matrix.size2() << std::endl; + return; + } + + cpu_matrix(row, ell_coords[offset]) = val; + } + + for (vcl_size_t ind = csr_rows[row]; ind < csr_rows[row+1]; ind++) + { + NumericT val = csr_elements[ind]; + if (val <= 0 && val >= 0) // val == 0 without compiler warnings + continue; + + if (csr_cols[ind] >= gpu_matrix.size2()) + { + std::cerr << "ViennaCL encountered invalid data " << std::endl; + return; + } + + cpu_matrix(row, csr_cols[ind]) = val; + } + } + } +} + +/** @brief Copies a sparse matrix from the compute 
device to the host. The host type is the std::vector< std::map < > > format . + * + * @param gpu_matrix The sparse hyb_matrix from ViennaCL + * @param cpu_matrix A sparse matrix on the host composed of an STL vector and an STL map. + */ +template<typename NumericT, unsigned int AlignmentV, typename IndexT> +void copy(const hyb_matrix<NumericT, AlignmentV> & gpu_matrix, + std::vector< std::map<IndexT, NumericT> > & cpu_matrix) +{ + if (cpu_matrix.size() == 0) + cpu_matrix.resize(gpu_matrix.size1()); + + assert(cpu_matrix.size() == gpu_matrix.size1() && bool("Matrix dimension mismatch!")); + + tools::sparse_matrix_adapter<NumericT, IndexT> temp(cpu_matrix, cpu_matrix.size(), gpu_matrix.size2()); + viennacl::copy(gpu_matrix, temp); +} + +// +// Specify available operations: +// + +/** \cond */ + +namespace linalg +{ +namespace detail +{ + // x = A * y + template<typename T, unsigned int A> + struct op_executor<vector_base<T>, op_assign, vector_expression<const hyb_matrix<T, A>, const vector_base<T>, op_prod> > + { + static void apply(vector_base<T> & lhs, vector_expression<const hyb_matrix<T, A>, const vector_base<T>, op_prod> const & rhs) + { + // check for the special case x = A * x + if (viennacl::traits::handle(lhs) == viennacl::traits::handle(rhs.rhs())) + { + viennacl::vector<T> temp(lhs); + viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), T(1), temp, T(0)); + lhs = temp; + } + else + viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), T(1), lhs, T(0)); + } + }; + + template<typename T, unsigned int A> + struct op_executor<vector_base<T>, op_inplace_add, vector_expression<const hyb_matrix<T, A>, const vector_base<T>, op_prod> > + { + static void apply(vector_base<T> & lhs, vector_expression<const hyb_matrix<T, A>, const vector_base<T>, op_prod> const & rhs) + { + // check for the special case x += A * x + if (viennacl::traits::handle(lhs) == viennacl::traits::handle(rhs.rhs())) + { + viennacl::vector<T> temp(lhs); + viennacl::linalg::prod_impl(rhs.lhs(), 
rhs.rhs(), T(1), temp, T(0)); + lhs += temp; + } + else + viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), T(1), lhs, T(1)); + } + }; + + template<typename T, unsigned int A> + struct op_executor<vector_base<T>, op_inplace_sub, vector_expression<const hyb_matrix<T, A>, const vector_base<T>, op_prod> > + { + static void apply(vector_base<T> & lhs, vector_expression<const hyb_matrix<T, A>, const vector_base<T>, op_prod> const & rhs) + { + // check for the special case x -= A * x + if (viennacl::traits::handle(lhs) == viennacl::traits::handle(rhs.rhs())) + { + viennacl::vector<T> temp(lhs); + viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), T(1), temp, T(0)); + lhs -= temp; + } + else + viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), T(-1), lhs, T(1)); + } + }; + + + // x = A * vec_op + template<typename T, unsigned int A, typename LHS, typename RHS, typename OP> + struct op_executor<vector_base<T>, op_assign, vector_expression<const hyb_matrix<T, A>, const vector_expression<const LHS, const RHS, OP>, op_prod> > + { + static void apply(vector_base<T> & lhs, vector_expression<const hyb_matrix<T, A>, const vector_expression<const LHS, const RHS, OP>, op_prod> const & rhs) + { + viennacl::vector<T> temp(rhs.rhs(), viennacl::traits::context(rhs)); + viennacl::linalg::prod_impl(rhs.lhs(), temp, lhs); + } + }; + + // x = A * vec_op + template<typename T, unsigned int A, typename LHS, typename RHS, typename OP> + struct op_executor<vector_base<T>, op_inplace_add, vector_expression<const hyb_matrix<T, A>, const vector_expression<const LHS, const RHS, OP>, op_prod> > + { + static void apply(vector_base<T> & lhs, vector_expression<const hyb_matrix<T, A>, const vector_expression<const LHS, const RHS, OP>, op_prod> const & rhs) + { + viennacl::vector<T> temp(rhs.rhs(), viennacl::traits::context(rhs)); + viennacl::vector<T> temp_result(lhs); + viennacl::linalg::prod_impl(rhs.lhs(), temp, temp_result); + lhs += temp_result; + } + }; + + // x = A * vec_op + template<typename T, 
unsigned int A, typename LHS, typename RHS, typename OP> + struct op_executor<vector_base<T>, op_inplace_sub, vector_expression<const hyb_matrix<T, A>, const vector_expression<const LHS, const RHS, OP>, op_prod> > + { + static void apply(vector_base<T> & lhs, vector_expression<const hyb_matrix<T, A>, const vector_expression<const LHS, const RHS, OP>, op_prod> const & rhs) + { + viennacl::vector<T> temp(rhs.rhs(), viennacl::traits::context(rhs)); + viennacl::vector<T> temp_result(lhs); + viennacl::linalg::prod_impl(rhs.lhs(), temp, temp_result); + lhs -= temp_result; + } + }; + +} // namespace detail +} // namespace linalg + +/** \endcond */ +} + +#endif http://git-wip-us.apache.org/repos/asf/mahout/blob/f7c1f802/native-viennaCL/src/main/cpp/viennacl/io/matrix_market.hpp ---------------------------------------------------------------------- diff --git a/native-viennaCL/src/main/cpp/viennacl/io/matrix_market.hpp b/native-viennaCL/src/main/cpp/viennacl/io/matrix_market.hpp new file mode 100644 index 0000000..e8444ee --- /dev/null +++ b/native-viennaCL/src/main/cpp/viennacl/io/matrix_market.hpp @@ -0,0 +1,440 @@ +#ifndef VIENNACL_IO_MATRIX_MARKET_HPP +#define VIENNACL_IO_MATRIX_MARKET_HPP + +/* ========================================================================= + Copyright (c) 2010-2016, Institute for Microelectronics, + Institute for Analysis and Scientific Computing, + TU Wien. + Portions of this software are copyright by UChicago Argonne, LLC. 
+ + ----------------- + ViennaCL - The Vienna Computing Library + ----------------- + + Project Head: Karl Rupp [email protected] + + (A list of authors and contributors can be found in the manual) + + License: MIT (X11), see file LICENSE in the base directory +============================================================================= */ + + +/** @file matrix_market.hpp + @brief A reader and writer for the matrix market format is implemented here +*/ + +#include <algorithm> +#include <string> +#include <iostream> +#include <fstream> +#include <sstream> +#include <vector> +#include <map> +#include <cctype> +#include "viennacl/tools/adapter.hpp" +#include "viennacl/traits/size.hpp" +#include "viennacl/traits/fill.hpp" + +namespace viennacl +{ +namespace io +{ +//helper +namespace detail +{ + inline void trim(char * buffer, long max_size) + { + //trim at beginning of string + long start = 0; + for (long i=0; i<max_size; ++i) + { + if (buffer[i] == ' ') + ++start; + else + break; + } + + //trim at end of string + long stop = start; + for (long i=stop; i<max_size; ++i) + { + if (buffer[i] == 0) //end of string + break; + + if (buffer[i] != ' ') + stop = i; + } + + for (long i=0; i<=stop - start; ++i) + { + buffer[i] = buffer[start + i]; + } + + if (buffer[0] != ' ') + buffer[stop - start + 1] = 0; //terminate string + else + buffer[0] = 0; + } + + inline std::string tolower(std::string & s) + { + std::transform(s.begin(), s.end(), s.begin(), static_cast < int(*)(int) > (std::tolower)); + return s; + } + + + +} //namespace + +///////// reader //////////// + +/** @brief Reads a sparse or dense matrix from a file (MatrixMarket format) +* +* Note: If the matrix in the MatrixMarket file is complex, only the real-valued part is loaded! +* +* @param mat The matrix that is to be read +* @param file Filename from which the matrix should be read +* @param index_base The index base, typically 1 +* @tparam MatrixT A generic matrix type. 
Type requirements: size1() returns number of rows, size2() returns number columns, operator() writes array entries, resize() allows resizing the matrix. +* @return Returns nonzero if file is read correctly +*/ +template<typename MatrixT> +long read_matrix_market_file_impl(MatrixT & mat, + const char * file, + long index_base) +{ + typedef typename viennacl::result_of::cpu_value_type<typename viennacl::result_of::value_type<MatrixT>::type>::type ScalarT; + + //std::cout << "Reading matrix market file" << std::endl; + char buffer[1025]; + std::ifstream reader(file); + std::string token; + long linenum = 0; + bool symmetric = false; + bool dense_format = false; + bool is_header = true; + bool pattern_matrix = false; + //bool is_complex = false; + long cur_row = 0; + long cur_col = 0; + long valid_entries = 0; + long nnz = 0; + + + if (!reader){ + std::cerr << "ViennaCL: Matrix Market Reader: Cannot open file " << file << std::endl; + return EXIT_FAILURE; + } + + while (reader.good()) + { + // get a non-empty line + do + { + reader.getline(buffer, 1024); + ++linenum; + detail::trim(buffer, 1024); + } + while (reader.good() && buffer[0] == 0); + + if (buffer[0] == '%') + { + if (buffer[1] == '%') + { + //parse header: + std::stringstream line(std::string(buffer + 2)); + line >> token; + if (detail::tolower(token) != "matrixmarket") + { + std::cerr << "Error in file " << file << " at line " << linenum << " in file " << file << ": Expected 'MatrixMarket', got '" << token << "'" << std::endl; + return 0; + } + + line >> token; + if (detail::tolower(token) != "matrix") + { + std::cerr << "Error in file " << file << " at line " << linenum << " in file " << file << ": Expected 'matrix', got '" << token << "'" << std::endl; + return 0; + } + + line >> token; + if (detail::tolower(token) != "coordinate") + { + if (detail::tolower(token) == "array") + { + dense_format = true; + std::cerr << "Error in file " << file << " at line " << linenum << " in file " << file << ": 'array' 
type is not supported yet!" << std::endl; + return 0; + } + else + { + std::cerr << "Error in file " << file << " at line " << linenum << " in file " << file << ": Expected 'array' or 'coordinate', got '" << token << "'" << std::endl; + return 0; + } + } + + line >> token; + if (detail::tolower(token) == "pattern") + { + pattern_matrix = true; + } + else if (detail::tolower(token) == "complex") + { + //is_complex = true; + } + else if (detail::tolower(token) != "real") + { + std::cerr << "Error in file " << file << ": The MatrixMarket reader provided with ViennaCL supports only real valued floating point arithmetic or pattern type matrices." << std::endl; + return 0; + } + + line >> token; + if (detail::tolower(token) == "general"){ } + else if (detail::tolower(token) == "symmetric"){ symmetric = true; } + else + { + std::cerr << "Error in file " << file << ": The MatrixMarket reader provided with ViennaCL supports only general or symmetric matrices." << std::endl; + return 0; + } + + } + } + else + { + std::stringstream line(std::stringstream::in | std::stringstream::out); + line << std::string(buffer); + + if (is_header) + { + //read header line + vcl_size_t rows; + vcl_size_t cols; + + if (line.good()) + line >> rows; + else + { + std::cerr << "Error in file " << file << ": Could not get matrix dimensions (rows) in line " << linenum << std::endl; + return 0; + } + + if (line.good()) + line >> cols; + else + { + std::cerr << "Error in file " << file << ": Could not get matrix dimensions (columns) in line " << linenum << std::endl; + return 0; + } + if (!dense_format) + { + if (line.good()) + line >> nnz; + else + { + std::cerr << "Error in file " << file << ": Could not get matrix dimensions (columns) in line " << linenum << std::endl; + return 0; + } + } + + if (rows > 0 && cols > 0) + viennacl::traits::resize(mat, rows, cols); + + is_header = false; + } + else + { + //read data + if (dense_format) + { + ScalarT value; + line >> value; + 
viennacl::traits::fill(mat, static_cast<vcl_size_t>(cur_row), static_cast<vcl_size_t>(cur_col), value); + + if (++cur_row == static_cast<long>(viennacl::traits::size1(mat))) + { + //next column + ++cur_col; + cur_row = 0; + } + } + else //sparse format + { + long row; + long col; + ScalarT value = ScalarT(1); + + //parse data: + if (line.good()) + line >> row; + else + { + std::cerr << "Error in file " << file << ": Parse error for matrix row entry in line " << linenum << std::endl; + return 0; + } + + if (line.good()) + line >> col; + else + { + std::cerr << "Error in file " << file << ": Parse error for matrix col entry in line " << linenum << std::endl; + return 0; + } + + //take index_base base into account: + row -= index_base; + col -= index_base; + + if (!pattern_matrix) // value for pattern matrix is implicitly 1, so we only need to read data for 'normal' matrices + { + if (line.good()) + { + line >> value; + } + else + { + std::cerr << "Error in file " << file << ": Parse error for matrix entry in line " << linenum << std::endl; + return 0; + } + } + + if (row >= static_cast<long>(viennacl::traits::size1(mat)) || row < 0) + { + std::cerr << "Error in file " << file << " at line " << linenum << ": Row index out of bounds: " << row << " (matrix dim: " << viennacl::traits::size1(mat) << " x " << viennacl::traits::size2(mat) << ")" << std::endl; + return 0; + } + + if (col >= static_cast<long>(viennacl::traits::size2(mat)) || col < 0) + { + std::cerr << "Error in file " << file << " at line " << linenum << ": Column index out of bounds: " << col << " (matrix dim: " << viennacl::traits::size1(mat) << " x " << viennacl::traits::size2(mat) << ")" << std::endl; + return 0; + } + + viennacl::traits::fill(mat, static_cast<vcl_size_t>(row), static_cast<vcl_size_t>(col), value); //basically equivalent to mat(row, col) = value; + if (symmetric) + viennacl::traits::fill(mat, static_cast<vcl_size_t>(col), static_cast<vcl_size_t>(row), value); //basically equivalent to 
mat(col, row) = value; + + if (++valid_entries == nnz) + break; + + } //else dense_format + } + } + } + + //std::cout << linenum << " lines read." << std::endl; + reader.close(); + return linenum; +} + + +/** @brief Reads a sparse matrix from a file (MatrixMarket format) +* +* @param mat The matrix that is to be read (ublas-types and std::vector< std::map <unsigned int, ScalarT> > are supported) +* @param file The filename +* @param index_base The index base, typically 1 +* @tparam MatrixT A generic matrix type. Type requirements: size1() returns number of rows, size2() returns number columns, operator() writes array entries, resize() allows resizing the matrix. +* @return Returns nonzero if file is read correctly +*/ +template<typename MatrixT> +long read_matrix_market_file(MatrixT & mat, + const char * file, + long index_base = 1) +{ + return read_matrix_market_file_impl(mat, file, index_base); +} + +template<typename MatrixT> +long read_matrix_market_file(MatrixT & mat, + const std::string & file, + long index_base = 1) +{ + return read_matrix_market_file_impl(mat, file.c_str(), index_base); +} + +template<typename ScalarT> +long read_matrix_market_file(std::vector< std::map<unsigned int, ScalarT> > & mat, + const char * file, + long index_base = 1) +{ + viennacl::tools::sparse_matrix_adapter<ScalarT> adapted_matrix(mat); + return read_matrix_market_file_impl(adapted_matrix, file, index_base); +} + +template<typename ScalarT> +long read_matrix_market_file(std::vector< std::map<unsigned int, ScalarT> > & mat, + const std::string & file, + long index_base = 1) +{ + viennacl::tools::sparse_matrix_adapter<ScalarT> adapted_matrix(mat); + return read_matrix_market_file_impl(adapted_matrix, file.c_str(), index_base); +} + + +////////// writer ///////////// +template<typename MatrixT> +void write_matrix_market_file_impl(MatrixT const & mat, const char * file, long index_base) +{ + std::ofstream writer(file); + + long num_entries = 0; + for (typename 
MatrixT::const_iterator1 row_it = mat.begin1(); + row_it != mat.end1(); + ++row_it) + for (typename MatrixT::const_iterator2 col_it = row_it.begin(); + col_it != row_it.end(); + ++col_it) + ++num_entries; + + writer << "%%MatrixMarket matrix coordinate real general" << std::endl; + writer << mat.size1() << " " << mat.size2() << " " << num_entries << std::endl; + + for (typename MatrixT::const_iterator1 row_it = mat.begin1(); + row_it != mat.end1(); + ++row_it) + for (typename MatrixT::const_iterator2 col_it = row_it.begin(); + col_it != row_it.end(); + ++col_it) + writer << col_it.index1() + index_base << " " << col_it.index2() + index_base << " " << *col_it << std::endl; + + writer.close(); +} + +template<typename ScalarT> +void write_matrix_market_file(std::vector< std::map<unsigned int, ScalarT> > const & mat, + const char * file, + long index_base = 1) +{ + viennacl::tools::const_sparse_matrix_adapter<ScalarT> adapted_matrix(mat); + return write_matrix_market_file_impl(adapted_matrix, file, index_base); +} + +template<typename ScalarT> +void write_matrix_market_file(std::vector< std::map<unsigned int, ScalarT> > const & mat, + const std::string & file, + long index_base = 1) +{ + viennacl::tools::const_sparse_matrix_adapter<ScalarT> adapted_matrix(mat); + return write_matrix_market_file_impl(adapted_matrix, file.c_str(), index_base); +} + +/** @brief Writes a sparse matrix to a file (MatrixMarket format) +* +* @param mat The matrix that is to be read (ublas-types and std::vector< std::map <unsigned int, ScalarT> > are supported) +* @param file The filename +* @param index_base The index base, typically 1 +* @tparam MatrixT A generic matrix type. Type requirements: size1() returns number of rows, size2() returns number columns, operator() writes array entries, resize() allows resizing the matrix. 
+* @return Returns nonzero if file is read correctly +*/ +template<typename MatrixT> +void write_matrix_market_file(MatrixT const & mat, + const std::string & file, + long index_base = 1) +{ + write_matrix_market_file_impl(mat, file.c_str(), index_base); +} + + +} //namespace io +} //namespace viennacl + +#endif
