http://git-wip-us.apache.org/repos/asf/mahout/blob/f7c1f802/native-viennaCL/src/main/cpp/viennacl/forwards.h
----------------------------------------------------------------------
diff --git a/native-viennaCL/src/main/cpp/viennacl/forwards.h 
b/native-viennaCL/src/main/cpp/viennacl/forwards.h
new file mode 100644
index 0000000..23a4580
--- /dev/null
+++ b/native-viennaCL/src/main/cpp/viennacl/forwards.h
@@ -0,0 +1,1032 @@
+#ifndef VIENNACL_FORWARDS_H
+#define VIENNACL_FORWARDS_H
+
+/* =========================================================================
+   Copyright (c) 2010-2016, Institute for Microelectronics,
+                            Institute for Analysis and Scientific Computing,
+                            TU Wien.
+   Portions of this software are copyright by UChicago Argonne, LLC.
+
+                            -----------------
+                  ViennaCL - The Vienna Computing Library
+                            -----------------
+
+   Project Head:    Karl Rupp                   [email protected]
+
+   (A list of authors and contributors can be found in the manual)
+
+   License:         MIT (X11), see file LICENSE in the base directory
+============================================================================= 
*/
+
+
+/** @file viennacl/forwards.h
+    @brief This file provides the forward declarations for the main types used 
within ViennaCL
+*/
+
+/**
+ @mainpage Main Page
+
+ Here you can find all the documentation on how to use the GPU-accelerated 
linear algebra library ViennaCL.
+ The formerly separate \ref usermanual "user manual" is no longer available as 
a standalone PDF, but all integrated into the HTML-based documentation.
+ Please use the navigation panel on the left to access the desired information.
+
+ Quick links:
+     - \ref manual-installation "Installation and building the examples"
+     - \ref manual-types        "Basic types"
+     - \ref manual-operations   "Basic operations"
+     - \ref manual-algorithms   "Algorithms"
+
+
+ -----------------------------------
+ \htmlonly
+ <div style="align: right; width: 100%">
+ <a href="http://www.tuwien.ac.at/";><img src="tuwien.png"></a>
+ <a href="http://www.iue.tuwien.ac.at/";><img src="iue.png"></a>
+ <a href="http://www.asc.tuwien.ac.at/";><img src="asc.png"></a>
+ </div>
+ \endhtmlonly
+*/
+
+
+//compatibility defines:
+#ifdef VIENNACL_HAVE_UBLAS
+  #define VIENNACL_WITH_UBLAS
+#endif
+
+#ifdef VIENNACL_HAVE_EIGEN
+  #define VIENNACL_WITH_EIGEN
+#endif
+
+#ifdef VIENNACL_HAVE_MTL4
+  #define VIENNACL_WITH_MTL4
+#endif
+
+#include <cstddef>
+#include <cassert>
+#include <string>
+#include <stdexcept>
+
+#include "viennacl/meta/enable_if.hpp"
+#include "viennacl/version.hpp"
+
+/** @brief Main namespace in ViennaCL. Holds all the basic types such as 
vector, matrix, etc. and defines operations upon them. */
+namespace viennacl
+{
+  typedef std::size_t                                       vcl_size_t;
+  typedef std::ptrdiff_t                                    vcl_ptrdiff_t;
+
+
+
+  /** @brief A tag class representing assignment */
+  struct op_assign {};
+  /** @brief A tag class representing inplace addition */
+  struct op_inplace_add {};
+  /** @brief A tag class representing inplace subtraction */
+  struct op_inplace_sub {};
+
+  /** @brief A tag class representing addition */
+  struct op_add {};
+  /** @brief A tag class representing subtraction */
+  struct op_sub {};
+  /** @brief A tag class representing multiplication by a scalar */
+  struct op_mult {};
+  /** @brief A tag class representing matrix-vector products and element-wise 
multiplications*/
+  struct op_prod {};
+  /** @brief A tag class representing matrix-matrix products */
+  struct op_mat_mat_prod {};
+  /** @brief A tag class representing division */
+  struct op_div {};
+  /** @brief A tag class representing the power function */
+  struct op_pow {};
+
+  /** @brief A tag class representing equality */
+ struct op_eq {};
+ /** @brief A tag class representing inequality */
+ struct op_neq {};
+ /** @brief A tag class representing greater-than */
+ struct op_greater {};
+ /** @brief A tag class representing less-than */
+ struct op_less {};
+ /** @brief A tag class representing greater-than-or-equal-to */
+ struct op_geq {};
+ /** @brief A tag class representing less-than-or-equal-to */
+ struct op_leq {};
+
+  /** @brief A tag class representing the summation of a vector */
+  struct op_sum {};
+
+  /** @brief A tag class representing the summation of all rows of a matrix */
+  struct op_row_sum {};
+
+  /** @brief A tag class representing the summation of all columns of a matrix 
*/
+  struct op_col_sum {};
+
+  /** @brief A tag class representing element-wise casting operations on 
vectors and matrices */
+  template<typename OP>
+  struct op_element_cast {};
+
+  /** @brief A tag class representing element-wise binary operations (like 
multiplication) on vectors or matrices */
+  template<typename OP>
+  struct op_element_binary {};
+
+  /** @brief A tag class representing element-wise unary operations (like 
sin()) on vectors or matrices */
+  template<typename OP>
+  struct op_element_unary {};
+
+  /** @brief A tag class representing the modulus function for integers */
+  struct op_abs {};
+  /** @brief A tag class representing the acos() function */
+  struct op_acos {};
+  /** @brief A tag class representing the asin() function */
+  struct op_asin {};
+  /** @brief A tag class for representing the argmax() function */
+  struct op_argmax {};
+  /** @brief A tag class for representing the argmin() function */
+  struct op_argmin {};
+  /** @brief A tag class representing the atan() function */
+  struct op_atan {};
+  /** @brief A tag class representing the atan2() function */
+  struct op_atan2 {};
+  /** @brief A tag class representing the ceil() function */
+  struct op_ceil {};
+  /** @brief A tag class representing the cos() function */
+  struct op_cos {};
+  /** @brief A tag class representing the cosh() function */
+  struct op_cosh {};
+  /** @brief A tag class representing the exp() function */
+  struct op_exp {};
+  /** @brief A tag class representing the fabs() function */
+  struct op_fabs {};
+  /** @brief A tag class representing the fdim() function */
+  struct op_fdim {};
+  /** @brief A tag class representing the floor() function */
+  struct op_floor {};
+  /** @brief A tag class representing the fmax() function */
+  struct op_fmax {};
+  /** @brief A tag class representing the fmin() function */
+  struct op_fmin {};
+  /** @brief A tag class representing the fmod() function */
+  struct op_fmod {};
+  /** @brief A tag class representing the log() function */
+  struct op_log {};
+  /** @brief A tag class representing the log10() function */
+  struct op_log10 {};
+  /** @brief A tag class representing the sin() function */
+  struct op_sin {};
+  /** @brief A tag class representing the sinh() function */
+  struct op_sinh {};
+  /** @brief A tag class representing the sqrt() function */
+  struct op_sqrt {};
+  /** @brief A tag class representing the tan() function */
+  struct op_tan {};
+  /** @brief A tag class representing the tanh() function */
+  struct op_tanh {};
+
+  /** @brief A tag class representing the (off-)diagonal of a matrix */
+  struct op_matrix_diag {};
+
+  /** @brief A tag class representing a matrix given by a vector placed on a 
certain (off-)diagonal */
+  struct op_vector_diag {};
+
+  /** @brief A tag class representing the extraction of a matrix row to a 
vector */
+  struct op_row {};
+
+  /** @brief A tag class representing the extraction of a matrix column to a 
vector */
+  struct op_column {};
+
+  /** @brief A tag class representing inner products of two vectors */
+  struct op_inner_prod {};
+
+  /** @brief A tag class representing the 1-norm of a vector */
+  struct op_norm_1 {};
+
+  /** @brief A tag class representing the 2-norm of a vector */
+  struct op_norm_2 {};
+
+  /** @brief A tag class representing the inf-norm of a vector */
+  struct op_norm_inf {};
+
+  /** @brief A tag class representing the maximum of a vector */
+  struct op_max {};
+
+  /** @brief A tag class representing the minimum of a vector */
+  struct op_min {};
+
+
+  /** @brief A tag class representing the Frobenius-norm of a matrix */
+  struct op_norm_frobenius {};
+
+  /** @brief A tag class representing transposed matrices */
+  struct op_trans {};
+
+  /** @brief A tag class representing sign flips (for scalars only. Vectors 
and matrices use the standard multiplication by the scalar -1.0) */
+  struct op_flip_sign {};
+
+  //forward declaration of basic types:
+  template<class TYPE>
+  class scalar;
+
+  template<typename LHS, typename RHS, typename OP>
+  class scalar_expression;
+
+  template<typename SCALARTYPE>
+  class entry_proxy;
+
+  template<typename SCALARTYPE>
+  class const_entry_proxy;
+
+  template<typename LHS, typename RHS, typename OP>
+  class vector_expression;
+
+  template<class SCALARTYPE, unsigned int ALIGNMENT>
+  class vector_iterator;
+
+  template<class SCALARTYPE, unsigned int ALIGNMENT>
+  class const_vector_iterator;
+
+  template<typename SCALARTYPE>
+  class implicit_vector_base;
+
+  template<typename SCALARTYPE>
+  struct zero_vector;
+
+  template<typename SCALARTYPE>
+  struct unit_vector;
+
+  template<typename SCALARTYPE>
+  struct one_vector;
+
+  template<typename SCALARTYPE>
+  struct scalar_vector;
+
+  template<class SCALARTYPE, typename SizeType = vcl_size_t, typename 
DistanceType = vcl_ptrdiff_t>
+  class vector_base;
+
+  template<class SCALARTYPE, unsigned int ALIGNMENT = 1>
+  class vector;
+
+  template<typename ScalarT>
+  class vector_tuple;
+
+  //the following forwards are needed for GMRES
+  template<typename SCALARTYPE, unsigned int ALIGNMENT, typename CPU_ITERATOR>
+  void copy(CPU_ITERATOR const & cpu_begin,
+            CPU_ITERATOR const & cpu_end,
+            vector_iterator<SCALARTYPE, ALIGNMENT> gpu_begin);
+
+  template<typename SCALARTYPE, unsigned int ALIGNMENT_SRC, unsigned int 
ALIGNMENT_DEST>
+  void copy(const_vector_iterator<SCALARTYPE, ALIGNMENT_SRC> const & 
gpu_src_begin,
+            const_vector_iterator<SCALARTYPE, ALIGNMENT_SRC> const & 
gpu_src_end,
+            vector_iterator<SCALARTYPE, ALIGNMENT_DEST> gpu_dest_begin);
+
+  template<typename SCALARTYPE, unsigned int ALIGNMENT_SRC, unsigned int 
ALIGNMENT_DEST>
+  void copy(const_vector_iterator<SCALARTYPE, ALIGNMENT_SRC> const & 
gpu_src_begin,
+            const_vector_iterator<SCALARTYPE, ALIGNMENT_SRC> const & 
gpu_src_end,
+            const_vector_iterator<SCALARTYPE, ALIGNMENT_DEST> gpu_dest_begin);
+
+  template<typename SCALARTYPE, unsigned int ALIGNMENT, typename CPU_ITERATOR>
+  void fast_copy(const const_vector_iterator<SCALARTYPE, ALIGNMENT> & 
gpu_begin,
+                 const const_vector_iterator<SCALARTYPE, ALIGNMENT> & gpu_end,
+                 CPU_ITERATOR cpu_begin );
+
+  template<typename CPU_ITERATOR, typename SCALARTYPE, unsigned int ALIGNMENT>
+  void fast_copy(CPU_ITERATOR const & cpu_begin,
+                  CPU_ITERATOR const & cpu_end,
+                  vector_iterator<SCALARTYPE, ALIGNMENT> gpu_begin);
+
+
+  /** @brief Tag class for indicating row-major layout of a matrix. Not passed 
to the matrix directly, see row_major type. */
+  struct row_major_tag {};
+  /** @brief Tag class for indicating column-major layout of a matrix. Not 
passed to the matrix directly, see row_major type. */
+  struct column_major_tag {};
+
+  /** @brief A tag for row-major storage of a dense matrix. */
+  struct row_major
+  {
+    typedef row_major_tag         orientation_category;
+
+    /** @brief Returns the memory offset for entry (i,j) of a dense matrix.
+    *
+    * @param i   row index
+    * @param j   column index
+    * @param num_cols  number of entries per column (including alignment)
+    */
+    static vcl_size_t mem_index(vcl_size_t i, vcl_size_t j, vcl_size_t /* 
num_rows */, vcl_size_t num_cols)
+    {
+      return i * num_cols + j;
+    }
+  };
+
+  /** @brief A tag for column-major storage of a dense matrix. */
+  struct column_major
+  {
+    typedef column_major_tag         orientation_category;
+
+    /** @brief Returns the memory offset for entry (i,j) of a dense matrix.
+    *
+    * @param i   row index
+    * @param j   column index
+    * @param num_rows  number of entries per row (including alignment)
+    */
+    static vcl_size_t mem_index(vcl_size_t i, vcl_size_t j, vcl_size_t 
num_rows, vcl_size_t /* num_cols */)
+    {
+      return i + j * num_rows;
+    }
+  };
+
+  struct row_iteration;
+  struct col_iteration;
+
+  template<typename LHS, typename RHS, typename OP>
+  class matrix_expression;
+
+  class context;
+
+  enum memory_types
+  {
+    MEMORY_NOT_INITIALIZED
+    , MAIN_MEMORY
+    , OPENCL_MEMORY
+    , CUDA_MEMORY
+  };
+
+  namespace backend
+  {
+    class mem_handle;
+  }
+
+  //
+  // Matrix types:
+  //
+  static const vcl_size_t dense_padding_size = 128;
+
+  /** @brief A dense matrix class
+  *
+  * @tparam SCALARTYPE   The underlying scalar type (either float or double)
+  * @tparam ALIGNMENT   The internal memory size is given by (size()/ALIGNMENT 
+ 1) * ALIGNMENT. ALIGNMENT must be a power of two. Best values or usually 4, 8 
or 16, higher values are usually a waste of memory.
+  */
+  template<typename ROWCOL, typename MATRIXTYPE>
+  class matrix_iterator;
+
+  template<class SCALARTYPE, typename SizeType = vcl_size_t, typename 
DistanceType = vcl_ptrdiff_t>
+  class matrix_base;
+
+  template<class SCALARTYPE, typename F = row_major, unsigned int ALIGNMENT = 
1>
+  class matrix;
+
+  template<typename SCALARTYPE>
+  class implicit_matrix_base;
+
+  template<class SCALARTYPE>
+  class identity_matrix;
+
+  template<class SCALARTYPE>
+  class zero_matrix;
+
+  template<class SCALARTYPE>
+  class scalar_matrix;
+
+  template<class SCALARTYPE, unsigned int ALIGNMENT = 1>
+  class compressed_matrix;
+
+  template<class SCALARTYPE>
+  class compressed_compressed_matrix;
+
+
+  template<class SCALARTYPE, unsigned int ALIGNMENT = 128>
+  class coordinate_matrix;
+
+  template<class SCALARTYPE, unsigned int ALIGNMENT = 1>
+  class ell_matrix;
+
+  template<typename ScalarT, typename IndexT = unsigned int>
+  class sliced_ell_matrix;
+
+  template<class SCALARTYPE, unsigned int ALIGNMENT = 1>
+  class hyb_matrix;
+
+  template<class SCALARTYPE, unsigned int ALIGNMENT = 1>
+  class circulant_matrix;
+
+  template<class SCALARTYPE, unsigned int ALIGNMENT = 1>
+  class hankel_matrix;
+
+  template<class SCALARTYPE, unsigned int ALIGNMENT = 1>
+  class toeplitz_matrix;
+
+  template<class SCALARTYPE, unsigned int ALIGNMENT = 1>
+  class vandermonde_matrix;
+
+  //
+  // Proxies:
+  //
+  template<typename SizeType = vcl_size_t, typename DistanceType = 
std::ptrdiff_t>
+  class basic_range;
+
+  typedef basic_range<>  range;
+
+  template<typename SizeType = vcl_size_t, typename DistanceType = 
std::ptrdiff_t>
+  class basic_slice;
+
+  typedef basic_slice<>  slice;
+
+  template<typename VectorType>
+  class vector_range;
+
+  template<typename VectorType>
+  class vector_slice;
+
+  template<typename MatrixType>
+  class matrix_range;
+
+  template<typename MatrixType>
+  class matrix_slice;
+
+
+  /** @brief Helper struct for checking whether a type is a host scalar type 
(e.g. float, double) */
+  template<typename T>
+  struct is_cpu_scalar
+  {
+    enum { value = false };
+  };
+
+  /** @brief Helper struct for checking whether a type is a viennacl::scalar<> 
*/
+  template<typename T>
+  struct is_scalar
+  {
+    enum { value = false };
+  };
+
+  /** @brief Helper struct for checking whether a type represents a sign flip 
on a viennacl::scalar<> */
+  template<typename T>
+  struct is_flip_sign_scalar
+  {
+    enum { value = false };
+  };
+
+  /** @brief Helper struct for checking whether the provided type represents a 
scalar (either host, from ViennaCL, or a flip-sign proxy) */
+  template<typename T>
+  struct is_any_scalar
+  {
+    enum { value = (is_scalar<T>::value || is_cpu_scalar<T>::value || 
is_flip_sign_scalar<T>::value )};
+  };
+
+  /** @brief Checks for a type being either vector_base or 
implicit_vector_base */
+  template<typename T>
+  struct is_any_vector { enum { value = 0 }; };
+
+  /** @brief Checks for either matrix_base or implicit_matrix_base */
+  template<typename T>
+  struct is_any_dense_matrix { enum { value = 0 }; };
+
+  /** @brief Helper class for checking whether a matrix has a row-major 
layout. */
+  template<typename T>
+  struct is_row_major
+  {
+    enum { value = false };
+  };
+
+  /** @brief Helper class for checking whether a matrix is a compressed_matrix 
(CSR format) */
+  template<typename T>
+  struct is_compressed_matrix
+  {
+    enum { value = false };
+  };
+
+  /** @brief Helper class for checking whether a matrix is a coordinate_matrix 
(COO format) */
+  template<typename T>
+  struct is_coordinate_matrix
+  {
+    enum { value = false };
+  };
+
+  /** @brief Helper class for checking whether a matrix is an ell_matrix (ELL 
format) */
+  template<typename T>
+  struct is_ell_matrix
+  {
+    enum { value = false };
+  };
+
+  /** @brief Helper class for checking whether a matrix is a sliced_ell_matrix 
(SELL-C-\f$ \sigma \f$ format) */
+  template<typename T>
+  struct is_sliced_ell_matrix
+  {
+    enum { value = false };
+  };
+
+
+  /** @brief Helper class for checking whether a matrix is a hyb_matrix 
(hybrid format: ELL plus CSR) */
+  template<typename T>
+  struct is_hyb_matrix
+  {
+    enum { value = false };
+  };
+
+  /** @brief Helper class for checking whether the provided type is one of the 
sparse matrix types (compressed_matrix, coordinate_matrix, etc.) */
+  template<typename T>
+  struct is_any_sparse_matrix
+  {
+    enum { value = false };
+  };
+
+
+  /** @brief Helper class for checking whether a matrix is a circulant matrix 
*/
+  template<typename T>
+  struct is_circulant_matrix
+  {
+    enum { value = false };
+  };
+
+  /** @brief Helper class for checking whether a matrix is a Hankel matrix */
+  template<typename T>
+  struct is_hankel_matrix
+  {
+    enum { value = false };
+  };
+
+  /** @brief Helper class for checking whether a matrix is a Toeplitz matrix */
+  template<typename T>
+  struct is_toeplitz_matrix
+  {
+    enum { value = false };
+  };
+
+  /** @brief Helper class for checking whether a matrix is a Vandermonde 
matrix */
+  template<typename T>
+  struct is_vandermonde_matrix
+  {
+    enum { value = false };
+  };
+
+  /** @brief Helper class for checking whether the provided type is any of the 
dense structured matrix types (circulant, Hankel, etc.) */
+  template<typename T>
+  struct is_any_dense_structured_matrix
+  {
+    enum { value = viennacl::is_circulant_matrix<T>::value || 
viennacl::is_hankel_matrix<T>::value || viennacl::is_toeplitz_matrix<T>::value 
|| viennacl::is_vandermonde_matrix<T>::value };
+  };
+
+
+
+
+  /** @brief Exception class in case of memory errors */
+  class memory_exception : public std::exception
+  {
+  public:
+    memory_exception() : message_() {}
+    memory_exception(std::string message) : message_("ViennaCL: Internal 
memory error: " + message) {}
+
+    virtual const char* what() const throw() { return message_.c_str(); }
+
+    virtual ~memory_exception() throw() {}
+  private:
+    std::string message_;
+  };
+
+  class cuda_not_available_exception : public std::exception
+  {
+  public:
+    cuda_not_available_exception() : message_("ViennaCL was compiled without 
CUDA support, but CUDA functionality required for this operation.") {}
+
+    virtual const char* what() const throw() { return message_.c_str(); }
+
+    virtual ~cuda_not_available_exception() throw() {}
+  private:
+    std::string message_;
+  };
+
+  class zero_on_diagonal_exception : public std::runtime_error
+  {
+  public:
+    zero_on_diagonal_exception(std::string const & what_arg) : 
std::runtime_error(what_arg) {}
+  };
+
+  class unknown_norm_exception : public std::runtime_error
+  {
+  public:
+    unknown_norm_exception(std::string const & what_arg) : 
std::runtime_error(what_arg) {}
+  };
+
+
+
+  namespace tools
+  {
+    //helper for matrix row/col iterators
+    //must be specialized for every viennacl matrix type
+    /** @brief Helper class for incrementing an iterator in a dense matrix. */
+    template<typename ROWCOL, typename MATRIXTYPE>
+    struct MATRIX_ITERATOR_INCREMENTER
+    {
+      typedef typename 
MATRIXTYPE::ERROR_SPECIALIZATION_FOR_THIS_MATRIX_TYPE_MISSING          
ErrorIndicator;
+
+      static void apply(const MATRIXTYPE & /*mat*/, unsigned int & /*row*/, 
unsigned int & /*col*/) {}
+    };
+  }
+
+  namespace linalg
+  {
+#if !defined(_MSC_VER) || defined(__CUDACC__)
+
+    template<class SCALARTYPE, unsigned int ALIGNMENT>
+    void convolve_i(viennacl::vector<SCALARTYPE, ALIGNMENT>& input1,
+                    viennacl::vector<SCALARTYPE, ALIGNMENT>& input2,
+                    viennacl::vector<SCALARTYPE, ALIGNMENT>& output);
+
+    template<typename T>
+    viennacl::vector_expression<const vector_base<T>, const vector_base<T>, 
op_element_binary<op_prod> >
+    element_prod(vector_base<T> const & v1, vector_base<T> const & v2);
+
+    template<typename T>
+    viennacl::vector_expression<const vector_base<T>, const vector_base<T>, 
op_element_binary<op_div> >
+    element_div(vector_base<T> const & v1, vector_base<T> const & v2);
+
+
+
+    template<typename T>
+    void inner_prod_impl(vector_base<T> const & vec1,
+                         vector_base<T> const & vec2,
+                         scalar<T> & result);
+
+    template<typename LHS, typename RHS, typename OP, typename T>
+    void inner_prod_impl(viennacl::vector_expression<LHS, RHS, OP> const & 
vec1,
+                         vector_base<T> const & vec2,
+                         scalar<T> & result);
+
+    template<typename T, typename LHS, typename RHS, typename OP>
+    void inner_prod_impl(vector_base<T> const & vec1,
+                         viennacl::vector_expression<LHS, RHS, OP> const & 
vec2,
+                         scalar<T> & result);
+
+    template<typename LHS1, typename RHS1, typename OP1,
+              typename LHS2, typename RHS2, typename OP2, typename T>
+    void inner_prod_impl(viennacl::vector_expression<LHS1, RHS1, OP1> const & 
vec1,
+                         viennacl::vector_expression<LHS2, RHS2, OP2> const & 
vec2,
+                         scalar<T> & result);
+
+    ///////////////////////////
+
+    template<typename T>
+    void inner_prod_cpu(vector_base<T> const & vec1,
+                        vector_base<T> const & vec2,
+                        T & result);
+
+    template<typename LHS, typename RHS, typename OP, typename T>
+    void inner_prod_cpu(viennacl::vector_expression<LHS, RHS, OP> const & vec1,
+                        vector_base<T> const & vec2,
+                        T & result);
+
+    template<typename T, typename LHS, typename RHS, typename OP>
+    void inner_prod_cpu(vector_base<T> const & vec1,
+                        viennacl::vector_expression<LHS, RHS, OP> const & vec2,
+                        T & result);
+
+    template<typename LHS1, typename RHS1, typename OP1,
+              typename LHS2, typename RHS2, typename OP2, typename S3>
+    void inner_prod_cpu(viennacl::vector_expression<LHS1, RHS1, OP1> const & 
vec1,
+                        viennacl::vector_expression<LHS2, RHS2, OP2> const & 
vec2,
+                        S3 & result);
+
+
+
+    //forward definition of norm_1_impl function
+    template<typename T>
+    void norm_1_impl(vector_base<T> const & vec, scalar<T> & result);
+
+    template<typename LHS, typename RHS, typename OP, typename T>
+    void norm_1_impl(viennacl::vector_expression<LHS, RHS, OP> const & vec,
+                     scalar<T> & result);
+
+
+    template<typename T>
+    void norm_1_cpu(vector_base<T> const & vec,
+                    T & result);
+
+    template<typename LHS, typename RHS, typename OP, typename S2>
+    void norm_1_cpu(viennacl::vector_expression<LHS, RHS, OP> const & vec,
+                    S2 & result);
+
+    //forward definition of norm_2_impl function
+    template<typename T>
+    void norm_2_impl(vector_base<T> const & vec, scalar<T> & result);
+
+    template<typename LHS, typename RHS, typename OP, typename T>
+    void norm_2_impl(viennacl::vector_expression<LHS, RHS, OP> const & vec,
+                     scalar<T> & result);
+
+    template<typename T>
+    void norm_2_cpu(vector_base<T> const & vec, T & result);
+
+    template<typename LHS, typename RHS, typename OP, typename S2>
+    void norm_2_cpu(viennacl::vector_expression<LHS, RHS, OP> const & vec,
+                    S2 & result);
+
+
+    //forward definition of norm_inf_impl function
+    template<typename T>
+    void norm_inf_impl(vector_base<T> const & vec, scalar<T> & result);
+
+    template<typename LHS, typename RHS, typename OP, typename T>
+    void norm_inf_impl(viennacl::vector_expression<LHS, RHS, OP> const & vec,
+                      scalar<T> & result);
+
+
+    template<typename T>
+    void norm_inf_cpu(vector_base<T> const & vec, T & result);
+
+    template<typename LHS, typename RHS, typename OP, typename S2>
+    void norm_inf_cpu(viennacl::vector_expression<LHS, RHS, OP> const & vec,
+                      S2 & result);
+
+    //forward definition of max()-related functions
+    template<typename T>
+    void max_impl(vector_base<T> const & vec, scalar<T> & result);
+
+    template<typename LHS, typename RHS, typename OP, typename T>
+    void max_impl(viennacl::vector_expression<LHS, RHS, OP> const & vec,
+                  scalar<T> & result);
+
+
+    template<typename T>
+    void max_cpu(vector_base<T> const & vec, T & result);
+
+    template<typename LHS, typename RHS, typename OP, typename S2>
+    void max_cpu(viennacl::vector_expression<LHS, RHS, OP> const & vec,
+                 S2 & result);
+
+    //forward definition of min()-related functions
+    template<typename T>
+    void min_impl(vector_base<T> const & vec, scalar<T> & result);
+
+    template<typename LHS, typename RHS, typename OP, typename T>
+    void min_impl(viennacl::vector_expression<LHS, RHS, OP> const & vec,
+                  scalar<T> & result);
+
+
+    template<typename T>
+    void min_cpu(vector_base<T> const & vec, T & result);
+
+    template<typename LHS, typename RHS, typename OP, typename S2>
+    void min_cpu(viennacl::vector_expression<LHS, RHS, OP> const & vec,
+                 S2 & result);
+
+    //forward definition of sum()-related functions
+    template<typename T>
+    void sum_impl(vector_base<T> const & vec, scalar<T> & result);
+
+    template<typename LHS, typename RHS, typename OP, typename T>
+    void sum_impl(viennacl::vector_expression<LHS, RHS, OP> const & vec,
+                  scalar<T> & result);
+
+
+    template<typename T>
+    void sum_cpu(vector_base<T> const & vec, T & result);
+
+    template<typename LHS, typename RHS, typename OP, typename S2>
+    void sum_cpu(viennacl::vector_expression<LHS, RHS, OP> const & vec,
+                 S2 & result);
+
+
+    // forward definition of frobenius norm:
+    template<typename T>
+    void norm_frobenius_impl(matrix_base<T> const & vec, scalar<T> & result);
+
+    template<typename T>
+    void norm_frobenius_cpu(matrix_base<T> const & vec, T & result);
+
+
+    template<typename T>
+    vcl_size_t index_norm_inf(vector_base<T> const & vec);
+
+    template<typename LHS, typename RHS, typename OP>
+    vcl_size_t index_norm_inf(viennacl::vector_expression<LHS, RHS, OP> const 
& vec);
+
+    //forward definition of prod_impl functions
+
+    template<typename NumericT>
+    void prod_impl(const matrix_base<NumericT> & mat,
+                   const vector_base<NumericT> & vec,
+                         vector_base<NumericT> & result);
+
+    template<typename NumericT>
+    void prod_impl(const matrix_expression< const matrix_base<NumericT>, const 
matrix_base<NumericT>, op_trans> & mat_trans,
+                   const vector_base<NumericT> & vec,
+                         vector_base<NumericT> & result);
+
+    template<typename SparseMatrixType, class SCALARTYPE, unsigned int 
ALIGNMENT>
+    typename viennacl::enable_if< 
viennacl::is_any_sparse_matrix<SparseMatrixType>::value,
+                                  vector_expression<const SparseMatrixType,
+                                                    const vector<SCALARTYPE, 
ALIGNMENT>,
+                                                    op_prod >
+                                 >::type
+    prod_impl(const SparseMatrixType & mat,
+              const vector<SCALARTYPE, ALIGNMENT> & vec);
+
+    // forward definition of summation routines for matrices:
+
+    template<typename NumericT>
+    void row_sum_impl(const matrix_base<NumericT> & A,
+                            vector_base<NumericT> & result);
+
+    template<typename NumericT>
+    void column_sum_impl(const matrix_base<NumericT> & A,
+                               vector_base<NumericT> & result);
+
+#endif
+
+    namespace detail
+    {
+      enum row_info_types
+      {
+        SPARSE_ROW_NORM_INF = 0,
+        SPARSE_ROW_NORM_1,
+        SPARSE_ROW_NORM_2,
+        SPARSE_ROW_DIAGONAL
+      };
+
+    }
+
+
+    /** @brief A tag class representing a lower triangular matrix */
+    struct lower_tag
+    {
+      static const char * name() { return "lower"; }
+    };      //lower triangular matrix
+    /** @brief A tag class representing an upper triangular matrix */
+    struct upper_tag
+    {
+      static const char * name() { return "upper"; }
+    };      //upper triangular matrix
+    /** @brief A tag class representing a lower triangular matrix with unit 
diagonal*/
+    struct unit_lower_tag
+    {
+      static const char * name() { return "unit_lower"; }
+    }; //unit lower triangular matrix
+    /** @brief A tag class representing an upper triangular matrix with unit 
diagonal*/
+    struct unit_upper_tag
+    {
+      static const char * name() { return "unit_upper"; }
+    }; //unit upper triangular matrix
+
+    //preconditioner tags
+    class ilut_tag;
+
+    /** @brief A tag class representing the use of no preconditioner */
+    class no_precond
+    {
+      public:
+        template<typename VectorType>
+        void apply(VectorType &) const {}
+    };
+
+
+  } //namespace linalg
+
+  //
+  // More namespace comments to follow:
+  //
+
+  /** @brief Namespace providing routines for handling the different memory 
domains. */
+  namespace backend
+  {
+    /** @brief Provides implementations for handling memory buffers in CPU 
RAM. */
+    namespace cpu_ram
+    {
+      /** @brief Holds implementation details for handling memory buffers in 
CPU RAM. Not intended for direct use by library users. */
+      namespace detail {}
+    }
+
+    /** @brief Provides implementations for handling CUDA memory buffers. */
+    namespace cuda
+    {
+      /** @brief Holds implementation details for handling CUDA memory 
buffers. Not intended for direct use by library users. */
+      namespace detail {}
+    }
+
+    /** @brief Implementation details for the generic memory backend 
interface. */
+    namespace detail {}
+
+    /** @brief Provides implementations for handling OpenCL memory buffers. */
+    namespace opencl
+    {
+      /** @brief Holds implementation details for handling OpenCL memory 
buffers. Not intended for direct use by library users. */
+      namespace detail {}
+    }
+  }
+
+
+  /** @brief Holds implementation details for functionality in the main 
viennacl-namespace. Not intended for direct use by library users. */
+  namespace detail
+  {
+    /** @brief Helper namespace for fast Fourier transforms. Not to be used 
directly by library users. */
+    namespace fft
+    {
+      /** @brief Helper namespace for fast-Fourier transformation. Deprecated. 
*/
+      namespace FFT_DATA_ORDER {}
+    }
+  }
+
+
+  /** @brief Provides an OpenCL kernel generator. */
+  namespace device_specific
+  {
+    /** @brief Provides the implementation for tuning the kernels for a 
particular device. */
+    namespace autotune {}
+
+    /** @brief Contains implementation details of the kernel generator. */
+    namespace detail {}
+
+    /** @brief Namespace holding the various device-specific parameters for 
generating the best kernels. */
+    namespace profiles {}
+
+    /** @brief Contains various helper routines for kernel generation. */
+    namespace utils {}
+  }
+
+  /** @brief Provides basic input-output functionality. */
+  namespace io
+  {
+    /** @brief Implementation details for IO functionality. Usually not of 
interest for a library user. */
+    namespace detail {}
+
+    /** @brief Namespace holding the various XML tag definitions for the 
kernel parameter tuning facility. */
+    namespace tag {}
+
+    /** @brief Namespace holding the various XML strings for the kernel 
parameter tuning facility. */
+    namespace val {}
+  }
+
+  /** @brief Provides all linear algebra operations which are not covered by 
operator overloads. */
+  namespace linalg
+  {
+    /** @brief Holds all CUDA compute kernels used by ViennaCL. */
+    namespace cuda
+    {
+      /** @brief Helper functions for the CUDA linear algebra backend. */
+      namespace detail {}
+    }
+
+    /** @brief Namespace holding implementation details for linear algebra 
routines. Usually not of interest for a library user. */
+    namespace detail
+    {
+      /** @brief Implementation namespace for algebraic multigrid 
preconditioner. */
+      namespace amg {}
+
+      /** @brief Implementation namespace for sparse approximate inverse 
preconditioner. */
+      namespace spai {}
+    }
+
+    /** @brief Holds all compute kernels with conventional host-based 
execution (buffers in CPU RAM). */
+    namespace host_based
+    {
+      /** @brief Helper functions for the host-based linear algebra backend. */
+      namespace detail {}
+    }
+
+    /** @brief Namespace containing the OpenCL kernels. Deprecated, will be 
moved to viennacl::linalg::opencl in future releases. */
+    namespace kernels {}
+
+    /** @brief Holds all routines providing OpenCL linear algebra operations. 
*/
+    namespace opencl
+    {
+      /** @brief Helper functions for OpenCL-accelerated linear algebra 
operations. */
+      namespace detail {}
+
+      /** @brief Contains the OpenCL kernel generation functions for a 
predefined set of functionality. */
+      namespace kernels
+      {
+        /** @brief Implementation details for the predefined OpenCL kernels. */
+        namespace detail {}
+      }
+    }
+  }
+
+  /** @brief OpenCL backend. Manages platforms, contexts, buffers, kernels, 
etc. */
+  namespace ocl {}
+
+  /** @brief Namespace containing many meta-functions. */
+  namespace result_of {}
+
+  /** @brief Namespace for various tools used within ViennaCL. */
+  namespace tools
+  {
+    /** @brief Contains implementation details for the tools. Usually not of 
interest for the library user. */
+    namespace detail {}
+  }
+
+  /** @brief Namespace providing traits-information as well as generic 
wrappers to common routines for vectors and matrices such as size() or clear() 
*/
+  namespace traits {}
+
+  /** @brief Contains the scheduling functionality which allows for dynamic 
kernel generation as well as the fusion of multiple statements into a single 
kernel. */
+  namespace scheduler
+  {
+    /** @brief Implementation details for the scheduler */
+    namespace detail {}
+
+    /** @brief Helper metafunctions used for the scheduler */
+    namespace result_of {}
+  }
+
+} //namespace viennacl
+
+#endif
+
+/*@}*/

http://git-wip-us.apache.org/repos/asf/mahout/blob/f7c1f802/native-viennaCL/src/main/cpp/viennacl/hankel_matrix.hpp
----------------------------------------------------------------------
diff --git a/native-viennaCL/src/main/cpp/viennacl/hankel_matrix.hpp 
b/native-viennaCL/src/main/cpp/viennacl/hankel_matrix.hpp
new file mode 100644
index 0000000..084e6c8
--- /dev/null
+++ b/native-viennaCL/src/main/cpp/viennacl/hankel_matrix.hpp
@@ -0,0 +1,343 @@
+#ifndef VIENNACL_HANKEL_MATRIX_HPP
+#define VIENNACL_HANKEL_MATRIX_HPP
+
+/* =========================================================================
+   Copyright (c) 2010-2016, Institute for Microelectronics,
+                            Institute for Analysis and Scientific Computing,
+                            TU Wien.
+   Portions of this software are copyright by UChicago Argonne, LLC.
+
+                            -----------------
+                  ViennaCL - The Vienna Computing Library
+                            -----------------
+
+   Project Head:    Karl Rupp                   [email protected]
+
+   (A list of authors and contributors can be found in the manual)
+
+   License:         MIT (X11), see file LICENSE in the base directory
+============================================================================= 
*/
+
+
+/** @file hankel_matrix.hpp
+    @brief Implementation of the hankel_matrix class for efficient 
manipulation of Hankel matrices.  Experimental.
+*/
+
+#include "viennacl/forwards.h"
+#include "viennacl/vector.hpp"
+#include "viennacl/ocl/backend.hpp"
+
+#include "viennacl/toeplitz_matrix.hpp"
+#include "viennacl/fft.hpp"
+
+#include "viennacl/linalg/hankel_matrix_operations.hpp"
+
+namespace viennacl
+{
+/** @brief A Hankel matrix class
+  *
+  * @tparam NumericT   The underlying scalar type (either float or double)
+  * @tparam AlignmentV    The internal memory size is given by 
(size()/AlignmentV + 1) * AlignmentV. AlignmentV must be a power of two. Best 
values or usually 4, 8 or 16, higher values are usually a waste of memory.
+  */
+template<class NumericT, unsigned int AlignmentV>
+class hankel_matrix
+{
+public:
+  typedef viennacl::backend::mem_handle                                        
                      handle_type;
+  typedef scalar<typename 
viennacl::tools::CHECK_SCALAR_TEMPLATE_ARGUMENT<NumericT>::ResultType>   
value_type;
+
+  /**
+       * @brief The default constructor. Does not allocate any memory.
+       *
+       */
+  explicit hankel_matrix() {}
+
+  /**
+       * @brief         Creates the matrix with the given size
+       *
+       * @param rows      Number of rows of the matrix
+       * @param cols      Number of columns of the matrix
+       */
+  explicit hankel_matrix(vcl_size_t rows, vcl_size_t cols) : elements_(rows, 
cols)
+  {
+    assert(rows == cols && bool("Hankel matrix must be square!"));
+    (void)cols;  // avoid 'unused parameter' warning in optimized builds
+  }
+
+  /** @brief Resizes the matrix.
+      *   Existing entries can be preserved
+      *
+      * @param sz         New size of matrix
+      * @param preserve   If true, existing values are preserved.
+      */
+  void resize(vcl_size_t sz, bool preserve = true)
+  {
+    elements_.resize(sz, preserve);
+  }
+
+  /** @brief Returns the OpenCL handle
+      *
+      *   @return OpenCL handle
+      */
+  handle_type const & handle() const { return elements_.handle(); }
+
+  /**
+       * @brief Returns an internal viennacl::toeplitz_matrix, which 
represents a Hankel matrix elements
+       *
+       */
+  toeplitz_matrix<NumericT, AlignmentV> & elements() { return elements_; }
+  toeplitz_matrix<NumericT, AlignmentV> const & elements() const { return 
elements_; }
+
+  /**
+       * @brief Returns the number of rows of the matrix
+       */
+  vcl_size_t size1() const { return elements_.size1(); }
+
+  /**
+       * @brief Returns the number of columns of the matrix
+       */
+  vcl_size_t size2() const { return elements_.size2(); }
+
+  /** @brief Returns the internal size of matrix representtion.
+      *   Usually required for launching OpenCL kernels only
+      *
+      *   @return Internal size of matrix representation
+      */
+  vcl_size_t internal_size() const { return elements_.internal_size(); }
+
+  /**
+       * @brief Read-write access to a element of the matrix
+       *
+       * @param row_index  Row index of accessed element
+       * @param col_index  Column index of accessed element
+       * @return Proxy for matrix entry
+       */
+  entry_proxy<NumericT> operator()(unsigned int row_index, unsigned int 
col_index)
+  {
+    assert(row_index < size1() && col_index < size2() && bool("Invalid 
access"));
+
+    return elements_(size1() - row_index - 1, col_index);
+  }
+
+  /**
+       * @brief += operation for Hankel matrices
+       *
+       * @param that Matrix which will be added
+       * @return Result of addition
+       */
+  hankel_matrix<NumericT, AlignmentV>& operator +=(hankel_matrix<NumericT, 
AlignmentV>& that)
+  {
+    elements_ += that.elements();
+    return *this;
+  }
+
+private:
+  hankel_matrix(hankel_matrix const &) {}
+  hankel_matrix & operator=(hankel_matrix const & t);
+
+  toeplitz_matrix<NumericT, AlignmentV> elements_;
+};
+
+/** @brief Copies a Hankel matrix from the std::vector to the OpenCL device 
(either GPU or multi-core CPU)
+  *
+  *
+  * @param cpu_vec   A std::vector on the host.
+  * @param gpu_mat   A hankel_matrix from ViennaCL
+  */
+template<typename NumericT, unsigned int AlignmentV>
+void copy(std::vector<NumericT> const & cpu_vec, hankel_matrix<NumericT, 
AlignmentV> & gpu_mat)
+{
+  assert((gpu_mat.size1() * 2 - 1)  == cpu_vec.size() && bool("Size 
mismatch"));
+
+  copy(cpu_vec, gpu_mat.elements());
+}
+
+/** @brief Copies a Hankel matrix from the OpenCL device (either GPU or 
multi-core CPU) to the std::vector
+  *
+  *
+  * @param gpu_mat   A hankel_matrix from ViennaCL
+  * @param cpu_vec   A std::vector on the host.
+  */
+template<typename NumericT, unsigned int AlignmentV>
+void copy(hankel_matrix<NumericT, AlignmentV> const & gpu_mat, 
std::vector<NumericT> & cpu_vec)
+{
+  assert((gpu_mat.size1() * 2 - 1)  == cpu_vec.size() && bool("Size 
mismatch"));
+
+  copy(gpu_mat.elements(), cpu_vec);
+}
+
+/** @brief Copies a Hankel matrix from the OpenCL device (either GPU or 
multi-core CPU) to the matrix-like object
+  *
+  *
+  * @param han_src   A hankel_matrix from ViennaCL
+  * @param com_dst   A matrix-like object
+  */
+template<typename NumericT, unsigned int AlignmentV, typename MatrixT>
+void copy(hankel_matrix<NumericT, AlignmentV> const & han_src, MatrixT& 
com_dst)
+{
+  assert( (viennacl::traits::size1(com_dst) == han_src.size1()) && bool("Size 
mismatch") );
+  assert( (viennacl::traits::size2(com_dst) == han_src.size2()) && bool("Size 
mismatch") );
+
+  vcl_size_t size = han_src.size1();
+  std::vector<NumericT> tmp(size * 2 - 1);
+  copy(han_src, tmp);
+
+  for (vcl_size_t i = 0; i < size; i++)
+    for (vcl_size_t j = 0; j < size; j++)
+      com_dst(i, j) = tmp[i + j];
+}
+
+/** @brief Copies a the matrix-like object to the Hankel matrix from the 
OpenCL device (either GPU or multi-core CPU)
+  *
+  *
+  * @param com_src   A std::vector on the host
+  * @param han_dst   A hankel_matrix from ViennaCL
+  */
+template<typename NumericT, unsigned int AlignmentV, typename MatrixT>
+void copy(MatrixT const & com_src, hankel_matrix<NumericT, AlignmentV>& 
han_dst)
+{
+  assert( (han_dst.size1() == 0 || viennacl::traits::size1(com_src) == 
han_dst.size1()) && bool("Size mismatch") );
+  assert( (han_dst.size2() == 0 || viennacl::traits::size2(com_src) == 
han_dst.size2()) && bool("Size mismatch") );
+  assert( viennacl::traits::size2(com_src) == viennacl::traits::size1(com_src) 
&& bool("Logic error: non-square Hankel matrix!") );
+
+  vcl_size_t size = viennacl::traits::size1(com_src);
+
+  std::vector<NumericT> tmp(2*size - 1);
+
+  for (vcl_size_t i = 0; i < size; i++)
+    tmp[i] = com_src(0, i);
+
+  for (vcl_size_t i = 1; i < size; i++)
+    tmp[size + i - 1] = com_src(size - 1, i);
+
+  viennacl::copy(tmp, han_dst);
+}
+
+/*template<typename NumericT, unsigned int AlignmentV, unsigned int 
VECTOR_AlignmentV>
+  void prod_impl(hankel_matrix<NumericT, AlignmentV>& mat,
+                 vector<NumericT, VECTOR_AlignmentV>& vec,
+                 vector<NumericT, VECTOR_AlignmentV>& result)
+  {
+      prod_impl(mat.elements(), vec, result);
+      fft::reverse(result);
+  }*/
+
+template<class NumericT, unsigned int AlignmentV>
+std::ostream & operator<<(std::ostream & s, hankel_matrix<NumericT, 
AlignmentV>& gpu_matrix)
+{
+  vcl_size_t size = gpu_matrix.size1();
+  std::vector<NumericT> tmp(2*size - 1);
+  copy(gpu_matrix, tmp);
+  s << "[" << size << "," << size << "](";
+
+  for (vcl_size_t i = 0; i < size; i++)
+  {
+    s << "(";
+    for (vcl_size_t j = 0; j < size; j++)
+    {
+      s << tmp[i + j];
+      //s << (int)i - (int)j;
+      if (j < (size - 1)) s << ",";
+    }
+    s << ")";
+  }
+  s << ")";
+  return s;
+}
+
+//
+// Specify available operations:
+//
+
+/** \cond */
+
+namespace linalg
+{
+namespace detail
+{
+  // x = A * y
+  template<typename T, unsigned int A>
+  struct op_executor<vector_base<T>, op_assign, vector_expression<const 
hankel_matrix<T, A>, const vector_base<T>, op_prod> >
+  {
+    static void apply(vector_base<T> & lhs, vector_expression<const 
hankel_matrix<T, A>, const vector_base<T>, op_prod> const & rhs)
+    {
+      // check for the special case x = A * x
+      if (viennacl::traits::handle(lhs) == viennacl::traits::handle(rhs.rhs()))
+      {
+        viennacl::vector<T> temp(lhs);
+        viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), temp);
+        lhs = temp;
+      }
+      else
+        viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs);
+    }
+  };
+
+  template<typename T, unsigned int A>
+  struct op_executor<vector_base<T>, op_inplace_add, vector_expression<const 
hankel_matrix<T, A>, const vector_base<T>, op_prod> >
+  {
+    static void apply(vector_base<T> & lhs, vector_expression<const 
hankel_matrix<T, A>, const vector_base<T>, op_prod> const & rhs)
+    {
+      viennacl::vector<T> temp(lhs);
+      viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), temp);
+      lhs += temp;
+    }
+  };
+
+  template<typename T, unsigned int A>
+  struct op_executor<vector_base<T>, op_inplace_sub, vector_expression<const 
hankel_matrix<T, A>, const vector_base<T>, op_prod> >
+  {
+    static void apply(vector_base<T> & lhs, vector_expression<const 
hankel_matrix<T, A>, const vector_base<T>, op_prod> const & rhs)
+    {
+      viennacl::vector<T> temp(lhs);
+      viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), temp);
+      lhs -= temp;
+    }
+  };
+
+
+  // x = A * vec_op
+  template<typename T, unsigned int A, typename LHS, typename RHS, typename OP>
+  struct op_executor<vector_base<T>, op_assign, vector_expression<const 
hankel_matrix<T, A>, const vector_expression<const LHS, const RHS, OP>, 
op_prod> >
+  {
+    static void apply(vector_base<T> & lhs, vector_expression<const 
hankel_matrix<T, A>, const vector_expression<const LHS, const RHS, OP>, 
op_prod> const & rhs)
+    {
+      viennacl::vector<T> temp(rhs.rhs());
+      viennacl::linalg::prod_impl(rhs.lhs(), temp, lhs);
+    }
+  };
+
+  // x = A * vec_op
+  template<typename T, unsigned int A, typename LHS, typename RHS, typename OP>
+  struct op_executor<vector_base<T>, op_inplace_add, vector_expression<const 
hankel_matrix<T, A>, vector_expression<const LHS, const RHS, OP>, op_prod> >
+  {
+    static void apply(vector_base<T> & lhs, vector_expression<const 
hankel_matrix<T, A>, vector_expression<const LHS, const RHS, OP>, op_prod> 
const & rhs)
+    {
+      viennacl::vector<T> temp(rhs.rhs());
+      viennacl::vector<T> temp_result(lhs);
+      viennacl::linalg::prod_impl(rhs.lhs(), temp, temp_result);
+      lhs += temp_result;
+    }
+  };
+
+  // x = A * vec_op
+  template<typename T, unsigned int A, typename LHS, typename RHS, typename OP>
+  struct op_executor<vector_base<T>, op_inplace_sub, vector_expression<const 
hankel_matrix<T, A>, const vector_expression<const LHS, const RHS, OP>, 
op_prod> >
+  {
+    static void apply(vector_base<T> & lhs, vector_expression<const 
hankel_matrix<T, A>, const vector_expression<const LHS, const RHS, OP>, 
op_prod> const & rhs)
+    {
+      viennacl::vector<T> temp(rhs.rhs());
+      viennacl::vector<T> temp_result(lhs);
+      viennacl::linalg::prod_impl(rhs.lhs(), temp, temp_result);
+      lhs -= temp_result;
+    }
+  };
+
+
+
+} // namespace detail
+} // namespace linalg
+
+/** \endcond */
+}
+#endif // VIENNACL_HANKEL_MATRIX_HPP

http://git-wip-us.apache.org/repos/asf/mahout/blob/f7c1f802/native-viennaCL/src/main/cpp/viennacl/hyb_matrix.hpp
----------------------------------------------------------------------
diff --git a/native-viennaCL/src/main/cpp/viennacl/hyb_matrix.hpp 
b/native-viennaCL/src/main/cpp/viennacl/hyb_matrix.hpp
new file mode 100644
index 0000000..e93ede5
--- /dev/null
+++ b/native-viennaCL/src/main/cpp/viennacl/hyb_matrix.hpp
@@ -0,0 +1,442 @@
+#ifndef VIENNACL_HYB_MATRIX_HPP_
+#define VIENNACL_HYB_MATRIX_HPP_
+
+/* =========================================================================
+   Copyright (c) 2010-2016, Institute for Microelectronics,
+                            Institute for Analysis and Scientific Computing,
+                            TU Wien.
+   Portions of this software are copyright by UChicago Argonne, LLC.
+
+                            -----------------
+                  ViennaCL - The Vienna Computing Library
+                            -----------------
+
+   Project Head:    Karl Rupp                   [email protected]
+
+   (A list of authors and contributors can be found in the manual)
+
+   License:         MIT (X11), see file LICENSE in the base directory
+============================================================================= 
*/
+
+/** @file viennacl/hyb_matrix.hpp
+    @brief Implementation of the hyb_matrix class
+
+    Contributed by Volodymyr Kysenko.
+*/
+
+#include "viennacl/forwards.h"
+#include "viennacl/vector.hpp"
+
+#include "viennacl/tools/tools.hpp"
+
+#include "viennacl/linalg/sparse_matrix_operations.hpp"
+
+namespace viennacl
+{
+/** @brief Sparse matrix class using a hybrid format composed of the ELL and 
CSR format for storing the nonzeros. */
+template<typename NumericT, unsigned int AlignmentV  /* see forwards.h for 
default argument */>
+class hyb_matrix
+{
+public:
+  typedef viennacl::backend::mem_handle                                        
                      handle_type;
+  typedef scalar<typename 
viennacl::tools::CHECK_SCALAR_TEMPLATE_ARGUMENT<NumericT>::ResultType>   
value_type;
+
+  hyb_matrix() : csr_threshold_(NumericT(0.8)), rows_(0), cols_(0) {}
+
+  hyb_matrix(viennacl::context ctx) : csr_threshold_(NumericT(0.8)), rows_(0), 
cols_(0)
+  {
+    ell_coords_.switch_active_handle_id(ctx.memory_type());
+    ell_elements_.switch_active_handle_id(ctx.memory_type());
+
+    csr_rows_.switch_active_handle_id(ctx.memory_type());
+    csr_cols_.switch_active_handle_id(ctx.memory_type());
+    csr_elements_.switch_active_handle_id(ctx.memory_type());
+
+#ifdef VIENNACL_WITH_OPENCL
+    if (ctx.memory_type() == OPENCL_MEMORY)
+    {
+      ell_coords_.opencl_handle().context(ctx.opencl_context());
+      ell_elements_.opencl_handle().context(ctx.opencl_context());
+
+      csr_rows_.opencl_handle().context(ctx.opencl_context());
+      csr_cols_.opencl_handle().context(ctx.opencl_context());
+      csr_elements_.opencl_handle().context(ctx.opencl_context());
+    }
+#endif
+  }
+
+  /** @brief Resets all entries in the matrix back to zero without changing 
the matrix size. Resets the sparsity pattern. */
+  void clear()
+  {
+    // ELL part:
+    ellnnz_ = 0;
+
+    viennacl::backend::typesafe_host_array<unsigned int> 
host_coords_buffer(ell_coords_, internal_size1());
+    std::vector<NumericT> host_elements(internal_size1());
+
+    viennacl::backend::memory_create(ell_coords_,   
host_coords_buffer.element_size() * internal_size1(), 
viennacl::traits::context(ell_coords_),   host_coords_buffer.get());
+    viennacl::backend::memory_create(ell_elements_, sizeof(NumericT) * 
internal_size1(),                  viennacl::traits::context(ell_elements_), 
&(host_elements[0]));
+
+    // CSR part:
+    csrnnz_ = 0;
+
+    viennacl::backend::typesafe_host_array<unsigned int> 
host_row_buffer(csr_rows_, rows_ + 1);
+    viennacl::backend::typesafe_host_array<unsigned int> 
host_col_buffer(csr_cols_, 1);
+    host_elements.resize(1);
+
+    viennacl::backend::memory_create(csr_rows_,     
host_row_buffer.element_size() * (rows_ + 1), 
viennacl::traits::context(csr_rows_),     host_row_buffer.get());
+    viennacl::backend::memory_create(csr_cols_,     
host_col_buffer.element_size() * 1,           
viennacl::traits::context(csr_cols_),     host_col_buffer.get());
+    viennacl::backend::memory_create(csr_elements_, sizeof(NumericT) * 1,      
                   viennacl::traits::context(csr_elements_), 
&(host_elements[0]));
+  }
+
+  NumericT  csr_threshold()  const { return csr_threshold_; }
+  void csr_threshold(NumericT thr) { csr_threshold_ = thr; }
+
+  vcl_size_t internal_size1() const { return 
viennacl::tools::align_to_multiple<vcl_size_t>(rows_, AlignmentV); }
+  vcl_size_t internal_size2() const { return 
viennacl::tools::align_to_multiple<vcl_size_t>(cols_, AlignmentV); }
+
+  vcl_size_t size1() const { return rows_; }
+  vcl_size_t size2() const { return cols_; }
+
+  vcl_size_t internal_ellnnz() const {return 
viennacl::tools::align_to_multiple<vcl_size_t>(ellnnz_, AlignmentV); }
+  vcl_size_t ell_nnz() const { return ellnnz_; }
+  vcl_size_t csr_nnz() const { return csrnnz_; }
+
+  const handle_type & handle() const { return ell_elements_; }
+  const handle_type & handle2() const { return ell_coords_; }
+  const handle_type & handle3() const { return csr_rows_; }
+  const handle_type & handle4() const { return csr_cols_; }
+  const handle_type & handle5() const { return csr_elements_; }
+
+public:
+#if defined(_MSC_VER) && _MSC_VER < 1500          //Visual Studio 2005 needs 
special treatment
+  template<typename CPUMatrixT>
+  friend void copy(const CPUMatrixT & cpu_matrix, hyb_matrix & gpu_matrix );
+#else
+  template<typename CPUMatrixT, typename T, unsigned int ALIGN>
+  friend void copy(const CPUMatrixT & cpu_matrix, hyb_matrix<T, ALIGN> & 
gpu_matrix );
+#endif
+
+private:
+  NumericT  csr_threshold_;
+  vcl_size_t rows_;
+  vcl_size_t cols_;
+  vcl_size_t ellnnz_;
+  vcl_size_t csrnnz_;
+
+  handle_type ell_coords_; // ell coords
+  handle_type ell_elements_; // ell elements
+
+  handle_type csr_rows_;
+  handle_type csr_cols_;
+  handle_type csr_elements_;
+};
+
+template<typename CPUMatrixT, typename NumericT, unsigned int AlignmentV>
+void copy(const CPUMatrixT& cpu_matrix, hyb_matrix<NumericT, AlignmentV>& 
gpu_matrix )
+{
+  assert( (gpu_matrix.size1() == 0 || viennacl::traits::size1(cpu_matrix) == 
gpu_matrix.size1()) && bool("Size mismatch") );
+  assert( (gpu_matrix.size2() == 0 || viennacl::traits::size2(cpu_matrix) == 
gpu_matrix.size2()) && bool("Size mismatch") );
+
+  if (cpu_matrix.size1() > 0 && cpu_matrix.size2() > 0)
+  {
+    //determine max capacity for row
+    vcl_size_t max_entries_per_row = 0;
+    std::vector<vcl_size_t> hist_entries(cpu_matrix.size2() + 1, 0);
+
+    for (typename CPUMatrixT::const_iterator1 row_it = cpu_matrix.begin1(); 
row_it != cpu_matrix.end1(); ++row_it)
+    {
+      vcl_size_t num_entries = 0;
+      for (typename CPUMatrixT::const_iterator2 col_it = row_it.begin(); 
col_it != row_it.end(); ++col_it)
+      {
+        ++num_entries;
+      }
+
+      hist_entries[num_entries] += 1;
+      max_entries_per_row = std::max(max_entries_per_row, num_entries);
+    }
+
+    vcl_size_t sum = 0;
+    for (vcl_size_t ind = 0; ind <= max_entries_per_row; ind++)
+    {
+      sum += hist_entries[ind];
+
+      if (NumericT(sum) >= NumericT(gpu_matrix.csr_threshold()) * 
NumericT(cpu_matrix.size1()))
+      {
+        max_entries_per_row = ind;
+        break;
+      }
+    }
+
+    //setup GPU matrix
+    gpu_matrix.ellnnz_ = max_entries_per_row;
+    gpu_matrix.rows_ = cpu_matrix.size1();
+    gpu_matrix.cols_ = cpu_matrix.size2();
+
+    vcl_size_t nnz = gpu_matrix.internal_size1() * 
gpu_matrix.internal_ellnnz();
+
+    viennacl::backend::typesafe_host_array<unsigned int>  
ell_coords(gpu_matrix.ell_coords_, nnz);
+    viennacl::backend::typesafe_host_array<unsigned int>  
csr_rows(gpu_matrix.csr_rows_, cpu_matrix.size1() + 1);
+    std::vector<unsigned int> csr_cols;
+
+    std::vector<NumericT> ell_elements(nnz);
+    std::vector<NumericT> csr_elements;
+
+    vcl_size_t csr_index = 0;
+
+    for (typename CPUMatrixT::const_iterator1 row_it = cpu_matrix.begin1(); 
row_it != cpu_matrix.end1(); ++row_it)
+    {
+      vcl_size_t data_index = 0;
+
+      csr_rows.set(row_it.index1(), csr_index);
+
+      for (typename CPUMatrixT::const_iterator2 col_it = row_it.begin(); 
col_it != row_it.end(); ++col_it)
+      {
+        if (data_index < max_entries_per_row)
+        {
+          ell_coords.set(gpu_matrix.internal_size1() * data_index + 
col_it.index1(), col_it.index2());
+          ell_elements[gpu_matrix.internal_size1() * data_index + 
col_it.index1()] = *col_it;
+        }
+        else
+        {
+          csr_cols.push_back(static_cast<unsigned int>(col_it.index2()));
+          csr_elements.push_back(*col_it);
+
+          csr_index++;
+        }
+
+        data_index++;
+      }
+
+    }
+
+    if (csr_cols.empty())
+    {
+      csr_cols.push_back(0);
+      csr_elements.push_back(0);
+    }
+
+    csr_rows.set(csr_rows.size() - 1, csr_index);
+
+    gpu_matrix.csrnnz_ = csr_cols.size();
+
+    viennacl::backend::typesafe_host_array<unsigned int> 
csr_cols_for_gpu(gpu_matrix.csr_cols_, csr_cols.size());
+    for (vcl_size_t i=0; i<csr_cols.size(); ++i)
+      csr_cols_for_gpu.set(i, csr_cols[i]);
+
+    viennacl::backend::memory_create(gpu_matrix.ell_coords_,   
ell_coords.raw_size(),                    
traits::context(gpu_matrix.ell_coords_), ell_coords.get());
+    viennacl::backend::memory_create(gpu_matrix.ell_elements_, 
sizeof(NumericT) * ell_elements.size(), 
traits::context(gpu_matrix.ell_elements_), &(ell_elements[0]));
+
+    viennacl::backend::memory_create(gpu_matrix.csr_rows_,     
csr_rows.raw_size(),                      
traits::context(gpu_matrix.csr_rows_), csr_rows.get());
+    viennacl::backend::memory_create(gpu_matrix.csr_cols_,     
csr_cols_for_gpu.raw_size(),              
traits::context(gpu_matrix.csr_cols_), csr_cols_for_gpu.get());
+    viennacl::backend::memory_create(gpu_matrix.csr_elements_, 
sizeof(NumericT) * csr_elements.size(), 
traits::context(gpu_matrix.csr_elements_), &(csr_elements[0]));
+  }
+}
+
+
+/** @brief Copies a sparse matrix from the host to the compute device. The 
host type is the std::vector< std::map < > > format .
+  *
+  * @param cpu_matrix   A sparse matrix on the host composed of an STL vector 
and an STL map.
+  * @param gpu_matrix   The sparse hyb_matrix from ViennaCL
+  */
+template<typename IndexT, typename NumericT, unsigned int AlignmentV>
+void copy(std::vector< std::map<IndexT, NumericT> > const & cpu_matrix,
+          hyb_matrix<NumericT, AlignmentV> & gpu_matrix)
+{
+  vcl_size_t max_col = 0;
+  for (vcl_size_t i=0; i<cpu_matrix.size(); ++i)
+  {
+    if (cpu_matrix[i].size() > 0)
+      max_col = std::max<vcl_size_t>(max_col, (cpu_matrix[i].rbegin())->first);
+  }
+
+  viennacl::copy(tools::const_sparse_matrix_adapter<NumericT, 
IndexT>(cpu_matrix, cpu_matrix.size(), max_col + 1), gpu_matrix);
+}
+
+
+
+
+template<typename CPUMatrixT, typename NumericT, unsigned int AlignmentV>
+void copy(const hyb_matrix<NumericT, AlignmentV>& gpu_matrix, CPUMatrixT& 
cpu_matrix)
+{
+  assert( (viennacl::traits::size1(cpu_matrix) == gpu_matrix.size1()) && 
bool("Size mismatch") );
+  assert( (viennacl::traits::size2(cpu_matrix) == gpu_matrix.size2()) && 
bool("Size mismatch") );
+
+  if (gpu_matrix.size1() > 0 && gpu_matrix.size2() > 0)
+  {
+    std::vector<NumericT> ell_elements(gpu_matrix.internal_size1() * 
gpu_matrix.internal_ellnnz());
+    viennacl::backend::typesafe_host_array<unsigned int> 
ell_coords(gpu_matrix.handle2(), gpu_matrix.internal_size1() * 
gpu_matrix.internal_ellnnz());
+
+    std::vector<NumericT> csr_elements(gpu_matrix.csr_nnz());
+    viennacl::backend::typesafe_host_array<unsigned int> 
csr_rows(gpu_matrix.handle3(), gpu_matrix.size1() + 1);
+    viennacl::backend::typesafe_host_array<unsigned int> 
csr_cols(gpu_matrix.handle4(), gpu_matrix.csr_nnz());
+
+    viennacl::backend::memory_read(gpu_matrix.handle(), 0, sizeof(NumericT) * 
ell_elements.size(), &(ell_elements[0]));
+    viennacl::backend::memory_read(gpu_matrix.handle2(), 0, 
ell_coords.raw_size(), ell_coords.get());
+    viennacl::backend::memory_read(gpu_matrix.handle3(), 0, 
csr_rows.raw_size(),   csr_rows.get());
+    viennacl::backend::memory_read(gpu_matrix.handle4(), 0, 
csr_cols.raw_size(),   csr_cols.get());
+    viennacl::backend::memory_read(gpu_matrix.handle5(), 0, sizeof(NumericT) * 
csr_elements.size(), &(csr_elements[0]));
+
+
+    for (vcl_size_t row = 0; row < gpu_matrix.size1(); row++)
+    {
+      for (vcl_size_t ind = 0; ind < gpu_matrix.internal_ellnnz(); ind++)
+      {
+        vcl_size_t offset = gpu_matrix.internal_size1() * ind + row;
+
+        NumericT val = ell_elements[offset];
+        if (val <= 0 && val >= 0) // val == 0 without compiler warnings
+          continue;
+
+        if (ell_coords[offset] >= gpu_matrix.size2())
+        {
+          std::cerr << "ViennaCL encountered invalid data " << offset << " " 
<< ind << " " << row << " " << ell_coords[offset] << " " << gpu_matrix.size2() 
<< std::endl;
+          return;
+        }
+
+        cpu_matrix(row, ell_coords[offset]) = val;
+      }
+
+      for (vcl_size_t ind = csr_rows[row]; ind < csr_rows[row+1]; ind++)
+      {
+        NumericT val = csr_elements[ind];
+        if (val <= 0 && val >= 0) // val == 0 without compiler warnings
+          continue;
+
+        if (csr_cols[ind] >= gpu_matrix.size2())
+        {
+          std::cerr << "ViennaCL encountered invalid data " << std::endl;
+          return;
+        }
+
+        cpu_matrix(row, csr_cols[ind]) = val;
+      }
+    }
+  }
+}
+
+/** @brief Copies a sparse matrix from the compute device to the host. The 
host type is the std::vector< std::map < > > format .
+  *
+  * @param gpu_matrix   The sparse hyb_matrix from ViennaCL
+  * @param cpu_matrix   A sparse matrix on the host composed of an STL vector 
and an STL map.
+  */
+template<typename NumericT, unsigned int AlignmentV, typename IndexT>
+void copy(const hyb_matrix<NumericT, AlignmentV> & gpu_matrix,
+          std::vector< std::map<IndexT, NumericT> > & cpu_matrix)
+{
+  if (cpu_matrix.size() == 0)
+    cpu_matrix.resize(gpu_matrix.size1());
+
+  assert(cpu_matrix.size() == gpu_matrix.size1() && bool("Matrix dimension 
mismatch!"));
+
+  tools::sparse_matrix_adapter<NumericT, IndexT> temp(cpu_matrix, 
cpu_matrix.size(), gpu_matrix.size2());
+  viennacl::copy(gpu_matrix, temp);
+}
+
+//
+// Specify available operations:
+//
+
+/** \cond */
+
+namespace linalg
+{
+namespace detail
+{
+  // x = A * y
+  template<typename T, unsigned int A>
+  struct op_executor<vector_base<T>, op_assign, vector_expression<const 
hyb_matrix<T, A>, const vector_base<T>, op_prod> >
+  {
+    static void apply(vector_base<T> & lhs, vector_expression<const 
hyb_matrix<T, A>, const vector_base<T>, op_prod> const & rhs)
+    {
+      // check for the special case x = A * x
+      if (viennacl::traits::handle(lhs) == viennacl::traits::handle(rhs.rhs()))
+      {
+        viennacl::vector<T> temp(lhs);
+        viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), T(1), temp, T(0));
+        lhs = temp;
+      }
+      else
+        viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), T(1), lhs, T(0));
+    }
+  };
+
+  template<typename T, unsigned int A>
+  struct op_executor<vector_base<T>, op_inplace_add, vector_expression<const 
hyb_matrix<T, A>, const vector_base<T>, op_prod> >
+  {
+    static void apply(vector_base<T> & lhs, vector_expression<const 
hyb_matrix<T, A>, const vector_base<T>, op_prod> const & rhs)
+    {
+      // check for the special case x += A * x
+      if (viennacl::traits::handle(lhs) == viennacl::traits::handle(rhs.rhs()))
+      {
+        viennacl::vector<T> temp(lhs);
+        viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), T(1), temp, T(0));
+        lhs += temp;
+      }
+      else
+        viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), T(1), lhs, T(1));
+    }
+  };
+
+  template<typename T, unsigned int A>
+  struct op_executor<vector_base<T>, op_inplace_sub, vector_expression<const 
hyb_matrix<T, A>, const vector_base<T>, op_prod> >
+  {
+    static void apply(vector_base<T> & lhs, vector_expression<const 
hyb_matrix<T, A>, const vector_base<T>, op_prod> const & rhs)
+    {
+      // check for the special case x -= A * x
+      if (viennacl::traits::handle(lhs) == viennacl::traits::handle(rhs.rhs()))
+      {
+        viennacl::vector<T> temp(lhs);
+        viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), T(1), temp, T(0));
+        lhs -= temp;
+      }
+      else
+        viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), T(-1), lhs, T(1));
+    }
+  };
+
+
+  // x = A * vec_op
+  template<typename T, unsigned int A, typename LHS, typename RHS, typename OP>
+  struct op_executor<vector_base<T>, op_assign, vector_expression<const 
hyb_matrix<T, A>, const vector_expression<const LHS, const RHS, OP>, op_prod> >
+  {
+    static void apply(vector_base<T> & lhs, vector_expression<const 
hyb_matrix<T, A>, const vector_expression<const LHS, const RHS, OP>, op_prod> 
const & rhs)
+    {
+      viennacl::vector<T> temp(rhs.rhs(), viennacl::traits::context(rhs));
+      viennacl::linalg::prod_impl(rhs.lhs(), temp, lhs);
+    }
+  };
+
+  // x = A * vec_op
+  template<typename T, unsigned int A, typename LHS, typename RHS, typename OP>
+  struct op_executor<vector_base<T>, op_inplace_add, vector_expression<const 
hyb_matrix<T, A>, const vector_expression<const LHS, const RHS, OP>, op_prod> >
+  {
+    static void apply(vector_base<T> & lhs, vector_expression<const 
hyb_matrix<T, A>, const vector_expression<const LHS, const RHS, OP>, op_prod> 
const & rhs)
+    {
+      viennacl::vector<T> temp(rhs.rhs(), viennacl::traits::context(rhs));
+      viennacl::vector<T> temp_result(lhs);
+      viennacl::linalg::prod_impl(rhs.lhs(), temp, temp_result);
+      lhs += temp_result;
+    }
+  };
+
+  // x = A * vec_op
+  template<typename T, unsigned int A, typename LHS, typename RHS, typename OP>
+  struct op_executor<vector_base<T>, op_inplace_sub, vector_expression<const 
hyb_matrix<T, A>, const vector_expression<const LHS, const RHS, OP>, op_prod> >
+  {
+    static void apply(vector_base<T> & lhs, vector_expression<const 
hyb_matrix<T, A>, const vector_expression<const LHS, const RHS, OP>, op_prod> 
const & rhs)
+    {
+      viennacl::vector<T> temp(rhs.rhs(), viennacl::traits::context(rhs));
+      viennacl::vector<T> temp_result(lhs);
+      viennacl::linalg::prod_impl(rhs.lhs(), temp, temp_result);
+      lhs -= temp_result;
+    }
+  };
+
+} // namespace detail
+} // namespace linalg
+
+/** \endcond */
+}
+
+#endif

http://git-wip-us.apache.org/repos/asf/mahout/blob/f7c1f802/native-viennaCL/src/main/cpp/viennacl/io/matrix_market.hpp
----------------------------------------------------------------------
diff --git a/native-viennaCL/src/main/cpp/viennacl/io/matrix_market.hpp 
b/native-viennaCL/src/main/cpp/viennacl/io/matrix_market.hpp
new file mode 100644
index 0000000..e8444ee
--- /dev/null
+++ b/native-viennaCL/src/main/cpp/viennacl/io/matrix_market.hpp
@@ -0,0 +1,440 @@
+#ifndef VIENNACL_IO_MATRIX_MARKET_HPP
+#define VIENNACL_IO_MATRIX_MARKET_HPP
+
+/* =========================================================================
+   Copyright (c) 2010-2016, Institute for Microelectronics,
+                            Institute for Analysis and Scientific Computing,
+                            TU Wien.
+   Portions of this software are copyright by UChicago Argonne, LLC.
+
+                            -----------------
+                  ViennaCL - The Vienna Computing Library
+                            -----------------
+
+   Project Head:    Karl Rupp                   [email protected]
+
+   (A list of authors and contributors can be found in the manual)
+
+   License:         MIT (X11), see file LICENSE in the base directory
+============================================================================= 
*/
+
+
+/** @file matrix_market.hpp
+    @brief A reader and writer for the matrix market format is implemented here
+*/
+
+#include <algorithm>
+#include <string>
+#include <iostream>
+#include <fstream>
+#include <sstream>
+#include <vector>
+#include <map>
+#include <cctype>
+#include "viennacl/tools/adapter.hpp"
+#include "viennacl/traits/size.hpp"
+#include "viennacl/traits/fill.hpp"
+
+namespace viennacl
+{
+namespace io
+{
+//helper
+namespace detail
+{
+  inline void trim(char * buffer, long max_size)
+  {
+    //trim at beginning of string
+    long start = 0;
+    for (long i=0; i<max_size; ++i)
+    {
+      if (buffer[i] == ' ')
+        ++start;
+      else
+        break;
+    }
+
+    //trim at end of string
+    long stop = start;
+    for (long i=stop; i<max_size; ++i)
+    {
+      if (buffer[i] == 0)   //end of string
+        break;
+
+      if (buffer[i] != ' ')
+        stop = i;
+    }
+
+    for (long i=0; i<=stop - start; ++i)
+    {
+      buffer[i] = buffer[start + i];
+    }
+
+    if (buffer[0] != ' ')
+      buffer[stop - start + 1] = 0; //terminate string
+    else
+      buffer[0] = 0;
+  }
+
+  inline std::string tolower(std::string & s)
+  {
+    std::transform(s.begin(), s.end(), s.begin(), static_cast < int(*)(int) > 
(std::tolower));
+    return s;
+  }
+
+
+
+} //namespace
+
+///////// reader ////////////
+
+/** @brief Reads a sparse or dense matrix from a file (MatrixMarket format)
+*
+* Note: If the matrix in the MatrixMarket file is complex, only the 
real-valued part is loaded!
+*
+* @param mat The matrix that is to be read
+* @param file Filename from which the matrix should be read
+* @param index_base The index base, typically 1
+* @tparam MatrixT A generic matrix type. Type requirements: size1() returns 
number of rows, size2() returns number columns, operator() writes array 
entries, resize() allows resizing the matrix.
+* @return Returns nonzero if file is read correctly
+*/
+template<typename MatrixT>
+long read_matrix_market_file_impl(MatrixT & mat,
+                                  const char * file,
+                                  long index_base)
+{
+  typedef typename viennacl::result_of::cpu_value_type<typename 
viennacl::result_of::value_type<MatrixT>::type>::type    ScalarT;
+
+  //std::cout << "Reading matrix market file" << std::endl;
+  char buffer[1025];
+  std::ifstream reader(file);
+  std::string token;
+  long linenum = 0;
+  bool symmetric = false;
+  bool dense_format = false;
+  bool is_header = true;
+  bool pattern_matrix = false;
+  //bool is_complex = false;
+  long cur_row = 0;
+  long cur_col = 0;
+  long valid_entries = 0;
+  long nnz = 0;
+
+
+  if (!reader){
+    std::cerr << "ViennaCL: Matrix Market Reader: Cannot open file " << file 
<< std::endl;
+    return EXIT_FAILURE;
+  }
+
+  while (reader.good())
+  {
+    // get a non-empty line
+    do
+    {
+      reader.getline(buffer, 1024);
+      ++linenum;
+      detail::trim(buffer, 1024);
+    }
+    while (reader.good() && buffer[0] == 0);
+
+    if (buffer[0] == '%')
+    {
+      if (buffer[1] == '%')
+      {
+        //parse header:
+        std::stringstream line(std::string(buffer + 2));
+        line >> token;
+        if (detail::tolower(token) != "matrixmarket")
+        {
+          std::cerr << "Error in file " << file << " at line " << linenum << " 
in file " << file << ": Expected 'MatrixMarket', got '" << token << "'" << 
std::endl;
+          return 0;
+        }
+
+        line >> token;
+        if (detail::tolower(token) != "matrix")
+        {
+          std::cerr << "Error in file " << file << " at line " << linenum << " 
in file " << file << ": Expected 'matrix', got '" << token << "'" << std::endl;
+          return 0;
+        }
+
+        line >> token;
+        if (detail::tolower(token) != "coordinate")
+        {
+          if (detail::tolower(token) == "array")
+          {
+            dense_format = true;
+            std::cerr << "Error in file " << file << " at line " << linenum << 
" in file " << file << ": 'array' type is not supported yet!" << std::endl;
+            return 0;
+          }
+          else
+          {
+            std::cerr << "Error in file " << file << " at line " << linenum << 
" in file " << file << ": Expected 'array' or 'coordinate', got '" << token << 
"'" << std::endl;
+            return 0;
+          }
+        }
+
+        line >> token;
+        if (detail::tolower(token) == "pattern")
+        {
+          pattern_matrix = true;
+        }
+        else if (detail::tolower(token) == "complex")
+        {
+          //is_complex = true;
+        }
+        else if (detail::tolower(token) != "real")
+        {
+          std::cerr << "Error in file " << file << ": The MatrixMarket reader 
provided with ViennaCL supports only real valued floating point arithmetic or 
pattern type matrices." << std::endl;
+          return 0;
+        }
+
+        line >> token;
+        if (detail::tolower(token) == "general"){ }
+        else if (detail::tolower(token) == "symmetric"){ symmetric = true; }
+        else
+        {
+          std::cerr << "Error in file " << file << ": The MatrixMarket reader 
provided with ViennaCL supports only general or symmetric matrices." << 
std::endl;
+          return 0;
+        }
+
+      }
+    }
+    else
+    {
+      std::stringstream line(std::stringstream::in | std::stringstream::out);
+      line << std::string(buffer);
+
+      if (is_header)
+      {
+        //read header line
+        vcl_size_t rows;
+        vcl_size_t cols;
+
+        if (line.good())
+          line >> rows;
+        else
+        {
+          std::cerr << "Error in file " << file << ": Could not get matrix 
dimensions (rows) in line " << linenum << std::endl;
+          return 0;
+        }
+
+        if (line.good())
+          line >> cols;
+        else
+        {
+          std::cerr << "Error in file " << file << ": Could not get matrix 
dimensions (columns) in line " << linenum << std::endl;
+          return 0;
+        }
+        if (!dense_format)
+        {
+          if (line.good())
+            line >> nnz;
+          else
+          {
+            std::cerr << "Error in file " << file << ": Could not get matrix 
dimensions (columns) in line " << linenum << std::endl;
+            return 0;
+          }
+        }
+
+        if (rows > 0 && cols > 0)
+          viennacl::traits::resize(mat, rows, cols);
+
+        is_header = false;
+      }
+      else
+      {
+        //read data
+        if (dense_format)
+        {
+          ScalarT value;
+          line >> value;
+          viennacl::traits::fill(mat, static_cast<vcl_size_t>(cur_row), 
static_cast<vcl_size_t>(cur_col), value);
+
+          if (++cur_row == static_cast<long>(viennacl::traits::size1(mat)))
+          {
+            //next column
+            ++cur_col;
+            cur_row = 0;
+          }
+        }
+        else //sparse format
+        {
+          long row;
+          long col;
+          ScalarT value = ScalarT(1);
+
+          //parse data:
+          if (line.good())
+            line >> row;
+          else
+          {
+            std::cerr << "Error in file " << file << ": Parse error for matrix 
row entry in line " << linenum << std::endl;
+            return 0;
+          }
+
+          if (line.good())
+            line >> col;
+          else
+          {
+            std::cerr << "Error in file " << file << ": Parse error for matrix 
col entry in line " << linenum << std::endl;
+            return 0;
+          }
+
+          //take index_base base into account:
+          row -= index_base;
+          col -= index_base;
+
+          if (!pattern_matrix) // value for pattern matrix is implicitly 1, so 
we only need to read data for 'normal' matrices
+          {
+            if (line.good())
+            {
+                line >> value;
+            }
+            else
+            {
+              std::cerr << "Error in file " << file << ": Parse error for 
matrix entry in line " << linenum << std::endl;
+              return 0;
+            }
+          }
+
+          if (row >= static_cast<long>(viennacl::traits::size1(mat)) || row < 
0)
+          {
+            std::cerr << "Error in file " << file << " at line " << linenum << 
": Row index out of bounds: " << row << " (matrix dim: " << 
viennacl::traits::size1(mat) << " x " << viennacl::traits::size2(mat) << ")" << 
std::endl;
+            return 0;
+          }
+
+          if (col >= static_cast<long>(viennacl::traits::size2(mat)) || col < 
0)
+          {
+            std::cerr << "Error in file " << file << " at line " << linenum << 
": Column index out of bounds: " << col << " (matrix dim: " << 
viennacl::traits::size1(mat) << " x " << viennacl::traits::size2(mat) << ")" << 
std::endl;
+            return 0;
+          }
+
+          viennacl::traits::fill(mat, static_cast<vcl_size_t>(row), 
static_cast<vcl_size_t>(col), value); //basically equivalent to mat(row, col) = 
value;
+          if (symmetric)
+            viennacl::traits::fill(mat, static_cast<vcl_size_t>(col), 
static_cast<vcl_size_t>(row), value); //basically equivalent to mat(col, row) = 
value;
+
+          if (++valid_entries == nnz)
+            break;
+
+        } //else dense_format
+      }
+    }
+  }
+
+  //std::cout << linenum << " lines read." << std::endl;
+  reader.close();
+  return linenum;
+}
+
+
+/** @brief Reads a sparse matrix from a file (MatrixMarket format)
+*
+* @param mat The matrix that is to be read (ublas-types and std::vector< 
std::map <unsigned int, ScalarT> > are supported)
+* @param file The filename
+* @param index_base The index base, typically 1
+* @tparam MatrixT A generic matrix type. Type requirements: size1() returns 
number of rows, size2() returns number columns, operator() writes array 
entries, resize() allows resizing the matrix.
+* @return Returns nonzero if file is read correctly
+*/
+template<typename MatrixT>
+long read_matrix_market_file(MatrixT & mat,
+                             const char * file,
+                             long index_base = 1)
+{
+  return read_matrix_market_file_impl(mat, file, index_base);
+}
+
+template<typename MatrixT>
+long read_matrix_market_file(MatrixT & mat,
+                             const std::string & file,
+                             long index_base = 1)
+{
+  return read_matrix_market_file_impl(mat, file.c_str(), index_base);
+}
+
+template<typename ScalarT>
+long read_matrix_market_file(std::vector< std::map<unsigned int, ScalarT> > & 
mat,
+                             const char * file,
+                             long index_base = 1)
+{
+  viennacl::tools::sparse_matrix_adapter<ScalarT> adapted_matrix(mat);
+  return read_matrix_market_file_impl(adapted_matrix, file, index_base);
+}
+
+template<typename ScalarT>
+long read_matrix_market_file(std::vector< std::map<unsigned int, ScalarT> > & 
mat,
+                             const std::string & file,
+                             long index_base = 1)
+{
+  viennacl::tools::sparse_matrix_adapter<ScalarT> adapted_matrix(mat);
+  return read_matrix_market_file_impl(adapted_matrix, file.c_str(), 
index_base);
+}
+
+
+////////// writer /////////////
+template<typename MatrixT>
+void write_matrix_market_file_impl(MatrixT const & mat, const char * file, 
long index_base)
+{
+  std::ofstream writer(file);
+
+  long num_entries = 0;
+  for (typename MatrixT::const_iterator1 row_it = mat.begin1();
+       row_it != mat.end1();
+       ++row_it)
+    for (typename MatrixT::const_iterator2 col_it = row_it.begin();
+         col_it != row_it.end();
+         ++col_it)
+      ++num_entries;
+
+  writer << "%%MatrixMarket matrix coordinate real general" << std::endl;
+  writer << mat.size1() << " " << mat.size2() << " " << num_entries << 
std::endl;
+
+  for (typename MatrixT::const_iterator1 row_it = mat.begin1();
+       row_it != mat.end1();
+       ++row_it)
+    for (typename MatrixT::const_iterator2 col_it = row_it.begin();
+         col_it != row_it.end();
+         ++col_it)
+      writer << col_it.index1() + index_base << " " << col_it.index2() + 
index_base << " " << *col_it << std::endl;
+
+  writer.close();
+}
+
+template<typename ScalarT>
+void write_matrix_market_file(std::vector< std::map<unsigned int, ScalarT> > 
const & mat,
+                              const char * file,
+                              long index_base = 1)
+{
+  viennacl::tools::const_sparse_matrix_adapter<ScalarT> adapted_matrix(mat);
+  return write_matrix_market_file_impl(adapted_matrix, file, index_base);
+}
+
+template<typename ScalarT>
+void write_matrix_market_file(std::vector< std::map<unsigned int, ScalarT> > 
const & mat,
+                              const std::string & file,
+                              long index_base = 1)
+{
+  viennacl::tools::const_sparse_matrix_adapter<ScalarT> adapted_matrix(mat);
+  return write_matrix_market_file_impl(adapted_matrix, file.c_str(), 
index_base);
+}
+
+/** @brief Writes a sparse matrix to a file (MatrixMarket format)
+*
+* @param mat The matrix that is to be read (ublas-types and std::vector< 
std::map <unsigned int, ScalarT> > are supported)
+* @param file The filename
+* @param index_base The index base, typically 1
+* @tparam MatrixT A generic matrix type. Type requirements: size1() returns 
number of rows, size2() returns number columns, operator() writes array 
entries, resize() allows resizing the matrix.
+* @return Returns nonzero if file is read correctly
+*/
+template<typename MatrixT>
+void write_matrix_market_file(MatrixT const & mat,
+                              const std::string & file,
+                              long index_base = 1)
+{
+  write_matrix_market_file_impl(mat, file.c_str(), index_base);
+}
+
+
+} //namespace io
+} //namespace viennacl
+
+#endif

Reply via email to