http://git-wip-us.apache.org/repos/asf/mahout/blob/7ae549fa/native-viennaCL/src/main/cpp/viennacl/linalg/detail/spai/qr.hpp
----------------------------------------------------------------------
diff --git a/native-viennaCL/src/main/cpp/viennacl/linalg/detail/spai/qr.hpp 
b/native-viennaCL/src/main/cpp/viennacl/linalg/detail/spai/qr.hpp
deleted file mode 100644
index 907eb57..0000000
--- a/native-viennaCL/src/main/cpp/viennacl/linalg/detail/spai/qr.hpp
+++ /dev/null
@@ -1,497 +0,0 @@
-#ifndef VIENNACL_LINALG_DETAIL_SPAI_QR_HPP
-#define VIENNACL_LINALG_DETAIL_SPAI_QR_HPP
-
-/* =========================================================================
-   Copyright (c) 2010-2016, Institute for Microelectronics,
-                            Institute for Analysis and Scientific Computing,
-                            TU Wien.
-   Portions of this software are copyright by UChicago Argonne, LLC.
-
-                            -----------------
-                  ViennaCL - The Vienna Computing Library
-                            -----------------
-
-   Project Head:    Karl Rupp                   [email protected]
-
-   (A list of authors and contributors can be found in the manual)
-
-   License:         MIT (X11), see file LICENSE in the base directory
-============================================================================= 
*/
-
-/** @file viennacl/linalg/detail/spai/qr.hpp
-    @brief Implementation of a simultaneous QR factorization of multiple 
matrices. Experimental.
-
-    SPAI code contributed by Nikolay Lukash
-*/
-
-#include <utility>
-#include <iostream>
-#include <fstream>
-#include <string>
-#include <algorithm>
-#include <vector>
-#include <math.h>
-#include <cmath>
-#include <sstream>
-#include "viennacl/ocl/backend.hpp"
-#include "boost/numeric/ublas/vector.hpp"
-#include "boost/numeric/ublas/matrix.hpp"
-#include "boost/numeric/ublas/matrix_proxy.hpp"
-#include "boost/numeric/ublas/storage.hpp"
-#include "boost/numeric/ublas/io.hpp"
-#include "boost/numeric/ublas/matrix_expression.hpp"
-#include "boost/numeric/ublas/detail/matrix_assign.hpp"
-
-#include "viennacl/vector.hpp"
-#include "viennacl/matrix.hpp"
-
-#include "viennacl/linalg/detail/spai/block_matrix.hpp"
-#include "viennacl/linalg/detail/spai/block_vector.hpp"
-#include "viennacl/linalg/opencl/kernels/spai.hpp"
-
-namespace viennacl
-{
-namespace linalg
-{
-namespace detail
-{
-namespace spai
-{
-
-//********** DEBUG FUNCTIONS *****************//
-template< typename T, typename InputIteratorT>
-void Print(std::ostream & ostr, InputIteratorT it_begin, InputIteratorT it_end)
-{
-  //std::ostream_iterator<int> it_os(ostr, delimiter);
-  std::string delimiters = " ";
-  std::copy(it_begin, it_end, std::ostream_iterator<T>(ostr, 
delimiters.c_str()));
-  ostr << std::endl;
-}
-
-template<typename VectorT, typename MatrixT>
-void write_to_block(VectorT & con_A_I_J,
-                    unsigned int start_ind,
-                    std::vector<unsigned int> const & I,
-                    std::vector<unsigned int> const & J,
-                    MatrixT& m)
-{
-  m.resize(I.size(), J.size(), false);
-  for (vcl_size_t i = 0; i < J.size(); ++i)
-    for (vcl_size_t j = 0; j < I.size(); ++j)
-      m(j,i) = con_A_I_J[start_ind + i*I.size() + j];
-}
-
-template<typename VectorT>
-void print_continious_matrix(VectorT & con_A_I_J,
-                             std::vector<cl_uint> & blocks_ind,
-                             std::vector<std::vector<unsigned int> > const & 
g_I,
-                             std::vector<std::vector<unsigned int> > const & 
g_J)
-{
-  typedef typename VectorT::value_type        NumericType;
-
-  std::vector<boost::numeric::ublas::matrix<NumericType> > 
com_A_I_J(g_I.size());
-  for (vcl_size_t i = 0; i < g_I.size(); ++i)
-  {
-    write_to_block(con_A_I_J, blocks_ind[i], g_I[i], g_J[i], com_A_I_J[i]);
-    std::cout << com_A_I_J[i] << std::endl;
-  }
-}
-
-template<typename VectorT>
-void print_continious_vector(VectorT & con_v,
-                             std::vector<cl_uint> & block_ind,
-                             std::vector<std::vector<unsigned int> > const & 
g_J)
-{
-  typedef typename VectorT::value_type     NumericType;
-
-  std::vector<boost::numeric::ublas::vector<NumericType> > com_v(g_J.size());
-  //Print<ScalarType>(std::cout, con_v.begin(), con_v.end());
-  for (vcl_size_t i = 0; i < g_J.size(); ++i)
-  {
-    com_v[i].resize(g_J[i].size());
-    for (vcl_size_t j = 0; j < g_J[i].size(); ++j)
-      com_v[i](j) = con_v[block_ind[i] + j];
-    std::cout << com_v[i] << std::endl;
-  }
-}
-
-
-///**************************************** BLOCK FUNCTIONS 
************************************//
-
-/** @brief Computes size of elements, start indices and matrix dimensions for 
a certain block
- *
- * @param g_I         container of row indices
- * @param g_J         container of column indices
- * @param sz          general size for all elements in a certain block
- * @param blocks_ind  start indices in a certain
- * @param matrix_dims matrix dimensions for each block
- */
-inline void compute_blocks_size(std::vector<std::vector<unsigned int> > const 
& g_I,
-                                std::vector<std::vector<unsigned int> > const 
& g_J,
-                                unsigned int& sz,
-                                std::vector<cl_uint> & blocks_ind,
-                                std::vector<cl_uint> & matrix_dims)
-{
-  sz = 0;
-  for (vcl_size_t i = 0; i < g_I.size(); ++i)
-  {
-    sz += static_cast<unsigned int>(g_I[i].size()*g_J[i].size());
-    matrix_dims[2*i] = static_cast<cl_uint>(g_I[i].size());
-    matrix_dims[2*i + 1] = static_cast<cl_uint>(g_J[i].size());
-    blocks_ind[i+1] = blocks_ind[i] + 
static_cast<cl_uint>(g_I[i].size()*g_J[i].size());
-  }
-}
-
-/** @brief Computes size of particular container of index set
- *
- * @param inds   container of index sets
- * @param size   output size
- */
-template<typename SizeT>
-void get_size(std::vector<std::vector<SizeT> > const & inds,
-              SizeT & size)
-{
-  size = 0;
-  for (vcl_size_t i = 0; i < inds.size(); ++i)
-    size += static_cast<unsigned int>(inds[i].size());
-}
-
-/** @brief Initializes start indices of particular index set
- *
- * @param inds         container of index sets
- * @param start_inds   output index set
- */
-template<typename SizeT>
-void init_start_inds(std::vector<std::vector<SizeT> > const & inds,
-                     std::vector<cl_uint>& start_inds)
-{
-  for (vcl_size_t i = 0; i < inds.size(); ++i)
-    start_inds[i+1] = start_inds[i] + static_cast<cl_uint>(inds[i].size());
-}
-
-
-//*************************************  QR FUNCTIONS  
***************************************//
-
-/** @brief Dot prod of particular column of martix A with it's self starting 
at a certain index beg_ind
- *
- * @param A        init matrix
- * @param beg_ind  starting index
- * @param res      result of dot product
- */
-template<typename MatrixT, typename NumericT>
-void dot_prod(MatrixT const & A,
-              unsigned int beg_ind,
-              NumericT & res)
-{
-  res = NumericT(0);
-  for (vcl_size_t i = beg_ind; i < A.size1(); ++i)
-    res += A(i, beg_ind-1)*A(i, beg_ind-1);
-}
-
-/** @brief Dot prod of particular matrix column with arbitrary vector: A(:, 
col_ind)
- *
- * @param A           init matrix
- * @param v           input vector
- * @param col_ind     starting column index
- * @param start_ind   starting index inside column
- * @param res         result of dot product
- */
-template<typename MatrixT, typename VectorT, typename NumericT>
-void custom_inner_prod(MatrixT const & A,
-                       VectorT const & v,
-                       unsigned int col_ind,
-                       unsigned int start_ind,
-                       NumericT & res)
-{
-  res = static_cast<NumericT>(0);
-  for (unsigned int i = start_ind; i < static_cast<unsigned int>(A.size1()); 
++i)
-    res += A(i, col_ind)*v(i);
-}
-
-/** @brief Copying part of matrix column
- *
- * @param A         init matrix
- * @param v         output vector
- * @param beg_ind   start index for copying
- */
-template<typename MatrixT, typename VectorT>
-void copy_vector(MatrixT const & A,
-                 VectorT       & v,
-                 unsigned int beg_ind)
-{
-  for (unsigned int i = beg_ind; i < static_cast<unsigned int>(A.size1()); ++i)
-    v(i) = A( i, beg_ind-1);
-}
-
-
-//householder reflection c.f. Gene H. Golub, Charles F. Van Loan "Matrix 
Computations" 3rd edition p.210
-/** @brief Computation of Householder vector, householder reflection c.f. Gene 
H. Golub, Charles F. Van Loan "Matrix Computations" 3rd edition p.210
- *
- * @param A     init matrix
- * @param j     start index for computations
- * @param v     output Householder vector
- * @param b     beta
- */
-template<typename MatrixT, typename VectorT, typename NumericT>
-void householder_vector(MatrixT const & A,
-                        unsigned int j,
-                        VectorT & v,
-                        NumericT & b)
-{
-  NumericT sg;
-
-  dot_prod(A, j+1, sg);
-  copy_vector(A, v, j+1);
-  NumericT mu;
-  v(j) = static_cast<NumericT>(1.0);
-  if (!sg)
-    b = 0;
-  else
-  {
-    mu = std::sqrt(A(j,j)*A(j, j) + sg);
-    if (A(j, j) <= 0)
-      v(j) = A(j, j) - mu;
-    else
-      v(j) = -sg/(A(j, j) + mu);
-
-    b = 2*(v(j)*v(j))/(sg + v(j)*v(j));
-    v = v/v(j);
-  }
-}
-
-
-/** @brief Inplace application of Householder vector to a matrix A
- *
- * @param A          init matrix
- * @param iter_cnt   current iteration
- * @param v          Householder vector
- * @param b          beta
- */
-template<typename MatrixT, typename VectorT, typename NumericT>
-void apply_householder_reflection(MatrixT & A,
-                                  unsigned int iter_cnt,
-                                  VectorT & v,
-                                  NumericT b)
-{
-  //update every column of matrix A
-  NumericT in_prod_res;
-
-  for (unsigned int i = iter_cnt; i < static_cast<unsigned int>(A.size2()); 
++i)
-  {
-    //update each column in a fashion: ai = ai - b*v*(v'*ai)
-    custom_inner_prod(A, v, i, iter_cnt, in_prod_res);
-    for (unsigned int j = iter_cnt; j < static_cast<unsigned int>(A.size1()); 
++j)
-      A(j, i) -= b*in_prod_res*v(j);
-  }
-}
-
-/** @brief Storage of vector v in column(A, ind), starting from ind-1 index of 
a column
- *
- * @param A     init matrix
- * @param ind   index of a column
- * @param v     vector that should be stored
- */
-template<typename MatrixT, typename VectorT>
-void store_householder_vector(MatrixT & A,
-                              unsigned int ind,
-                              VectorT & v)
-{
-  for (unsigned int i = ind; i < static_cast<unsigned int>(A.size1()); ++i)
-    A(i, ind-1) = v(i);
-}
-
-
-//QR algorithm
-/** @brief Inplace QR factorization via Householder reflections c.f. Gene H. 
Golub, Charles F. Van Loan "Matrix Computations" 3rd edition p.224
- *
- * @param R     input matrix
- * @param b_v   vector of betas
- */
-template<typename MatrixT, typename VectorT>
-void single_qr(MatrixT & R, VectorT & b_v)
-{
-  typedef typename MatrixT::value_type     NumericType;
-
-  if ((R.size1() > 0) && (R.size2() > 0))
-  {
-    VectorT v = 
static_cast<VectorT>(boost::numeric::ublas::zero_vector<NumericType>(R.size1()));
-    b_v = 
static_cast<VectorT>(boost::numeric::ublas::zero_vector<NumericType>(R.size2()));
-
-    for (unsigned int i = 0; i < static_cast<unsigned int>(R.size2()); ++i)
-    {
-      householder_vector(R, i, v, b_v[i]);
-      apply_householder_reflection(R, i, v, b_v[i]);
-      if (i < R.size1())
-        store_householder_vector(R, i+1, v);
-    }
-  }
-}
-
-//********************** HELP FUNCTIONS FOR GPU-based QR factorization 
*************************//
-
-/** @brief Getting max size of rows/columns from container of index set
- *
- * @param inds        container of index set
- * @param max_size    max size that corresponds to that container
- */
-template<typename SizeT>
-void get_max_block_size(std::vector<std::vector<SizeT> > const & inds,
-                        SizeT & max_size)
-{
-  max_size = 0;
-  for (vcl_size_t i = 0; i < inds.size(); ++i)
-    if (inds[i].size() > max_size)
-      max_size = static_cast<SizeT>(inds[i].size());
-}
-
-/** @brief Dot_prod(column(A, ind), v) starting from index ind+1
- *
- * @param A      input matrix
- * @param v      input vector
- * @param ind    index
- * @param res    result value
- */
-template<typename MatrixT, typename VectorT, typename NumericT>
-void custom_dot_prod(MatrixT const & A,
-                     VectorT const & v,
-                     unsigned int ind,
-                     NumericT & res)
-{
-  res = static_cast<NumericT>(0);
-  for (unsigned int j = ind; j < A.size1(); ++j)
-  {
-    if (j == ind)
-      res += v(j);
-    else
-      res += A(j, ind)*v(j);
-  }
-}
-
-/** @brief Recovery Q from matrix R and vector of betas b_v
- *
- * @param R      input matrix
- * @param b_v    vector of betas
- * @param y      output vector
- */
-template<typename MatrixT, typename VectorT>
-void apply_q_trans_vec(MatrixT const & R,
-                       VectorT const & b_v,
-                       VectorT       & y)
-{
-  typedef typename MatrixT::value_type     NumericT;
-
-  NumericT inn_prod = NumericT(0);
-  for (vcl_size_t i = 0; i < R.size2(); ++i)
-  {
-    custom_dot_prod(R, y, static_cast<unsigned int>(i), inn_prod);
-    for (vcl_size_t j = i; j < R.size1(); ++j)
-    {
-      if (i == j)
-        y(j) -= b_v(i)*inn_prod;
-      else
-        y(j) -= b_v(i)*inn_prod*R(j,i);
-    }
-  }
-}
-
-/** @brief Multiplication of Q'*A, where Q is in implicit for lower part of R 
and vector of betas - b_v
- *
- * @param R      input matrix
- * @param b_v    vector of betas
- * @param A      output matrix
- */
-template<typename MatrixT, typename VectorT>
-void apply_q_trans_mat(MatrixT const & R,
-                       VectorT const & b_v,
-                       MatrixT       & A)
-{
-  VectorT tmp_v;
-  for (vcl_size_t i = 0; i < A.size2(); ++i)
-  {
-    tmp_v = static_cast<VectorT>(column(A,i));
-    apply_q_trans_vec(R, b_v, tmp_v);
-    column(A,i) = tmp_v;
-  }
-}
-
-//parallel QR for GPU
-/** @brief Inplace QR factorization via Householder reflections c.f. Gene H. 
Golub, Charles F. Van Loan "Matrix Computations" 3rd edition p.224 performed on 
GPU
- *
- * @param g_I         container of row indices
- * @param g_J         container of column indices
- * @param g_A_I_J_vcl contigious matrices, GPU memory is used
- * @param g_bv_vcl    contigiuos vectors beta, GPU memory is used
- * @param g_is_update container of indicators that show active blocks
- * @param ctx         Optional context in which the auxiliary data is created 
(one out of multiple OpenCL contexts, CUDA, host)
- */
-template<typename NumericT>
-void block_qr(std::vector<std::vector<unsigned int> > & g_I,
-              std::vector<std::vector<unsigned int> > & g_J,
-              block_matrix & g_A_I_J_vcl,
-              block_vector & g_bv_vcl,
-              std::vector<cl_uint> & g_is_update,
-              viennacl::context ctx)
-{
-  viennacl::ocl::context & opencl_ctx = const_cast<viennacl::ocl::context 
&>(ctx.opencl_context());
-
-  //typedef typename MatrixType::value_type ScalarType;
-  unsigned int bv_size = 0;
-  unsigned int v_size = 0;
-  //set up arguments for GPU
-  //find maximum size of rows/columns
-  unsigned int local_r_n = 0;
-  unsigned int local_c_n = 0;
-  //find max size for blocks
-  get_max_block_size(g_I, local_r_n);
-  get_max_block_size(g_J, local_c_n);
-  //get size
-  get_size(g_J, bv_size);
-  get_size(g_I, v_size);
-  //get start indices
-  std::vector<cl_uint> start_bv_inds(g_I.size() + 1, 0);
-  std::vector<cl_uint> start_v_inds(g_I.size() + 1, 0);
-  init_start_inds(g_J, start_bv_inds);
-  init_start_inds(g_I, start_v_inds);
-  //init arrays
-  std::vector<NumericT> b_v(bv_size, NumericT(0));
-  std::vector<NumericT>   v(v_size,  NumericT(0));
-  //call qr program
-  block_vector v_vcl;
-
-  g_bv_vcl.handle() = opencl_ctx.create_memory(CL_MEM_READ_WRITE,
-                                               static_cast<unsigned 
int>(sizeof(NumericT)*bv_size),
-                                               &(b_v[0]));
-
-  v_vcl.handle() = opencl_ctx.create_memory(CL_MEM_READ_WRITE,
-                                            static_cast<unsigned 
int>(sizeof(NumericT)*v_size),
-                                            &(v[0]));
-  //the same as j_start_inds
-  g_bv_vcl.handle1() = opencl_ctx.create_memory(CL_MEM_READ_WRITE,
-                                                static_cast<unsigned 
int>(sizeof(cl_uint)*g_I.size()),
-                                                &(start_bv_inds[0]));
-
-  v_vcl.handle1() = opencl_ctx.create_memory(CL_MEM_READ_WRITE,
-                                             static_cast<unsigned 
int>(sizeof(cl_uint)*g_I.size()),
-                                             &(start_v_inds[0]));
-  viennacl::ocl::handle<cl_mem> g_is_update_vcl = 
opencl_ctx.create_memory(CL_MEM_READ_WRITE,
-                                                                           
static_cast<unsigned int>(sizeof(cl_uint)*g_is_update.size()),
-                                                                           
&(g_is_update[0]));
-  //local memory
-  //viennacl::ocl::enqueue(k(vcl_vec, size, 
viennacl::ocl::local_mem(sizeof(SCALARTYPE) * k.local_work_size()), temp));
-  viennacl::linalg::opencl::kernels::spai<NumericT>::init(opencl_ctx);
-  viennacl::ocl::kernel & qr_kernel = 
opencl_ctx.get_kernel(viennacl::linalg::opencl::kernels::spai<NumericT>::program_name(),
 "block_qr");
-
-  qr_kernel.local_work_size(0, local_c_n);
-  qr_kernel.global_work_size(0, local_c_n*256);
-  viennacl::ocl::enqueue(qr_kernel(g_A_I_J_vcl.handle(), 
g_A_I_J_vcl.handle1(), g_bv_vcl.handle(),
-                                  v_vcl.handle(), g_A_I_J_vcl.handle2(),
-                                  g_bv_vcl.handle1(), v_vcl.handle1(), 
g_is_update_vcl,
-                                  
viennacl::ocl::local_mem(static_cast<unsigned 
int>(sizeof(NumericT)*(local_r_n*local_c_n))),
-                                  static_cast<cl_uint>(g_I.size())));
-
-}
-}
-}
-}
-}
-#endif

http://git-wip-us.apache.org/repos/asf/mahout/blob/7ae549fa/native-viennaCL/src/main/cpp/viennacl/linalg/detail/spai/small_matrix.hpp
----------------------------------------------------------------------
diff --git 
a/native-viennaCL/src/main/cpp/viennacl/linalg/detail/spai/small_matrix.hpp 
b/native-viennaCL/src/main/cpp/viennacl/linalg/detail/spai/small_matrix.hpp
deleted file mode 100644
index 3cfdbb3..0000000
--- a/native-viennaCL/src/main/cpp/viennacl/linalg/detail/spai/small_matrix.hpp
+++ /dev/null
@@ -1,113 +0,0 @@
-#ifndef VIENNACL_LINALG_DETAIL_SPAI_SMALL_MATRIX_HPP
-#define VIENNACL_LINALG_DETAIL_SPAI_SMALL_MATRIX_HPP
-
-/* =========================================================================
-   Copyright (c) 2010-2016, Institute for Microelectronics,
-                            Institute for Analysis and Scientific Computing,
-                            TU Wien.
-   Portions of this software are copyright by UChicago Argonne, LLC.
-
-                            -----------------
-                  ViennaCL - The Vienna Computing Library
-                            -----------------
-
-   Project Head:    Karl Rupp                   [email protected]
-
-   (A list of authors and contributors can be found in the manual)
-
-   License:         MIT (X11), see file LICENSE in the base directory
-============================================================================= 
*/
-
-/** @file viennacl/linalg/detail/spai/small_matrix.hpp
-    @brief Implementation of a routines for small matrices (helper for SPAI). 
Experimental.
-
-    SPAI code contributed by Nikolay Lukash
-*/
-
-#include <utility>
-#include <iostream>
-#include <fstream>
-#include <string>
-#include <algorithm>
-#include <vector>
-#include <math.h>
-#include <map>
-#include "boost/numeric/ublas/vector.hpp"
-#include "boost/numeric/ublas/matrix.hpp"
-#include "boost/numeric/ublas/matrix_proxy.hpp"
-#include "boost/numeric/ublas/vector_proxy.hpp"
-#include "boost/numeric/ublas/storage.hpp"
-#include "boost/numeric/ublas/io.hpp"
-#include "boost/numeric/ublas/lu.hpp"
-#include "boost/numeric/ublas/triangular.hpp"
-#include "boost/numeric/ublas/matrix_expression.hpp"
-#include "boost/numeric/ublas/detail/matrix_assign.hpp"
-
-#include "viennacl/forwards.h"
-
-namespace viennacl
-{
-namespace linalg
-{
-namespace detail
-{
-namespace spai
-{
-
-//
-// Constructs an orthonormal sparse matrix M (with M^T M = Id). Is composed of 
elementary 2x2 rotation matrices with suitable renumbering.
-//
-template<typename MatrixT>
-void make_rotation_matrix(MatrixT & mat,
-                          vcl_size_t new_size,
-                          vcl_size_t off_diagonal_distance = 4)
-{
-  mat.resize(new_size, new_size, false);
-  mat.clear();
-
-  double val = 1.0 / std::sqrt(2.0);
-
-  for (vcl_size_t i=0; i<new_size; ++i)
-    mat(i,i) = val;
-
-  for (vcl_size_t i=off_diagonal_distance; i<new_size; ++i)
-  {
-    mat(i-off_diagonal_distance, i)                       = val;
-    mat(i,                       i-off_diagonal_distance) = -val;
-  }
-
-}
-
-
-//calcualtes matrix determinant
-template<typename MatrixT>
-double determinant(boost::numeric::ublas::matrix_expression<MatrixT> const & 
mat_r)
-{
-  double det = 1.0;
-
-  MatrixT mLu(mat_r());
-  boost::numeric::ublas::permutation_matrix<vcl_size_t> 
pivots(mat_r().size1());
-
-  int is_singular = static_cast<int>(lu_factorize(mLu, pivots));
-
-  if (!is_singular)
-  {
-    for (vcl_size_t i=0; i < pivots.size(); ++i)
-    {
-      if (pivots(i) != i)
-        det *= -1.0;
-
-      det *= mLu(i,i);
-    }
-  }
-  else
-    det = 0.0;
-
-  return det;
-}
-
-}
-}
-}
-}
-#endif

http://git-wip-us.apache.org/repos/asf/mahout/blob/7ae549fa/native-viennaCL/src/main/cpp/viennacl/linalg/detail/spai/spai-dynamic.hpp
----------------------------------------------------------------------
diff --git 
a/native-viennaCL/src/main/cpp/viennacl/linalg/detail/spai/spai-dynamic.hpp 
b/native-viennaCL/src/main/cpp/viennacl/linalg/detail/spai/spai-dynamic.hpp
deleted file mode 100644
index bac0b9e..0000000
--- a/native-viennaCL/src/main/cpp/viennacl/linalg/detail/spai/spai-dynamic.hpp
+++ /dev/null
@@ -1,687 +0,0 @@
-#ifndef VIENNACL_LINALG_DETAIL_SPAI_SPAI_DYNAMIC_HPP
-#define VIENNACL_LINALG_DETAIL_SPAI_SPAI_DYNAMIC_HPP
-
-/* =========================================================================
-   Copyright (c) 2010-2016, Institute for Microelectronics,
-                            Institute for Analysis and Scientific Computing,
-                            TU Wien.
-   Portions of this software are copyright by UChicago Argonne, LLC.
-
-                            -----------------
-                  ViennaCL - The Vienna Computing Library
-                            -----------------
-
-   Project Head:    Karl Rupp                   [email protected]
-
-   (A list of authors and contributors can be found in the manual)
-
-   License:         MIT (X11), see file LICENSE in the base directory
-============================================================================= 
*/
-
-/** @file viennacl/linalg/detail/spai/spai-dynamic.hpp
-    @brief Implementation of a dynamic SPAI. Provides the routines for 
automatic pattern updates Experimental.
-
-    SPAI code contributed by Nikolay Lukash
-*/
-
-#include <utility>
-#include <iostream>
-#include <fstream>
-#include <string>
-#include <algorithm>
-#include <vector>
-#include <math.h>
-#include <map>
-//#include "block_matrix.hpp"
-//#include "block_vector.hpp"
-//#include "benchmark-utils.hpp"
-#include "boost/numeric/ublas/vector.hpp"
-#include "boost/numeric/ublas/matrix.hpp"
-#include "boost/numeric/ublas/matrix_proxy.hpp"
-#include "boost/numeric/ublas/vector_proxy.hpp"
-#include "boost/numeric/ublas/storage.hpp"
-#include "boost/numeric/ublas/io.hpp"
-#include "boost/numeric/ublas/lu.hpp"
-#include "boost/numeric/ublas/triangular.hpp"
-#include "boost/numeric/ublas/matrix_expression.hpp"
-// ViennaCL includes
-#include "viennacl/linalg/prod.hpp"
-#include "viennacl/matrix.hpp"
-#include "viennacl/compressed_matrix.hpp"
-#include "viennacl/linalg/sparse_matrix_operations.hpp"
-#include "viennacl/linalg/matrix_operations.hpp"
-#include "viennacl/scalar.hpp"
-#include "viennacl/linalg/cg.hpp"
-#include "viennacl/linalg/inner_prod.hpp"
-#include "viennacl/linalg/ilu.hpp"
-#include "viennacl/ocl/backend.hpp"
-
-#include "viennacl/linalg/detail/spai/block_matrix.hpp"
-#include "viennacl/linalg/detail/spai/block_vector.hpp"
-#include "viennacl/linalg/detail/spai/qr.hpp"
-#include "viennacl/linalg/detail/spai/spai-static.hpp"
-#include "viennacl/linalg/detail/spai/spai.hpp"
-#include "viennacl/linalg/detail/spai/spai_tag.hpp"
-#include "viennacl/linalg/opencl/kernels/spai.hpp"
-
-namespace viennacl
-{
-namespace linalg
-{
-namespace detail
-{
-namespace spai
-{
-
-/** @brief Helper functor for comparing std::pair<> based on the second 
member. */
-struct CompareSecond
-{
-  template<typename T1, typename T2>
-  bool operator()(std::pair<T1, T2> const & left, std::pair<T1, T2> const & 
right)
-  {
-    return static_cast<double>(left.second) > 
static_cast<double>(right.second);
-  }
-};
-
-
-/** @brief Composition of new matrix R, that is going to be used in Least 
Square problem solving
- *
- * @param A      matrix Q'*A(I, \\tilde J), where \\tilde J - set of new 
column indices
- * @param R_n    matrix A_Iu_J_u after QR factorization
- * @param R      previously composed matrix R
- */
-template<typename MatrixT>
-void composeNewR(MatrixT const & A,
-                 MatrixT const & R_n,
-                 MatrixT & R)
-{
-  typedef typename MatrixT::value_type        NumericType;
-
-  vcl_size_t row_n = R_n.size1() - (A.size1() - R.size2());
-  MatrixT C = boost::numeric::ublas::zero_matrix<NumericType>(R.size1() + 
row_n, R.size2() + A.size2());
-
-  //write original R to new Composite R
-  boost::numeric::ublas::project(C, boost::numeric::ublas::range(0,R.size1()), 
boost::numeric::ublas::range(0, R.size2())) += R;
-  //write upper part of Q'*A_I_\hatJ, all columns and number of rows that 
equals to R.size2()
-  boost::numeric::ublas::project(C, boost::numeric::ublas::range(0, 
R.size2()), boost::numeric::ublas::range(R.size2(),
-                                                                               
                             R.size2() + A.size2())) +=
-  boost::numeric::ublas::project(A, boost::numeric::ublas::range(0, 
R.size2()), boost::numeric::ublas::range(0, A.size2()));
-
-  //adding decomposed(QR) block to Composite R
-  if (R_n.size1() > 0 && R_n.size2() > 0)
-      boost::numeric::ublas::project(C,
-                                     boost::numeric::ublas::range(R.size2(), 
R.size1() + row_n),
-                                     boost::numeric::ublas::range(R.size2(), 
R.size2() + A.size2())) += R_n;
-  R = C;
-}
-
-/** @brief Composition of new vector of coefficients beta from QR 
factorizations(necessary for Q recovery)
- *
- * @param v_n     new vector from last QR factorization
- * @param v       composition of previous vectors from QR factorizations
- */
-template<typename VectorT>
-void composeNewVector(VectorT const & v_n,
-                      VectorT       & v)
-{
-  typedef typename VectorT::value_type          NumericType;
-
-  VectorT w  = boost::numeric::ublas::zero_vector<NumericType>(v.size() + 
v_n.size());
-  boost::numeric::ublas::project(w, boost::numeric::ublas::range(0, v.size())) 
+= v;
-  boost::numeric::ublas::project(w, boost::numeric::ublas::range(v.size(), 
v.size() + v_n.size())) += v_n;
-  v = w;
-}
-
-/** @brief Computation of Euclidean norm for sparse vector
- *
- * @param v      initial sparse vector
- * @param norm   scalar that represents Euclidean norm
- */
-template<typename SparseVectorT, typename NumericT>
-void sparse_norm_2(SparseVectorT const & v,
-                   NumericT & norm)
-{
-  for (typename SparseVectorT::const_iterator vec_it  = v.begin(); vec_it != 
v.end(); ++vec_it)
-    norm += (vec_it->second)*(vec_it->second);
-
-  norm = std::sqrt(norm);
-}
-
-/** @brief Dot product of two sparse vectors
- *
- * @param v1     initial sparse vector
- * @param v2     initial sparse vector
- * @param res_v  scalar that represents dot product result
- */
-template<typename SparseVectorT, typename NumericT>
-void sparse_inner_prod(SparseVectorT const & v1,
-                       SparseVectorT const & v2,
-                       NumericT & res_v)
-{
-  typename SparseVectorT::const_iterator v_it1 = v1.begin();
-  typename SparseVectorT::const_iterator v_it2 = v2.begin();
-
-  while ((v_it1 != v1.end())&&(v_it2 != v2.end()))
-  {
-    if (v_it1->first == v_it2->first)
-    {
-      res_v += (v_it1->second)*(v_it2->second);
-      ++v_it1;
-      ++v_it2;
-    }
-    else if (v_it1->first < v_it2->first)
-      ++v_it1;
-    else
-      ++v_it2;
-  }
-}
-
-/** @brief Building a new set of column indices J_u, cf. Kallischko 
dissertation p.31
- *
- * @param A_v_c  vectorized column-wise initial matrix
- * @param res    residual vector
- * @param J      set of column indices
- * @param J_u    set of new column indices
- * @param tag    SPAI tag with parameters
- */
-template<typename SparseVectorT, typename NumericT>
-bool buildAugmentedIndexSet(std::vector<SparseVectorT> const & A_v_c,
-                            SparseVectorT const & res,
-                            std::vector<unsigned int> & J,
-                            std::vector<unsigned int> & J_u,
-                            spai_tag const & tag)
-{
-  std::vector<std::pair<unsigned int, NumericT> > p;
-  vcl_size_t cur_size = 0;
-  NumericT inprod, norm2;
-
-  for (typename SparseVectorT::const_iterator res_it = res.begin(); res_it != 
res.end(); ++res_it)
-  {
-    if (!isInIndexSet(J, res_it->first) && (std::fabs(res_it->second) > 
tag.getResidualThreshold()))
-    {
-      inprod = norm2 = 0;
-      sparse_inner_prod(res, A_v_c[res_it->first], inprod);
-      sparse_norm_2(A_v_c[res_it->first], norm2);
-      p.push_back(std::pair<unsigned int, NumericT>(res_it->first, 
(inprod*inprod)/(norm2*norm2)));
-    }
-  }
-
-  std::sort(p.begin(), p.end(), CompareSecond());
-  while ((cur_size < J.size()) && (p.size() > 0))
-  {
-    J_u.push_back(p[0].first);
-    p.erase(p.begin());
-    cur_size++;
-  }
-  p.clear();
-  return (cur_size > 0);
-}
-
-/** @brief Building a new indices to current set of row indices I_n, cf. 
Kallischko dissertation p.32
- *
- * @param A_v_c    vectorized column-wise initial matrix
- * @param I        set of previous determined row indices
- * @param J_n      set of new column indices
- * @param I_n      set of new indices
- */
-template<typename SparseVectorT>
-void buildNewRowSet(std::vector<SparseVectorT> const & A_v_c,
-                    std::vector<unsigned int>  const & I,
-                    std::vector<unsigned int>  const & J_n,
-                    std::vector<unsigned int>        & I_n)
-{
-  for (vcl_size_t i = 0; i < J_n.size(); ++i)
-  {
-    for (typename SparseVectorT::const_iterator col_it = 
A_v_c[J_n[i]].begin(); col_it!=A_v_c[J_n[i]].end(); ++col_it)
-    {
-      if (!isInIndexSet(I, col_it->first) && !isInIndexSet(I_n, col_it->first))
-        I_n.push_back(col_it->first);
-    }
-  }
-}
-
-/** @brief Composition of new block for QR factorization cf. Kallischko 
dissertation p.82, figure 4.7
- *
- * @param A_I_J       previously composed block
- * @param A_I_J_u     matrix Q'*A(I, \\tilde J), where \\tilde J - set of new 
column indices
- * @param A_I_u_J_u   is composition of lower part A(I, \\tilde J) and  
A(\\tilde I, \\tilde J) - new block for QR decomposition
- */
-template<typename MatrixT>
-void QRBlockComposition(MatrixT const & A_I_J,
-                        MatrixT const & A_I_J_u,
-                        MatrixT       & A_I_u_J_u)
-{
-  typedef typename MatrixT::value_type     NumericType;
-
-  vcl_size_t row_n1 = A_I_J_u.size1() - A_I_J.size2();
-  vcl_size_t row_n2 = A_I_u_J_u.size1();
-  vcl_size_t row_n = row_n1 + row_n2;
-  vcl_size_t col_n = A_I_J_u.size2();
-
-  MatrixT C = boost::numeric::ublas::zero_matrix<NumericType>(row_n, col_n);
-  boost::numeric::ublas::project(C,
-                                 boost::numeric::ublas::range(0, row_n1),
-                                 boost::numeric::ublas::range(0, col_n))
-  += boost::numeric::ublas::project(A_I_J_u,
-                                    
boost::numeric::ublas::range(A_I_J.size2(), A_I_J_u.size1()),
-                                    boost::numeric::ublas::range(0, col_n));
-
-  boost::numeric::ublas::project(C,
-                                 boost::numeric::ublas::range(row_n1, row_n1 + 
row_n2),
-                                 boost::numeric::ublas::range(0, col_n)) += 
A_I_u_J_u;
-  A_I_u_J_u = C;
-}
-
-/** @brief CPU-based dynamic update for SPAI preconditioner
- *
- * @param A            initial sparse matrix
- * @param A_v_c        vectorized column-wise initial matrix
- * @param g_res        container of residuals for all columns
- * @param g_is_update  container with identificators that shows which block 
should be modified
- * @param g_I          container of row index sets for all columns
- * @param g_J          container of column index sets for all columns
- * @param g_b_v        container of vectors of beta for Q recovery(cf. Golub 
Van Loan "Matrix Computations", 3rd edition p.211)
- * @param g_A_I_J      container of block matrices from previous update
- * @param tag          SPAI configuration tag
- */
-template<typename SparseMatrixT,
-         typename SparseVectorT,
-         typename DenseMatrixT,
-         typename VectorT>
-void block_update(SparseMatrixT const & A,
-                  std::vector<SparseVectorT> const & A_v_c,
-                  std::vector<SparseVectorT>       & g_res,
-                  std::vector<bool> & g_is_update,
-                  std::vector<std::vector<unsigned int> >& g_I,
-                  std::vector<std::vector<unsigned int> >& g_J,
-                  std::vector<VectorT>      & g_b_v,
-                  std::vector<DenseMatrixT> & g_A_I_J,
-                  spai_tag const & tag)
-{
-  typedef typename DenseMatrixT::value_type     NumericType;
-
-
-  std::vector<std::vector<unsigned int> > g_J_u(g_J.size());   // set of new 
column indices
-  std::vector<std::vector<unsigned int> > g_I_u(g_J.size());   // set of new 
row indices
-  std::vector<DenseMatrixT> g_A_I_J_u(g_J.size());             // matrix A(I, 
\tilde J), cf. Kallischko p.31-32
-  std::vector<DenseMatrixT> g_A_I_u_J_u(g_J.size());           // matrix 
A(\tilde I, \tilde J), cf. Kallischko
-  std::vector<VectorT>      g_b_v_u(g_J.size());               // new vector 
of beta coefficients from QR factorization
-
-#ifdef VIENNACL_WITH_OPENMP
-  #pragma omp parallel for
-#endif
-  for (long i = 0; i < static_cast<long>(g_J.size()); ++i)
-  {
-    if (g_is_update[static_cast<vcl_size_t>(i)])
-    {
-      if (buildAugmentedIndexSet<SparseVectorT, NumericType>(A_v_c, 
g_res[static_cast<vcl_size_t>(i)], g_J[static_cast<vcl_size_t>(i)], 
g_J_u[static_cast<vcl_size_t>(i)], tag))
-      {
-        //initialize matrix A_I_\hatJ
-        initProjectSubMatrix(A, g_J_u[static_cast<vcl_size_t>(i)], 
g_I[static_cast<vcl_size_t>(i)], g_A_I_J_u[static_cast<vcl_size_t>(i)]);
-        //multiplication of Q'*A_I_\hatJ
-        apply_q_trans_mat(g_A_I_J[static_cast<vcl_size_t>(i)], 
g_b_v[static_cast<vcl_size_t>(i)], g_A_I_J_u[static_cast<vcl_size_t>(i)]);
-        //building new rows index set \hatI
-        buildNewRowSet(A_v_c, g_I[static_cast<vcl_size_t>(i)], 
g_J_u[static_cast<vcl_size_t>(i)], g_I_u[static_cast<vcl_size_t>(i)]);
-        initProjectSubMatrix(A, g_J_u[static_cast<vcl_size_t>(i)], 
g_I_u[static_cast<vcl_size_t>(i)], g_A_I_u_J_u[static_cast<vcl_size_t>(i)]);
-        //composition of block for new QR factorization
-        QRBlockComposition(g_A_I_J[static_cast<vcl_size_t>(i)], 
g_A_I_J_u[static_cast<vcl_size_t>(i)], g_A_I_u_J_u[static_cast<vcl_size_t>(i)]);
-        //QR factorization
-        single_qr(g_A_I_u_J_u[static_cast<vcl_size_t>(i)], 
g_b_v_u[static_cast<vcl_size_t>(i)]);
-        //composition of new R and new vector b_v
-        composeNewR(g_A_I_J_u[static_cast<vcl_size_t>(i)], 
g_A_I_u_J_u[static_cast<vcl_size_t>(i)], g_A_I_J[static_cast<vcl_size_t>(i)]);
-        composeNewVector(g_b_v_u[static_cast<vcl_size_t>(i)], 
g_b_v[static_cast<vcl_size_t>(i)]);
-        //composition of new sets: I and J
-        
g_J[static_cast<vcl_size_t>(i)].insert(g_J[static_cast<vcl_size_t>(i)].end(), 
g_J_u[static_cast<vcl_size_t>(i)].begin(), 
g_J_u[static_cast<vcl_size_t>(i)].end());
-        
g_I[static_cast<vcl_size_t>(i)].insert(g_I[static_cast<vcl_size_t>(i)].end(), 
g_I_u[static_cast<vcl_size_t>(i)].begin(), 
g_I_u[static_cast<vcl_size_t>(i)].end());
-      }
-      else
-      {
-        g_is_update[static_cast<vcl_size_t>(i)] = false;
-      }
-    }
-  }
-}
-
-
-/**************************************************** GPU SPAI Update 
****************************************************************/
-
-
-//performs Q'*A(I, \tilde J) on GPU
-/** @brief Performs multiplication Q'*A(I, \\tilde J) on GPU
- *
- * @param g_J_u          container of sets of new column indices
- * @param g_I            container of row indices
- * @param g_A_I_J_vcl    block matrix composed from previous blocks, they are 
blocks of R
- * @param g_bv_vcl       block of beta vectors
- * @param g_A_I_J_u_vcl  block of matrices A(I, \\tilde J)
- * @param g_is_update    indicators, that show if a certain block should be 
processed
- * @param ctx            Optional context in which the auxiliary data is 
created (one out of multiple OpenCL contexts, CUDA, host)
- */
-template<typename NumericT>
-void block_q_multiplication(std::vector<std::vector<unsigned int> > const & 
g_J_u,
-                            std::vector<std::vector<unsigned int> > const & 
g_I,
-                            block_matrix & g_A_I_J_vcl,
-                            block_vector & g_bv_vcl,
-                            block_matrix & g_A_I_J_u_vcl,
-                            std::vector<cl_uint> & g_is_update,
-                            viennacl::context ctx)
-{
-  viennacl::ocl::context & opencl_ctx = const_cast<viennacl::ocl::context 
&>(ctx.opencl_context());
-  unsigned int local_r_n = 0;
-  unsigned int local_c_n = 0;
-  unsigned int sz_blocks = 0;
-
-  get_max_block_size(g_I,   local_r_n);
-  get_max_block_size(g_J_u, local_c_n);
-
-  //for debug
-  std::vector<cl_uint> matrix_dims(g_I.size()*2, static_cast<cl_uint>(0));
-  std::vector<cl_uint> blocks_ind(g_I.size() + 1, static_cast<cl_uint>(0));
-  compute_blocks_size(g_I, g_J_u, sz_blocks, blocks_ind, matrix_dims);
-  //std::vector<ScalarType> con_A_I_J(sz_blocks, static_cast<ScalarType>(0));
-
-  viennacl::ocl::handle<cl_mem> g_is_update_vcl = 
opencl_ctx.create_memory(CL_MEM_READ_WRITE,
-                                                                           
static_cast<unsigned int>(sizeof(cl_uint)*(g_is_update.size())),
-                                                                           
&(g_is_update[0]));
-  viennacl::linalg::opencl::kernels::spai<NumericT>::init(opencl_ctx);
-  viennacl::ocl::kernel& block_q_kernel = 
opencl_ctx.get_kernel(viennacl::linalg::opencl::kernels::spai<NumericT>::program_name(),
 "block_q_mult");
-
-  block_q_kernel.local_work_size(0,      local_c_n);
-  block_q_kernel.global_work_size(0, 128*local_c_n);
-  viennacl::ocl::enqueue(block_q_kernel(g_A_I_J_vcl.handle(), 
g_A_I_J_vcl.handle2(), g_A_I_J_u_vcl.handle(), g_A_I_J_u_vcl.handle2(),
-                                        g_bv_vcl.handle(),
-                                        g_bv_vcl.handle1(), 
g_A_I_J_vcl.handle1(), g_A_I_J_u_vcl.handle1(), g_is_update_vcl,
-                                        
viennacl::ocl::local_mem(static_cast<unsigned 
int>(sizeof(NumericT)*(local_r_n*local_c_n))),
-                                        static_cast<cl_uint>(g_I.size())));
-}
-
-/** @brief Assembly of container of index row sets: I_q, row indices for new 
"QR block"
- *
- * @param g_I    container of row indices
- * @param g_J    container of column indices
- * @param g_I_u  container of new row indices
- * @param g_I_q  container of row indices for new QR blocks
- */
-template<typename SizeT>
-void assemble_qr_row_inds(std::vector<std::vector<SizeT> > const & g_I,
-                          std::vector<std::vector<SizeT> > const & g_J,
-                          std::vector<std::vector<SizeT> > const & g_I_u,
-                          std::vector<std::vector<SizeT> >       & g_I_q)
-{
-#ifdef VIENNACL_WITH_OPENMP
-  #pragma omp parallel for
-#endif
-  for (long i = 0; i < static_cast<long>(g_I.size()); ++i)
-  {
-    for (vcl_size_t j = g_J[static_cast<vcl_size_t>(i)].size(); j < 
g_I[static_cast<vcl_size_t>(i)].size(); ++j)
-      
g_I_q[static_cast<vcl_size_t>(i)].push_back(g_I[static_cast<vcl_size_t>(i)][j]);
-
-    for (vcl_size_t j = 0; j < g_I_u[static_cast<vcl_size_t>(i)].size(); ++j)
-      
g_I_q[static_cast<vcl_size_t>(i)].push_back(g_I_u[static_cast<vcl_size_t>(i)][j]);
-  }
-}
-
-/** @brief Performs assembly for new QR block
- *
- * @param g_J                container of column indices
- * @param g_I                container of row indices
- * @param g_J_u              container of new column indices
- * @param g_I_u              container of new row indices
- * @param g_I_q              container of row indices for new QR blocks
- * @param g_A_I_J_u_vcl      blocks of Q'*A(I, \\tilde J)
- * @param matrix_dimensions  array with matrix dimensions for all blocks
- * @param g_A_I_u_J_u_vcl    blocks A(\\tilde I, \\tilde J)
- * @param g_is_update        container with update indicators
- * @param is_empty_block     indicator if all previous blocks A(\\tilde I, 
\\tilde J) - are empty, in case if they are empty kernel with smaller number of 
arguments is used
- * @param ctx                Optional context in which the matrix is created 
(one out of multiple OpenCL contexts, CUDA, host)
-*/
-template<typename NumericT>
-void assemble_qr_block(std::vector<std::vector<unsigned int> > const & g_J,
-                       std::vector<std::vector<unsigned int> > const& g_I,
-                       std::vector<std::vector<unsigned int> > const& g_J_u,
-                       std::vector<std::vector<unsigned int> > const& g_I_u,
-                       std::vector<std::vector<unsigned int> >& g_I_q,
-                       block_matrix & g_A_I_J_u_vcl,
-                       viennacl::ocl::handle<cl_mem> & matrix_dimensions,
-                       block_matrix & g_A_I_u_J_u_vcl,
-                       std::vector<cl_uint> & g_is_update,
-                       bool is_empty_block,
-                       viennacl::context ctx)
-{
-  viennacl::ocl::context & opencl_ctx = const_cast<viennacl::ocl::context 
&>(ctx.opencl_context());
-
-  //std::vector<std::vector<unsigned int> > g_I_q(g_I.size());
-  assemble_qr_row_inds(g_I, g_J, g_I_u, g_I_q);
-  unsigned int sz_blocks;
-  std::vector<cl_uint> matrix_dims(g_I.size()*2, static_cast<cl_uint>(0));
-  std::vector<cl_uint> blocks_ind(g_I.size() + 1, static_cast<cl_uint>(0));
-
-  compute_blocks_size(g_I_q, g_J_u, sz_blocks, blocks_ind, matrix_dims);
-
-  std::vector<NumericT> con_A_I_J_q(sz_blocks, static_cast<NumericT>(0));
-
-  block_matrix g_A_I_J_q_vcl;
-  //need to allocate memory for QR block
-  g_A_I_J_q_vcl.handle() = opencl_ctx.create_memory(CL_MEM_READ_WRITE,
-                                                    static_cast<unsigned 
int>(sizeof(NumericT)*sz_blocks),
-                                                    &(con_A_I_J_q[0]));
-  g_A_I_J_q_vcl.handle().context(opencl_ctx);
-
-  g_A_I_J_q_vcl.handle1() = opencl_ctx.create_memory(CL_MEM_READ_WRITE,
-                                                     static_cast<unsigned 
int>(sizeof(cl_uint)*2*static_cast<unsigned int>(g_I.size())),
-                                                     &(matrix_dims[0]));
-  g_A_I_J_q_vcl.handle1().context(opencl_ctx);
-
-  g_A_I_J_q_vcl.handle2() = opencl_ctx.create_memory(CL_MEM_READ_WRITE,
-                                                      static_cast<unsigned 
int>(sizeof(cl_uint)*static_cast<unsigned int>(g_I.size() + 1)),
-                                                      &(blocks_ind[0]));
-  g_A_I_J_q_vcl.handle2().context(opencl_ctx);
-
-  viennacl::ocl::handle<cl_mem> g_is_update_vcl = 
opencl_ctx.create_memory(CL_MEM_READ_WRITE,
-                                                                           
static_cast<unsigned int>(sizeof(cl_uint)*(g_is_update.size())),
-                                                                           
&(g_is_update[0]));
-
-  viennacl::linalg::opencl::kernels::spai<NumericT>::init(opencl_ctx);
-  if (!is_empty_block)
-  {
-    viennacl::ocl::kernel& qr_assembly_kernel = 
opencl_ctx.get_kernel(viennacl::linalg::opencl::kernels::spai<NumericT>::program_name(),
 "block_qr_assembly");
-    qr_assembly_kernel.local_work_size(0, 1);
-    qr_assembly_kernel.global_work_size(0, 256);
-    viennacl::ocl::enqueue(qr_assembly_kernel(matrix_dimensions,
-                                              g_A_I_J_u_vcl.handle(),
-                                              g_A_I_J_u_vcl.handle2(),
-                                              g_A_I_J_u_vcl.handle1(),
-                                              g_A_I_u_J_u_vcl.handle(),
-                                              g_A_I_u_J_u_vcl.handle2(),
-                                              g_A_I_u_J_u_vcl.handle1(),
-                                              g_A_I_J_q_vcl.handle(),
-                                              g_A_I_J_q_vcl.handle2(),
-                                              g_A_I_J_q_vcl.handle1(),
-                                              g_is_update_vcl,
-                                              static_cast<unsigned 
int>(g_I.size())));
-  }
-  else
-  {
-    viennacl::ocl::kernel& qr_assembly_kernel = 
opencl_ctx.get_kernel(viennacl::linalg::opencl::kernels::spai<NumericT>::program_name(),
 "block_qr_assembly_1");
-    qr_assembly_kernel.local_work_size(0, 1);
-    qr_assembly_kernel.global_work_size(0, 256);
-    viennacl::ocl::enqueue(qr_assembly_kernel(matrix_dimensions, 
g_A_I_J_u_vcl.handle(), g_A_I_J_u_vcl.handle2(),
-                                              g_A_I_J_u_vcl.handle1(),
-                                              g_A_I_J_q_vcl.handle(),
-                                              g_A_I_J_q_vcl.handle2(), 
g_A_I_J_q_vcl.handle1(),
-                                              g_is_update_vcl,
-                                              static_cast<unsigned 
int>(g_I.size())));
-  }
-  g_A_I_u_J_u_vcl.handle() = g_A_I_J_q_vcl.handle();
-  g_A_I_u_J_u_vcl.handle1() = g_A_I_J_q_vcl.handle1();
-  g_A_I_u_J_u_vcl.handle2() = g_A_I_J_q_vcl.handle2();
-}
-
-/** @brief Performs assembly for new R matrix on GPU
- *
- * @param g_I              container of row indices
- * @param g_J              container of column indices
- * @param g_A_I_J_vcl      container of block matrices from previous update
- * @param g_A_I_J_u_vcl    container of block matrices Q'*A(I, \\tilde J)
- * @param g_A_I_u_J_u_vcl  container of block matrices QR factored on current 
iteration
- * @param g_bv_vcl         block of beta vectors from previous iteration
- * @param g_bv_vcl_u       block of updated beta vectors got after recent QR 
factorization
- * @param g_is_update      container with identificators that shows which 
block should be modified
- * @param ctx              Optional context in which the auxiliary data is 
created (one out of multiple OpenCL contexts, CUDA, host)
- */
-template<typename NumericT>
-void assemble_r(std::vector<std::vector<unsigned int> > & g_I,
-                std::vector<std::vector<unsigned int> > & g_J,
-                block_matrix & g_A_I_J_vcl,
-                block_matrix & g_A_I_J_u_vcl,
-                block_matrix & g_A_I_u_J_u_vcl,
-                block_vector & g_bv_vcl,
-                block_vector & g_bv_vcl_u,
-                std::vector<cl_uint> & g_is_update,
-                viennacl::context ctx)
-{
-  viennacl::ocl::context & opencl_ctx = const_cast<viennacl::ocl::context 
&>(ctx.opencl_context());
-  std::vector<cl_uint> matrix_dims(g_I.size()*2, static_cast<cl_uint>(0));
-  std::vector<cl_uint> blocks_ind(g_I.size() + 1, static_cast<cl_uint>(0));
-  std::vector<cl_uint> start_bv_r_inds(g_I.size() + 1, 0);
-  unsigned int sz_blocks, bv_size;
-
-  compute_blocks_size(g_I, g_J, sz_blocks, blocks_ind, matrix_dims);
-  get_size(g_J, bv_size);
-  init_start_inds(g_J, start_bv_r_inds);
-
-  std::vector<NumericT> con_A_I_J_r(sz_blocks, static_cast<NumericT>(0));
-  std::vector<NumericT> b_v_r(bv_size, static_cast<NumericT>(0));
-
-  block_matrix g_A_I_J_r_vcl;
-  block_vector g_bv_r_vcl;
-  g_A_I_J_r_vcl.handle() = opencl_ctx.create_memory(CL_MEM_READ_WRITE,
-                                                    static_cast<unsigned 
int>(sizeof(NumericT)*sz_blocks),
-                                                    &(con_A_I_J_r[0]));
-  g_A_I_J_r_vcl.handle().context(opencl_ctx);
-
-  g_A_I_J_r_vcl.handle1() = opencl_ctx.create_memory(CL_MEM_READ_WRITE,
-                                                     static_cast<unsigned 
int>(sizeof(cl_uint)*2*static_cast<unsigned int>(g_I.size())),
-                                                     &(matrix_dims[0]));
-  g_A_I_J_r_vcl.handle1().context(opencl_ctx);
-
-  g_A_I_J_r_vcl.handle2() = opencl_ctx.create_memory(CL_MEM_READ_WRITE,
-                                                     static_cast<unsigned 
int>(sizeof(cl_uint)*static_cast<unsigned int>(g_I.size() + 1)),
-                                                     &(blocks_ind[0]));
-  g_A_I_J_r_vcl.handle2().context(opencl_ctx);
-
-  g_bv_r_vcl.handle() = opencl_ctx.create_memory(CL_MEM_READ_WRITE,
-                                                 static_cast<unsigned 
int>(sizeof(NumericT)*bv_size),
-                                                 &(b_v_r[0]));
-  g_bv_r_vcl.handle().context(opencl_ctx);
-
-  g_bv_r_vcl.handle1() = opencl_ctx.create_memory(CL_MEM_READ_WRITE,
-                                                  static_cast<unsigned 
int>(sizeof(cl_uint)*(g_I.size() + 1)),
-                                                  &(start_bv_r_inds[0]));
-  g_bv_r_vcl.handle().context(opencl_ctx);
-
-  viennacl::ocl::handle<cl_mem> g_is_update_vcl = 
opencl_ctx.create_memory(CL_MEM_READ_WRITE,
-                                                                           
static_cast<unsigned int>(sizeof(cl_uint)*(g_is_update.size())),
-                                                                           
&(g_is_update[0]));
-  viennacl::linalg::opencl::kernels::spai<NumericT>::init(opencl_ctx);
-  viennacl::ocl::kernel& r_assembly_kernel = 
opencl_ctx.get_kernel(viennacl::linalg::opencl::kernels::spai<NumericT>::program_name(),
 "block_r_assembly");
-  r_assembly_kernel.local_work_size(0, 1);
-  r_assembly_kernel.global_work_size(0, 256);
-
-  viennacl::ocl::enqueue(r_assembly_kernel(g_A_I_J_vcl.handle(), 
g_A_I_J_vcl.handle2(), g_A_I_J_vcl.handle1(),
-                                          g_A_I_J_u_vcl.handle(), 
g_A_I_J_u_vcl.handle2(), g_A_I_J_u_vcl.handle1(),
-                                          g_A_I_u_J_u_vcl.handle(), 
g_A_I_u_J_u_vcl.handle2(), g_A_I_u_J_u_vcl.handle1(),
-                                          g_A_I_J_r_vcl.handle(), 
g_A_I_J_r_vcl.handle2(), g_A_I_J_r_vcl.handle1(),
-                                          g_is_update_vcl, 
static_cast<cl_uint>(g_I.size())));
-
-  viennacl::ocl::kernel & bv_assembly_kernel = 
opencl_ctx.get_kernel(viennacl::linalg::opencl::kernels::spai<NumericT>::program_name(),
 "block_bv_assembly");
-  bv_assembly_kernel.local_work_size(0, 1);
-  bv_assembly_kernel.global_work_size(0, 256);
-  viennacl::ocl::enqueue(bv_assembly_kernel(g_bv_vcl.handle(), 
g_bv_vcl.handle1(), g_A_I_J_vcl.handle1(), g_bv_vcl_u.handle(),
-                                            g_bv_vcl_u.handle1(), 
g_A_I_J_u_vcl.handle1(),
-                                            g_bv_r_vcl.handle(), 
g_bv_r_vcl.handle1(), g_A_I_J_r_vcl.handle1(), g_is_update_vcl,
-                                            static_cast<cl_uint>(g_I.size())));
-  g_bv_vcl.handle() = g_bv_r_vcl.handle();
-  g_bv_vcl.handle1() = g_bv_r_vcl.handle1();
-
-  g_A_I_J_vcl.handle() = g_A_I_J_r_vcl.handle();
-  g_A_I_J_vcl.handle2() = g_A_I_J_r_vcl.handle2();
-  g_A_I_J_vcl.handle1() = g_A_I_J_r_vcl.handle1();
-}
-
-/** @brief GPU-based block update
- *
- * @param A            sparse matrix
- * @param A_v_c        vectorized column-wise initial matrix
- * @param g_is_update  container with identificators that shows which block 
should be modified
- * @param g_res        container of residuals for all columns
- * @param g_J          container of column index sets for all columns
- * @param g_I          container of row index sets for all columns
- * @param g_A_I_J_vcl  container of block matrices from previous update
- * @param g_bv_vcl     block of beta vectors from previous iteration
- * @param tag          SPAI configuration tag
- */
-template<typename NumericT, unsigned int AlignmentV, typename SparseVectorT>
-void block_update(viennacl::compressed_matrix<NumericT, AlignmentV> const & A,
-                  std::vector<SparseVectorT> const & A_v_c,
-                  std::vector<cl_uint> & g_is_update,
-                  std::vector<SparseVectorT> & g_res,
-                  std::vector<std::vector<unsigned int> > & g_J,
-                  std::vector<std::vector<unsigned int> > & g_I,
-                  block_matrix & g_A_I_J_vcl,
-                  block_vector & g_bv_vcl,
-                  spai_tag const & tag)
-{
-  viennacl::context ctx = viennacl::traits::context(A);
-  //updated index set for columns
-  std::vector<std::vector<unsigned int> > g_J_u(g_J.size());
-  //updated index set for rows
-  std::vector<std::vector<unsigned int> > g_I_u(g_J.size());
-  //mixed index set of old and updated indices for rows
-  std::vector<std::vector<unsigned int> > g_I_q(g_J.size());
-  //GPU memory for A_I_\hatJ
-  block_matrix g_A_I_J_u_vcl;
-  //GPU memory for A_\hatI_\hatJ
-  block_matrix g_A_I_u_J_u_vcl;
-  bool is_empty_block;
-  //GPU memory for new b_v
-  block_vector g_bv_u_vcl;
-
-#ifdef VIENNACL_WITH_OPENMP
-  #pragma omp parallel for
-#endif
-  for (long i = 0; i < static_cast<long>(g_J.size()); ++i)
-  {
-    if (g_is_update[static_cast<vcl_size_t>(i)])
-    {
-      if (buildAugmentedIndexSet<SparseVectorT, NumericT>(A_v_c, 
g_res[static_cast<vcl_size_t>(i)], g_J[static_cast<vcl_size_t>(i)], 
g_J_u[static_cast<vcl_size_t>(i)], tag))
-          buildNewRowSet(A_v_c, g_I[static_cast<vcl_size_t>(i)], 
g_J_u[static_cast<vcl_size_t>(i)], g_I_u[static_cast<vcl_size_t>(i)]);
-    }
-  }
-  //assemble new A_I_J_u blocks on GPU and multiply them with Q'
-  block_assembly(A, g_J_u, g_I, g_A_I_J_u_vcl, g_is_update, is_empty_block);
-  //I have matrix A_I_J_u ready..
-  block_q_multiplication<NumericT>(g_J_u, g_I, g_A_I_J_vcl, g_bv_vcl, 
g_A_I_J_u_vcl, g_is_update, ctx);
-  //assemble A_\hatI_\hatJ
-  block_assembly(A, g_J_u, g_I_u, g_A_I_u_J_u_vcl, g_is_update, 
is_empty_block);
-  assemble_qr_block<NumericT>(g_J, g_I, g_J_u, g_I_u, g_I_q, g_A_I_J_u_vcl, 
g_A_I_J_vcl.handle1(),
-                              g_A_I_u_J_u_vcl, g_is_update, is_empty_block, 
ctx);
-
-  block_qr<NumericT>(g_I_q, g_J_u, g_A_I_u_J_u_vcl, g_bv_u_vcl, g_is_update, 
ctx);
-  //concatanation of new and old indices
-#ifdef VIENNACL_WITH_OPENMP
-  #pragma omp parallel for
-#endif
-  for (long i = 0; i < static_cast<long>(g_J.size()); ++i)
-  {
-    
g_J[static_cast<vcl_size_t>(i)].insert(g_J[static_cast<vcl_size_t>(i)].end(), 
g_J_u[static_cast<vcl_size_t>(i)].begin(), 
g_J_u[static_cast<vcl_size_t>(i)].end());
-    
g_I[static_cast<vcl_size_t>(i)].insert(g_I[static_cast<vcl_size_t>(i)].end(), 
g_I_u[static_cast<vcl_size_t>(i)].begin(), 
g_I_u[static_cast<vcl_size_t>(i)].end());
-  }
-  assemble_r<NumericT>(g_I, g_J, g_A_I_J_vcl, g_A_I_J_u_vcl, g_A_I_u_J_u_vcl,  
g_bv_vcl,  g_bv_u_vcl, g_is_update, ctx);
-}
-
-}
-}
-}
-}
-#endif

http://git-wip-us.apache.org/repos/asf/mahout/blob/7ae549fa/native-viennaCL/src/main/cpp/viennacl/linalg/detail/spai/spai-static.hpp
----------------------------------------------------------------------
diff --git 
a/native-viennaCL/src/main/cpp/viennacl/linalg/detail/spai/spai-static.hpp 
b/native-viennaCL/src/main/cpp/viennacl/linalg/detail/spai/spai-static.hpp
deleted file mode 100644
index 0fd11146..0000000
--- a/native-viennaCL/src/main/cpp/viennacl/linalg/detail/spai/spai-static.hpp
+++ /dev/null
@@ -1,192 +0,0 @@
-#ifndef VIENNACL_LINALG_DETAIL_SPAI_SPAI_STATIC_HPP
-#define VIENNACL_LINALG_DETAIL_SPAI_SPAI_STATIC_HPP
-
-/* =========================================================================
-   Copyright (c) 2010-2016, Institute for Microelectronics,
-                            Institute for Analysis and Scientific Computing,
-                            TU Wien.
-   Portions of this software are copyright by UChicago Argonne, LLC.
-
-                            -----------------
-                  ViennaCL - The Vienna Computing Library
-                            -----------------
-
-   Project Head:    Karl Rupp                   [email protected]
-
-   (A list of authors and contributors can be found in the manual)
-
-   License:         MIT (X11), see file LICENSE in the base directory
-============================================================================= 
*/
-
-/** @file viennacl/linalg/detail/spai/spai-static.hpp
-    @brief Implementation of a static SPAI. Experimental.
-
-    SPAI code contributed by Nikolay Lukash
-*/
-
-#include <utility>
-#include <iostream>
-#include <fstream>
-#include <string>
-#include <algorithm>
-#include <vector>
-#include <math.h>
-#include <map>
-//#include "spai-dynamic.hpp"
-#include "boost/numeric/ublas/vector.hpp"
-#include "boost/numeric/ublas/matrix.hpp"
-#include "boost/numeric/ublas/matrix_proxy.hpp"
-#include "boost/numeric/ublas/vector_proxy.hpp"
-#include "boost/numeric/ublas/storage.hpp"
-#include "boost/numeric/ublas/io.hpp"
-#include "boost/numeric/ublas/lu.hpp"
-#include "boost/numeric/ublas/triangular.hpp"
-#include "boost/numeric/ublas/matrix_expression.hpp"
-// ViennaCL includes
-#include "viennacl/linalg/prod.hpp"
-#include "viennacl/matrix.hpp"
-#include "viennacl/compressed_matrix.hpp"
-#include "viennacl/linalg/sparse_matrix_operations.hpp"
-#include "viennacl/linalg/matrix_operations.hpp"
-#include "viennacl/scalar.hpp"
-#include "viennacl/linalg/cg.hpp"
-#include "viennacl/linalg/inner_prod.hpp"
-
-//#include "boost/numeric/ublas/detail/matrix_assign.hpp"
-
-namespace viennacl
-{
-namespace linalg
-{
-namespace detail
-{
-namespace spai
-{
-
-/** @brief Determines if element ind is in set {J}
- *
- * @param J     current set
- * @param ind   current element
- */
-template<typename SizeT>
-bool isInIndexSet(std::vector<SizeT> const & J, SizeT ind)
-{
-  return (std::find(J.begin(), J.end(), ind) != J.end());
-}
-
-
-
-/********************************* STATIC SPAI 
FUNCTIONS******************************************/
-
-/** @brief Projects solution of LS problem onto original column m
- *
- * @param m_in   solution of LS
- * @param J      set of non-zero columns
- * @param m      original column of M
- */
-template<typename VectorT, typename SparseVectorT>
-void fanOutVector(VectorT const & m_in, std::vector<unsigned int> const & J, 
SparseVectorT & m)
-{
-  unsigned int  cnt = 0;
-  for (vcl_size_t i = 0; i < J.size(); ++i)
-    m[J[i]] = m_in(cnt++);
-}
-
-/** @brief Solution of linear:R*x=y system by backward substitution
- *
- * @param R   uppertriangular matrix
- * @param y   right handside vector
- * @param x   solution vector
- */
-template<typename MatrixT, typename VectorT>
-void backwardSolve(MatrixT const & R, VectorT const & y, VectorT & x)
-{
-  for (long i2 = static_cast<long>(R.size2())-1; i2 >= 0; i2--)
-  {
-    vcl_size_t i = static_cast<vcl_size_t>(i2);
-    x(i) = y(i);
-    for (vcl_size_t j = static_cast<vcl_size_t>(i)+1; j < R.size2(); ++j)
-      x(i) -= R(i,j)*x(j);
-
-    x(i) /= R(i,i);
-  }
-}
-
-/** @brief Perform projection of set I on the unit-vector
- *
- * @param I     set of non-zero rows
- * @param y     result vector
- * @param ind   index of unit vector
- */
-template<typename VectorT, typename NumericT>
-void projectI(std::vector<unsigned int> const & I, VectorT & y, unsigned int 
ind)
-{
-  for (vcl_size_t i = 0; i < I.size(); ++i)
-  {
-    //y.resize(y.size()+1);
-    if (I[i] == ind)
-      y(i) = NumericT(1.0);
-    else
-      y(i) = NumericT(0.0);
-  }
-}
-
-/** @brief Builds index set of projected columns for current column of 
preconditioner
- *
- * @param v    current column of preconditioner
- * @param J    output - index set of non-zero columns
- */
-template<typename SparseVectorT>
-void buildColumnIndexSet(SparseVectorT const & v, std::vector<unsigned int> & 
J)
-{
-  for (typename SparseVectorT::const_iterator vec_it = v.begin(); vec_it != 
v.end(); ++vec_it)
-    J.push_back(vec_it->first);
-
-  std::sort(J.begin(), J.end());
-}
-
-/** @brief Initialize preconditioner with sparcity pattern = p(A)
- *
- * @param A   input matrix
- * @param M   output matrix - initialized preconditioner
- */
-template<typename SparseMatrixT>
-void initPreconditioner(SparseMatrixT const & A, SparseMatrixT & M)
-{
-  typedef typename SparseMatrixT::value_type      NumericType;
-
-  M.resize(A.size1(), A.size2(), false);
-  for (typename SparseMatrixT::const_iterator1 row_it = A.begin1(); row_it!= 
A.end1(); ++row_it)
-    for (typename SparseMatrixT::const_iterator2 col_it = row_it.begin(); 
col_it != row_it.end(); ++col_it)
-      M(col_it.index1(),col_it.index2()) = NumericType(1);
-}
-
-/** @brief Row projection for matrix A(:,J) -> A(I,J), building index set of 
non-zero rows
- *
- * @param A_v_c   input matrix
- * @param J       set of non-zero rows
- * @param I       output matrix
- */
-template<typename SparseVectorT>
-void projectRows(std::vector<SparseVectorT> const & A_v_c,
-                 std::vector<unsigned int> const & J,
-                 std::vector<unsigned int>       & I)
-{
-  for (vcl_size_t i = 0; i < J.size(); ++i)
-  {
-    for (typename SparseVectorT::const_iterator col_it = A_v_c[J[i]].begin(); 
col_it!=A_v_c[J[i]].end(); ++col_it)
-    {
-      if (!isInIndexSet(I, col_it->first))
-        I.push_back(col_it->first);
-    }
-  }
-  std::sort(I.begin(), I.end());
-}
-
-
-} //namespace spai
-} //namespace detail
-} //namespace linalg
-} //namespace viennacl
-
-#endif

Reply via email to