http://git-wip-us.apache.org/repos/asf/mahout/blob/7ae549fa/native-viennaCL/src/main/cpp/viennacl/linalg/detail/ilu/common.hpp
----------------------------------------------------------------------
diff --git a/native-viennaCL/src/main/cpp/viennacl/linalg/detail/ilu/common.hpp 
b/native-viennaCL/src/main/cpp/viennacl/linalg/detail/ilu/common.hpp
deleted file mode 100644
index 93b0cba..0000000
--- a/native-viennaCL/src/main/cpp/viennacl/linalg/detail/ilu/common.hpp
+++ /dev/null
@@ -1,263 +0,0 @@
-#ifndef VIENNACL_LINALG_DETAIL_ILU_COMMON_HPP_
-#define VIENNACL_LINALG_DETAIL_ILU_COMMON_HPP_
-
-/* =========================================================================
-   Copyright (c) 2010-2016, Institute for Microelectronics,
-                            Institute for Analysis and Scientific Computing,
-                            TU Wien.
-   Portions of this software are copyright by UChicago Argonne, LLC.
-
-                            -----------------
-                  ViennaCL - The Vienna Computing Library
-                            -----------------
-
-   Project Head:    Karl Rupp                   [email protected]
-
-   (A list of authors and contributors can be found in the manual)
-
-   License:         MIT (X11), see file LICENSE in the base directory
-============================================================================= 
*/
-
-/** @file viennacl/linalg/detail/ilu/common.hpp
-    @brief Common routines used within ILU-type preconditioners
-*/
-
-#include <vector>
-#include <cmath>
-#include <iostream>
-#include <map>
-#include <list>
-
-#include "viennacl/forwards.h"
-#include "viennacl/tools/tools.hpp"
-#include "viennacl/backend/memory.hpp"
-
-#include "viennacl/linalg/host_based/common.hpp"
-#include "viennacl/linalg/misc_operations.hpp"
-
-namespace viennacl
-{
-namespace linalg
-{
-namespace detail
-{
-
-
-//
-// Level Scheduling Setup for ILU:
-//
-
-template<typename NumericT, unsigned int AlignmentV>
-void level_scheduling_setup_impl(viennacl::compressed_matrix<NumericT, 
AlignmentV> const & LU,
-                                 viennacl::vector<NumericT> const & 
diagonal_LU,
-                                 std::list<viennacl::backend::mem_handle> & 
row_index_arrays,
-                                 std::list<viennacl::backend::mem_handle> & 
row_buffers,
-                                 std::list<viennacl::backend::mem_handle> & 
col_buffers,
-                                 std::list<viennacl::backend::mem_handle> & 
element_buffers,
-                                 std::list<vcl_size_t> & 
row_elimination_num_list,
-                                 bool setup_U)
-{
-  NumericT     const * diagonal_buf = 
viennacl::linalg::host_based::detail::extract_raw_pointer<NumericT>(diagonal_LU.handle());
-  NumericT     const * elements     = 
viennacl::linalg::host_based::detail::extract_raw_pointer<NumericT>(LU.handle());
-  unsigned int const * row_buffer   = 
viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned 
int>(LU.handle1());
-  unsigned int const * col_buffer   = 
viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned 
int>(LU.handle2());
-
-  //
-  // Step 1: Determine row elimination order for each row and build up meta 
information about the number of entries taking part in each elimination step:
-  //
-  std::vector<vcl_size_t> row_elimination(LU.size1());
-  std::map<vcl_size_t, std::map<vcl_size_t, vcl_size_t> > 
row_entries_per_elimination_step;
-
-  vcl_size_t max_elimination_runs = 0;
-  for (vcl_size_t row2 = 0; row2 < LU.size1(); ++row2)
-  {
-    vcl_size_t row = setup_U ? (LU.size1() - row2) - 1 : row2;
-
-    vcl_size_t row_begin = row_buffer[row];
-    vcl_size_t row_end   = row_buffer[row+1];
-    vcl_size_t elimination_index = 0;  //Note: first run corresponds to 
elimination_index = 1 (otherwise, type issues with int <-> unsigned int would 
arise
-    for (vcl_size_t i = row_begin; i < row_end; ++i)
-    {
-      unsigned int col = col_buffer[i];
-      if ( (!setup_U && col < row) || (setup_U && col > row) )
-      {
-        elimination_index = std::max<vcl_size_t>(elimination_index, 
row_elimination[col]);
-        row_entries_per_elimination_step[row_elimination[col]][row] += 1;
-      }
-    }
-    row_elimination[row] = elimination_index + 1;
-    max_elimination_runs = std::max<vcl_size_t>(max_elimination_runs, 
elimination_index + 1);
-  }
-
-  //std::cout << "Number of elimination runs: " << max_elimination_runs << 
std::endl;
-
-  //
-  // Step 2: Build row-major elimination matrix for each elimination step
-  //
-
-  //std::cout << "Elimination order: " << std::endl;
-  //for (vcl_size_t i=0; i<row_elimination.size(); ++i)
-  //  std::cout << row_elimination[i] << ", ";
-  //std::cout << std::endl;
-
-  //vcl_size_t summed_rows = 0;
-  for (vcl_size_t elimination_run = 1; elimination_run <= 
max_elimination_runs; ++elimination_run)
-  {
-    std::map<vcl_size_t, vcl_size_t> const & current_elimination_info = 
row_entries_per_elimination_step[elimination_run];
-
-    // count cols and entries handled in this elimination step
-    vcl_size_t num_tainted_cols = current_elimination_info.size();
-    vcl_size_t num_entries = 0;
-
-    for (std::map<vcl_size_t, vcl_size_t>::const_iterator it  = 
current_elimination_info.begin();
-                                                          it != 
current_elimination_info.end();
-                                                        ++it)
-      num_entries += it->second;
-
-    //std::cout << "num_entries: " << num_entries << std::endl;
-    //std::cout << "num_tainted_cols: " << num_tainted_cols << std::endl;
-
-    if (num_tainted_cols > 0)
-    {
-      row_index_arrays.push_back(viennacl::backend::mem_handle());
-      viennacl::backend::switch_memory_context<unsigned 
int>(row_index_arrays.back(), viennacl::traits::context(LU));
-      viennacl::backend::typesafe_host_array<unsigned int> 
elim_row_index_array(row_index_arrays.back(), num_tainted_cols);
-
-      row_buffers.push_back(viennacl::backend::mem_handle());
-      viennacl::backend::switch_memory_context<unsigned 
int>(row_buffers.back(), viennacl::traits::context(LU));
-      viennacl::backend::typesafe_host_array<unsigned int> 
elim_row_buffer(row_buffers.back(), num_tainted_cols + 1);
-
-      col_buffers.push_back(viennacl::backend::mem_handle());
-      viennacl::backend::switch_memory_context<unsigned 
int>(col_buffers.back(), viennacl::traits::context(LU));
-      viennacl::backend::typesafe_host_array<unsigned int> 
elim_col_buffer(col_buffers.back(), num_entries);
-
-      element_buffers.push_back(viennacl::backend::mem_handle());
-      
viennacl::backend::switch_memory_context<NumericT>(element_buffers.back(), 
viennacl::traits::context(LU));
-      std::vector<NumericT> elim_elements_buffer(num_entries);
-
-      row_elimination_num_list.push_back(num_tainted_cols);
-
-      vcl_size_t k=0;
-      vcl_size_t nnz_index = 0;
-      elim_row_buffer.set(0, 0);
-
-      for (std::map<vcl_size_t, vcl_size_t>::const_iterator it  = 
current_elimination_info.begin();
-                                                              it != 
current_elimination_info.end();
-                                                            ++it)
-      {
-        //vcl_size_t col = setup_U ? (elimination_matrix.size() - it->first) - 
1 : col2;
-        vcl_size_t row = it->first;
-        elim_row_index_array.set(k, row);
-
-        vcl_size_t row_begin = row_buffer[row];
-        vcl_size_t row_end   = row_buffer[row+1];
-        for (vcl_size_t i = row_begin; i < row_end; ++i)
-        {
-          unsigned int col = col_buffer[i];
-          if ( (!setup_U && col < row) || (setup_U && col > row) ) //entry of 
L/U
-          {
-            if (row_elimination[col] == elimination_run) // this entry is 
substituted in this run
-            {
-              elim_col_buffer.set(nnz_index, col);
-              elim_elements_buffer[nnz_index] = setup_U ? elements[i] / 
diagonal_buf[it->first] : elements[i];
-              ++nnz_index;
-            }
-          }
-        }
-
-        elim_row_buffer.set(++k, nnz_index);
-      }
-
-      //
-      // Wrap in memory_handles:
-      //
-      viennacl::backend::memory_create(row_index_arrays.back(), 
elim_row_index_array.raw_size(),                
viennacl::traits::context(row_index_arrays.back()), elim_row_index_array.get());
-      viennacl::backend::memory_create(row_buffers.back(),      
elim_row_buffer.raw_size(),                     
viennacl::traits::context(row_buffers.back()),      elim_row_buffer.get());
-      viennacl::backend::memory_create(col_buffers.back(),      
elim_col_buffer.raw_size(),                     
viennacl::traits::context(col_buffers.back()),      elim_col_buffer.get());
-      viennacl::backend::memory_create(element_buffers.back(),  
sizeof(NumericT) * elim_elements_buffer.size(), 
viennacl::traits::context(element_buffers.back()),  &(elim_elements_buffer[0]));
-    }
-
-    // Print some info:
-    //std::cout << "Eliminated columns in run " << elimination_run << ": " << 
num_tainted_cols << " (tainted columns: " << num_tainted_cols << ")" << 
std::endl;
-    //summed_rows += eliminated_rows_in_run;
-    //if (eliminated_rows_in_run == 0)
-    //  break;
-  }
-  //std::cout << "Eliminated rows: " << summed_rows << " out of " << 
row_elimination.size() << std::endl;
-}
-
-
-template<typename NumericT, unsigned int AlignmentV>
-void level_scheduling_setup_L(viennacl::compressed_matrix<NumericT, 
AlignmentV> const & LU,
-                              viennacl::vector<NumericT> const & diagonal_LU,
-                              std::list<viennacl::backend::mem_handle> & 
row_index_arrays,
-                              std::list<viennacl::backend::mem_handle> & 
row_buffers,
-                              std::list<viennacl::backend::mem_handle> & 
col_buffers,
-                              std::list<viennacl::backend::mem_handle> & 
element_buffers,
-                              std::list<vcl_size_t> & row_elimination_num_list)
-{
-  level_scheduling_setup_impl(LU, diagonal_LU, row_index_arrays, row_buffers, 
col_buffers, element_buffers, row_elimination_num_list, false);
-}
-
-
-//
-// Multifrontal setup of U:
-//
-
-template<typename NumericT, unsigned int AlignmentV>
-void level_scheduling_setup_U(viennacl::compressed_matrix<NumericT, 
AlignmentV> const & LU,
-                              viennacl::vector<NumericT> const & diagonal_LU,
-                              std::list<viennacl::backend::mem_handle> & 
row_index_arrays,
-                              std::list<viennacl::backend::mem_handle> & 
row_buffers,
-                              std::list<viennacl::backend::mem_handle> & 
col_buffers,
-                              std::list<viennacl::backend::mem_handle> & 
element_buffers,
-                              std::list<vcl_size_t> & row_elimination_num_list)
-{
-  level_scheduling_setup_impl(LU, diagonal_LU, row_index_arrays, row_buffers, 
col_buffers, element_buffers, row_elimination_num_list, true);
-}
-
-
-//
-// Multifrontal substitution (both L and U). Will partly be moved to 
single_threaded/opencl/cuda implementations
-//
-template<typename NumericT>
-void level_scheduling_substitute(viennacl::vector<NumericT> & vec,
-                                 std::list<viennacl::backend::mem_handle> 
const & row_index_arrays,
-                                 std::list<viennacl::backend::mem_handle> 
const & row_buffers,
-                                 std::list<viennacl::backend::mem_handle> 
const & col_buffers,
-                                 std::list<viennacl::backend::mem_handle> 
const & element_buffers,
-                                 std::list<vcl_size_t> const & 
row_elimination_num_list)
-{
-  typedef typename std::list< viennacl::backend::mem_handle >::const_iterator  
ListIterator;
-  ListIterator row_index_array_it = row_index_arrays.begin();
-  ListIterator row_buffers_it = row_buffers.begin();
-  ListIterator col_buffers_it = col_buffers.begin();
-  ListIterator element_buffers_it = element_buffers.begin();
-  typename std::list< vcl_size_t>::const_iterator row_elimination_num_it = 
row_elimination_num_list.begin();
-  for (vcl_size_t i=0; i<row_index_arrays.size(); ++i)
-  {
-    viennacl::linalg::detail::level_scheduling_substitute(vec, 
*row_index_array_it, *row_buffers_it, *col_buffers_it, *element_buffers_it, 
*row_elimination_num_it);
-
-    ++row_index_array_it;
-    ++row_buffers_it;
-    ++col_buffers_it;
-    ++element_buffers_it;
-    ++row_elimination_num_it;
-  }
-}
-
-
-
-
-
-} // namespace detail
-} // namespace linalg
-} // namespace viennacl
-
-
-
-
-#endif
-
-
-

http://git-wip-us.apache.org/repos/asf/mahout/blob/7ae549fa/native-viennaCL/src/main/cpp/viennacl/linalg/detail/ilu/ilu0.hpp
----------------------------------------------------------------------
diff --git a/native-viennaCL/src/main/cpp/viennacl/linalg/detail/ilu/ilu0.hpp 
b/native-viennaCL/src/main/cpp/viennacl/linalg/detail/ilu/ilu0.hpp
deleted file mode 100644
index 1c3191a..0000000
--- a/native-viennaCL/src/main/cpp/viennacl/linalg/detail/ilu/ilu0.hpp
+++ /dev/null
@@ -1,379 +0,0 @@
-#ifndef VIENNACL_LINALG_DETAIL_ILU0_HPP_
-#define VIENNACL_LINALG_DETAIL_ILU0_HPP_
-
-/* =========================================================================
-   Copyright (c) 2010-2016, Institute for Microelectronics,
-                            Institute for Analysis and Scientific Computing,
-                            TU Wien.
-   Portions of this software are copyright by UChicago Argonne, LLC.
-
-                            -----------------
-                  ViennaCL - The Vienna Computing Library
-                            -----------------
-
-   Project Head:    Karl Rupp                   [email protected]
-
-   (A list of authors and contributors can be found in the manual)
-
-   License:         MIT (X11), see file LICENSE in the base directory
-============================================================================= 
*/
-
-/** @file viennacl/linalg/detail/ilu/ilu0.hpp
-  @brief Implementations of incomplete factorization preconditioners with 
static nonzero pattern.
-
-  Contributed by Evan Bollig.
-
-  ILU0 (Incomplete LU with zero fill-in)
-  - All preconditioner nonzeros exist at locations that were nonzero in the 
input matrix.
-  - The number of nonzeros in the output preconditioner are exactly the same 
number as the input matrix
-
- Evan Bollig 3/30/12
-
- Adapted from viennacl/linalg/detail/ilut.hpp
-
- Low-level reimplementation by Karl Rupp in Nov 2012, increasing performance 
substantially. Also added level-scheduling.
-
-*/
-
-#include <vector>
-#include <cmath>
-#include <iostream>
-#include "viennacl/forwards.h"
-#include "viennacl/tools/tools.hpp"
-#include "viennacl/linalg/detail/ilu/common.hpp"
-#include "viennacl/compressed_matrix.hpp"
-#include "viennacl/backend/memory.hpp"
-
-#include "viennacl/linalg/host_based/common.hpp"
-
-#include <map>
-
-namespace viennacl
-{
-namespace linalg
-{
-
-/** @brief A tag for incomplete LU factorization with static pattern (ILU0)
-*/
-class ilu0_tag
-{
-public:
-  ilu0_tag(bool with_level_scheduling = false) : 
use_level_scheduling_(with_level_scheduling) {}
-
-  bool use_level_scheduling() const { return use_level_scheduling_; }
-  void use_level_scheduling(bool b) { use_level_scheduling_ = b; }
-
-private:
-  bool use_level_scheduling_;
-};
-
-
-/** @brief Implementation of a ILU-preconditioner with static pattern. 
Optimized version for CSR matrices.
-  *
-  * refer to the Algorithm in Saad's book (1996 edition)
-  *
-  *  @param A       The sparse matrix matrix. The result is directly written 
to A.
-  */
-template<typename NumericT>
-void precondition(viennacl::compressed_matrix<NumericT> & A, ilu0_tag const & 
/* tag */)
-{
-  assert( (A.handle1().get_active_handle_id() == viennacl::MAIN_MEMORY) && 
bool("System matrix must reside in main memory for ILU0") );
-  assert( (A.handle2().get_active_handle_id() == viennacl::MAIN_MEMORY) && 
bool("System matrix must reside in main memory for ILU0") );
-  assert( (A.handle().get_active_handle_id()  == viennacl::MAIN_MEMORY) && 
bool("System matrix must reside in main memory for ILU0") );
-
-  NumericT           * elements   = 
viennacl::linalg::host_based::detail::extract_raw_pointer<NumericT>(A.handle());
-  unsigned int const * row_buffer = 
viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned 
int>(A.handle1());
-  unsigned int const * col_buffer = 
viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned 
int>(A.handle2());
-
-  // Note: Line numbers in the following refer to the algorithm in Saad's book
-
-  for (vcl_size_t i=1; i<A.size1(); ++i)  // Line 1
-  {
-    unsigned int row_i_begin = row_buffer[i];
-    unsigned int row_i_end   = row_buffer[i+1];
-    for (unsigned int buf_index_k = row_i_begin; buf_index_k < row_i_end; 
++buf_index_k) //Note: We do not assume that the column indices within a row 
are sorted
-    {
-      unsigned int k = col_buffer[buf_index_k];
-      if (k >= i)
-        continue; //Note: We do not assume that the column indices within a 
row are sorted
-
-      unsigned int row_k_begin = row_buffer[k];
-      unsigned int row_k_end   = row_buffer[k+1];
-
-      // get a_kk:
-      NumericT a_kk = 0;
-      for (unsigned int buf_index_akk = row_k_begin; buf_index_akk < 
row_k_end; ++buf_index_akk)
-      {
-        if (col_buffer[buf_index_akk] == k)
-        {
-          a_kk = elements[buf_index_akk];
-          break;
-        }
-      }
-
-      NumericT & a_ik = elements[buf_index_k];
-      a_ik /= a_kk;                                 //Line 3
-
-      for (unsigned int buf_index_j = row_i_begin; buf_index_j < row_i_end; 
++buf_index_j) //Note: We do not assume that the column indices within a row 
are sorted
-      {
-        unsigned int j = col_buffer[buf_index_j];
-        if (j <= k)
-          continue;
-
-        // determine a_kj:
-        NumericT a_kj = 0;
-        for (unsigned int buf_index_akj = row_k_begin; buf_index_akj < 
row_k_end; ++buf_index_akj)
-        {
-          if (col_buffer[buf_index_akj] == j)
-          {
-            a_kj = elements[buf_index_akj];
-            break;
-          }
-        }
-
-        //a_ij -= a_ik * a_kj
-        elements[buf_index_j] -= a_ik * a_kj;  //Line 5
-      }
-    }
-  }
-
-}
-
-
-/** @brief ILU0 preconditioner class, can be supplied to solve()-routines
-*/
-template<typename MatrixT>
-class ilu0_precond
-{
-  typedef typename MatrixT::value_type      NumericType;
-
-public:
-  ilu0_precond(MatrixT const & mat, ilu0_tag const & tag) : tag_(tag), LU_()
-  {
-    //initialize preconditioner:
-    //std::cout << "Start CPU precond" << std::endl;
-    init(mat);
-    //std::cout << "End CPU precond" << std::endl;
-  }
-
-  template<typename VectorT>
-  void apply(VectorT & vec) const
-  {
-    unsigned int const * row_buffer = 
viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned 
int>(LU_.handle1());
-    unsigned int const * col_buffer = 
viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned 
int>(LU_.handle2());
-    NumericType  const * elements   = 
viennacl::linalg::host_based::detail::extract_raw_pointer<NumericType>(LU_.handle());
-
-    
viennacl::linalg::host_based::detail::csr_inplace_solve<NumericType>(row_buffer,
 col_buffer, elements, vec, LU_.size2(), unit_lower_tag());
-    
viennacl::linalg::host_based::detail::csr_inplace_solve<NumericType>(row_buffer,
 col_buffer, elements, vec, LU_.size2(), upper_tag());
-  }
-
-private:
-  void init(MatrixT const & mat)
-  {
-    viennacl::context host_context(viennacl::MAIN_MEMORY);
-    viennacl::switch_memory_context(LU_, host_context);
-
-    viennacl::copy(mat, LU_);
-    viennacl::linalg::precondition(LU_, tag_);
-  }
-
-  ilu0_tag                                   tag_;
-  viennacl::compressed_matrix<NumericType>   LU_;
-};
-
-
-/** @brief ILU0 preconditioner class, can be supplied to solve()-routines.
-*
-*  Specialization for compressed_matrix
-*/
-template<typename NumericT, unsigned int AlignmentV>
-class ilu0_precond< viennacl::compressed_matrix<NumericT, AlignmentV> >
-{
-  typedef viennacl::compressed_matrix<NumericT, AlignmentV>   MatrixType;
-
-public:
-  ilu0_precond(MatrixType const & mat, ilu0_tag const & tag)
-    : tag_(tag),
-      LU_(mat.size1(), mat.size2(), viennacl::traits::context(mat))
-  {
-    //initialize preconditioner:
-    //std::cout << "Start GPU precond" << std::endl;
-    init(mat);
-    //std::cout << "End GPU precond" << std::endl;
-  }
-
-  void apply(viennacl::vector<NumericT> & vec) const
-  {
-    viennacl::context host_context(viennacl::MAIN_MEMORY);
-    if (vec.handle().get_active_handle_id() != viennacl::MAIN_MEMORY)
-    {
-      if (tag_.use_level_scheduling())
-      {
-        //std::cout << "Using multifrontal on GPU..." << std::endl;
-        detail::level_scheduling_substitute(vec,
-                                            multifrontal_L_row_index_arrays_,
-                                            multifrontal_L_row_buffers_,
-                                            multifrontal_L_col_buffers_,
-                                            multifrontal_L_element_buffers_,
-                                            
multifrontal_L_row_elimination_num_list_);
-
-        vec = viennacl::linalg::element_div(vec, multifrontal_U_diagonal_);
-
-        detail::level_scheduling_substitute(vec,
-                                            multifrontal_U_row_index_arrays_,
-                                            multifrontal_U_row_buffers_,
-                                            multifrontal_U_col_buffers_,
-                                            multifrontal_U_element_buffers_,
-                                            
multifrontal_U_row_elimination_num_list_);
-      }
-      else
-      {
-        viennacl::context old_context = viennacl::traits::context(vec);
-        viennacl::switch_memory_context(vec, host_context);
-        viennacl::linalg::inplace_solve(LU_, vec, unit_lower_tag());
-        viennacl::linalg::inplace_solve(LU_, vec, upper_tag());
-        viennacl::switch_memory_context(vec, old_context);
-      }
-    }
-    else //apply ILU0 directly on CPU
-    {
-      if (tag_.use_level_scheduling())
-      {
-        //std::cout << "Using multifrontal..." << std::endl;
-        detail::level_scheduling_substitute(vec,
-                                            multifrontal_L_row_index_arrays_,
-                                            multifrontal_L_row_buffers_,
-                                            multifrontal_L_col_buffers_,
-                                            multifrontal_L_element_buffers_,
-                                            
multifrontal_L_row_elimination_num_list_);
-
-        vec = viennacl::linalg::element_div(vec, multifrontal_U_diagonal_);
-
-        detail::level_scheduling_substitute(vec,
-                                            multifrontal_U_row_index_arrays_,
-                                            multifrontal_U_row_buffers_,
-                                            multifrontal_U_col_buffers_,
-                                            multifrontal_U_element_buffers_,
-                                            
multifrontal_U_row_elimination_num_list_);
-      }
-      else
-      {
-        viennacl::linalg::inplace_solve(LU_, vec, unit_lower_tag());
-        viennacl::linalg::inplace_solve(LU_, vec, upper_tag());
-      }
-    }
-  }
-
-  vcl_size_t levels() const { return multifrontal_L_row_index_arrays_.size(); }
-
-private:
-  void init(MatrixType const & mat)
-  {
-    viennacl::context host_context(viennacl::MAIN_MEMORY);
-    viennacl::switch_memory_context(LU_, host_context);
-    LU_ = mat;
-    viennacl::linalg::precondition(LU_, tag_);
-
-    if (!tag_.use_level_scheduling())
-      return;
-
-    // multifrontal part:
-    viennacl::switch_memory_context(multifrontal_U_diagonal_, host_context);
-    multifrontal_U_diagonal_.resize(LU_.size1(), false);
-    host_based::detail::row_info(LU_, multifrontal_U_diagonal_, 
viennacl::linalg::detail::SPARSE_ROW_DIAGONAL);
-
-    detail::level_scheduling_setup_L(LU_,
-                                     multifrontal_U_diagonal_, //dummy
-                                     multifrontal_L_row_index_arrays_,
-                                     multifrontal_L_row_buffers_,
-                                     multifrontal_L_col_buffers_,
-                                     multifrontal_L_element_buffers_,
-                                     multifrontal_L_row_elimination_num_list_);
-
-
-    detail::level_scheduling_setup_U(LU_,
-                                     multifrontal_U_diagonal_,
-                                     multifrontal_U_row_index_arrays_,
-                                     multifrontal_U_row_buffers_,
-                                     multifrontal_U_col_buffers_,
-                                     multifrontal_U_element_buffers_,
-                                     multifrontal_U_row_elimination_num_list_);
-
-    //
-    // Bring to device if necessary:
-    //
-
-    // L:
-    for (typename std::list< viennacl::backend::mem_handle >::iterator it  = 
multifrontal_L_row_index_arrays_.begin();
-                                                                       it != 
multifrontal_L_row_index_arrays_.end();
-                                                                     ++it)
-      viennacl::backend::switch_memory_context<unsigned int>(*it, 
viennacl::traits::context(mat));
-
-    for (typename std::list< viennacl::backend::mem_handle >::iterator it  = 
multifrontal_L_row_buffers_.begin();
-                                                                       it != 
multifrontal_L_row_buffers_.end();
-                                                                     ++it)
-      viennacl::backend::switch_memory_context<unsigned int>(*it, 
viennacl::traits::context(mat));
-
-    for (typename std::list< viennacl::backend::mem_handle >::iterator it  = 
multifrontal_L_col_buffers_.begin();
-                                                                       it != 
multifrontal_L_col_buffers_.end();
-                                                                     ++it)
-      viennacl::backend::switch_memory_context<unsigned int>(*it, 
viennacl::traits::context(mat));
-
-    for (typename std::list< viennacl::backend::mem_handle >::iterator it  = 
multifrontal_L_element_buffers_.begin();
-                                                                       it != 
multifrontal_L_element_buffers_.end();
-                                                                     ++it)
-      viennacl::backend::switch_memory_context<NumericT>(*it, 
viennacl::traits::context(mat));
-
-
-    // U:
-
-    viennacl::switch_memory_context(multifrontal_U_diagonal_, 
viennacl::traits::context(mat));
-
-    for (typename std::list< viennacl::backend::mem_handle >::iterator it  = 
multifrontal_U_row_index_arrays_.begin();
-                                                                       it != 
multifrontal_U_row_index_arrays_.end();
-                                                                     ++it)
-      viennacl::backend::switch_memory_context<unsigned int>(*it, 
viennacl::traits::context(mat));
-
-    for (typename std::list< viennacl::backend::mem_handle >::iterator it  = 
multifrontal_U_row_buffers_.begin();
-                                                                       it != 
multifrontal_U_row_buffers_.end();
-                                                                     ++it)
-      viennacl::backend::switch_memory_context<unsigned int>(*it, 
viennacl::traits::context(mat));
-
-    for (typename std::list< viennacl::backend::mem_handle >::iterator it  = 
multifrontal_U_col_buffers_.begin();
-                                                                       it != 
multifrontal_U_col_buffers_.end();
-                                                                     ++it)
-      viennacl::backend::switch_memory_context<unsigned int>(*it, 
viennacl::traits::context(mat));
-
-    for (typename std::list< viennacl::backend::mem_handle >::iterator it  = 
multifrontal_U_element_buffers_.begin();
-                                                                       it != 
multifrontal_U_element_buffers_.end();
-                                                                     ++it)
-      viennacl::backend::switch_memory_context<NumericT>(*it, 
viennacl::traits::context(mat));
-
-  }
-
-  ilu0_tag tag_;
-  viennacl::compressed_matrix<NumericT> LU_;
-
-  std::list<viennacl::backend::mem_handle> multifrontal_L_row_index_arrays_;
-  std::list<viennacl::backend::mem_handle> multifrontal_L_row_buffers_;
-  std::list<viennacl::backend::mem_handle> multifrontal_L_col_buffers_;
-  std::list<viennacl::backend::mem_handle> multifrontal_L_element_buffers_;
-  std::list<vcl_size_t>                    
multifrontal_L_row_elimination_num_list_;
-
-  viennacl::vector<NumericT> multifrontal_U_diagonal_;
-  std::list<viennacl::backend::mem_handle> multifrontal_U_row_index_arrays_;
-  std::list<viennacl::backend::mem_handle> multifrontal_U_row_buffers_;
-  std::list<viennacl::backend::mem_handle> multifrontal_U_col_buffers_;
-  std::list<viennacl::backend::mem_handle> multifrontal_U_element_buffers_;
-  std::list<vcl_size_t>                    
multifrontal_U_row_elimination_num_list_;
-
-};
-
-} // namespace linalg
-} // namespace viennacl
-
-
-#endif
-
-
-

http://git-wip-us.apache.org/repos/asf/mahout/blob/7ae549fa/native-viennaCL/src/main/cpp/viennacl/linalg/detail/ilu/ilut.hpp
----------------------------------------------------------------------
diff --git a/native-viennaCL/src/main/cpp/viennacl/linalg/detail/ilu/ilut.hpp 
b/native-viennaCL/src/main/cpp/viennacl/linalg/detail/ilu/ilut.hpp
deleted file mode 100644
index 11ab842..0000000
--- a/native-viennaCL/src/main/cpp/viennacl/linalg/detail/ilu/ilut.hpp
+++ /dev/null
@@ -1,597 +0,0 @@
-#ifndef VIENNACL_LINALG_DETAIL_ILUT_HPP_
-#define VIENNACL_LINALG_DETAIL_ILUT_HPP_
-
-/* =========================================================================
-   Copyright (c) 2010-2016, Institute for Microelectronics,
-                            Institute for Analysis and Scientific Computing,
-                            TU Wien.
-   Portions of this software are copyright by UChicago Argonne, LLC.
-
-                            -----------------
-                  ViennaCL - The Vienna Computing Library
-                            -----------------
-
-   Project Head:    Karl Rupp                   [email protected]
-
-   (A list of authors and contributors can be found in the manual)
-
-   License:         MIT (X11), see file LICENSE in the base directory
-============================================================================= 
*/
-
-/** @file viennacl/linalg/detail/ilu/ilut.hpp
-    @brief Implementations of an incomplete factorization preconditioner with 
threshold (ILUT)
-*/
-
-#include <vector>
-#include <cmath>
-#include <iostream>
-#include "viennacl/forwards.h"
-#include "viennacl/tools/tools.hpp"
-
-#include "viennacl/linalg/detail/ilu/common.hpp"
-#include "viennacl/compressed_matrix.hpp"
-
-#include "viennacl/linalg/host_based/common.hpp"
-
-#include <map>
-
-namespace viennacl
-{
-namespace linalg
-{
-
-/** @brief A tag for incomplete LU factorization with threshold (ILUT)
-*/
-class ilut_tag
-{
-  public:
-    /** @brief The constructor.
-    *
-    * @param entries_per_row        Number of nonzero entries per row in L and 
U. Note that L and U are stored in a single matrix, thus there are 
2*entries_per_row in total.
-    * @param drop_tolerance         The drop tolerance for ILUT
-    * @param with_level_scheduling  Flag for enabling level scheduling on GPUs.
-    */
-    ilut_tag(unsigned int entries_per_row = 20,
-             double       drop_tolerance = 1e-4,
-             bool         with_level_scheduling = false)
-      : entries_per_row_(entries_per_row),
-        drop_tolerance_(drop_tolerance),
-        use_level_scheduling_(with_level_scheduling) {}
-
-    void set_drop_tolerance(double tol)
-    {
-      if (tol > 0)
-        drop_tolerance_ = tol;
-    }
-    double get_drop_tolerance() const { return drop_tolerance_; }
-
-    void set_entries_per_row(unsigned int e)
-    {
-      if (e > 0)
-        entries_per_row_ = e;
-    }
-
-    unsigned int get_entries_per_row() const { return entries_per_row_; }
-
-    bool use_level_scheduling() const { return use_level_scheduling_; }
-    void use_level_scheduling(bool b) { use_level_scheduling_ = b; }
-
-  private:
-    unsigned int entries_per_row_;
-    double       drop_tolerance_;
-    bool         use_level_scheduling_;
-};
-
-
-namespace detail
-{
-  /** @brief Helper struct for holding a sparse vector in linear memory. For 
internal use only.
-    *
-    * Unfortunately, the 'naive' implementation using a std::map<> is almost 
always too slow.
-    *
-    */
-  template<typename NumericT>
-  struct ilut_sparse_vector
-  {
-    ilut_sparse_vector(vcl_size_t alloc_size = 0) : size_(0), 
col_indices_(alloc_size), elements_(alloc_size) {}
-
-    void resize_if_bigger(vcl_size_t s)
-    {
-      if (s > elements_.size())
-      {
-        col_indices_.resize(s);
-        elements_.resize(s);
-      }
-      size_ = s;
-    }
-
-    vcl_size_t size_;
-    std::vector<unsigned int> col_indices_;
-    std::vector<NumericT>     elements_;
-  };
-
-  /** @brief Subtracts a scaled sparse vector u from a sparse vector w and 
writes the output to z: z = w - alpha * u
-    *
-    * Sparsity pattern of u and w are usually different.
-    *
-    * @return Length of new vector
-    */
-  template<typename IndexT, typename NumericT>
-  IndexT merge_subtract_sparse_rows(IndexT const * w_coords, NumericT const * 
w_elements, IndexT w_size,
-                                    IndexT const * u_coords, NumericT const * 
u_elements, IndexT u_size, NumericT alpha,
-                                    IndexT       * z_coords, NumericT       * 
z_elements)
-  {
-    IndexT index_w = 0;
-    IndexT index_u = 0;
-    IndexT index_z = 0;
-
-    while (1)
-    {
-      if (index_w < w_size && index_u < u_size)
-      {
-        if (w_coords[index_w] < u_coords[index_u])
-        {
-          z_coords[index_z]     = w_coords[index_w];
-          z_elements[index_z++] = w_elements[index_w++];
-        }
-        else if (w_coords[index_w] == u_coords[index_u])
-        {
-          z_coords[index_z]     = w_coords[index_w];
-          z_elements[index_z++] = w_elements[index_w++] - alpha * 
u_elements[index_u++];
-        }
-        else
-        {
-          z_coords[index_z]     = u_coords[index_u];
-          z_elements[index_z++] = - alpha * u_elements[index_u++];
-        }
-      }
-      else if (index_w == w_size && index_u < u_size)
-      {
-        z_coords[index_z]     = u_coords[index_u];
-        z_elements[index_z++] = - alpha * u_elements[index_u++];
-      }
-      else if (index_w < w_size && index_u == u_size)
-      {
-        z_coords[index_z]     = w_coords[index_w];
-        z_elements[index_z++] = w_elements[index_w++];
-      }
-      else
-        return index_z;
-    }
-  }
-
-  template<typename SizeT, typename NumericT>
-  void insert_with_value_sort(std::vector<std::pair<SizeT, NumericT> > & map,
-                              SizeT index, NumericT value)
-  {
-    NumericT abs_value = std::fabs(value);
-    if (abs_value > 0)
-    {
-      // find first element with smaller absolute value:
-      std::size_t first_smaller_index = 0;
-      while (first_smaller_index < map.size() && 
std::fabs(map[first_smaller_index].second) > abs_value)
-        ++first_smaller_index;
-
-      std::pair<SizeT, NumericT> tmp(index, value);
-      for (std::size_t j=first_smaller_index; j<map.size(); ++j)
-        std::swap(map[j], tmp);
-    }
-  }
-
-}
-
-/** @brief Implementation of a ILU-preconditioner with threshold. Optimized 
implementation for compressed_matrix.
-*
-* refer to Algorithm 10.6 by Saad's book (1996 edition)
-*
-*  @param A       The input matrix. Either a compressed_matrix or of type 
std::vector< std::map<T, U> >
-*  @param L       The output matrix for L.
-*  @param U       The output matrix for U.
-*  @param tag     An ilut_tag in order to dispatch among several other 
preconditioners.
-*/
-template<typename NumericT>
-void precondition(viennacl::compressed_matrix<NumericT> const & A,
-                  viennacl::compressed_matrix<NumericT>       & L,
-                  viennacl::compressed_matrix<NumericT>       & U,
-                  ilut_tag const & tag)
-{
-  assert(A.size1() == L.size1() && bool("Output matrix size mismatch") );
-  assert(A.size1() == U.size1() && bool("Output matrix size mismatch") );
-
-  L.reserve( tag.get_entries_per_row()      * A.size1());
-  U.reserve((tag.get_entries_per_row() + 1) * A.size1());
-
-  vcl_size_t avg_nnz_per_row = static_cast<vcl_size_t>(A.nnz() / A.size1());
-  detail::ilut_sparse_vector<NumericT> w1(tag.get_entries_per_row() * 
(avg_nnz_per_row + 10));
-  detail::ilut_sparse_vector<NumericT> w2(tag.get_entries_per_row() * 
(avg_nnz_per_row + 10));
-  detail::ilut_sparse_vector<NumericT> * w_in  = &w1;
-  detail::ilut_sparse_vector<NumericT> * w_out = &w2;
-  std::vector<NumericT> diagonal_U(A.size1());
-
-  NumericT     const * elements_A   = 
viennacl::linalg::host_based::detail::extract_raw_pointer<NumericT>(A.handle());
-  unsigned int const * row_buffer_A = 
viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned 
int>(A.handle1());
-  unsigned int const * col_buffer_A = 
viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned 
int>(A.handle2());
-
-  NumericT           * elements_L   = 
viennacl::linalg::host_based::detail::extract_raw_pointer<NumericT>(L.handle());
-  unsigned int       * row_buffer_L = 
viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned 
int>(L.handle1()); row_buffer_L[0] = 0;
-  unsigned int       * col_buffer_L = 
viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned 
int>(L.handle2());
-
-  NumericT           * elements_U   = 
viennacl::linalg::host_based::detail::extract_raw_pointer<NumericT>(U.handle());
-  unsigned int       * row_buffer_U = 
viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned 
int>(U.handle1()); row_buffer_U[0] = 0;
-  unsigned int       * col_buffer_U = 
viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned 
int>(U.handle2());
-
-  std::vector<std::pair<unsigned int, NumericT> > 
sorted_entries_L(tag.get_entries_per_row());
-  std::vector<std::pair<unsigned int, NumericT> > 
sorted_entries_U(tag.get_entries_per_row());
-
-  for (vcl_size_t i=0; i<viennacl::traits::size1(A); ++i)  // Line 1
-  {
-    std::fill(sorted_entries_L.begin(), sorted_entries_L.end(), 
std::pair<unsigned int, NumericT>(0, NumericT(0)));
-    std::fill(sorted_entries_U.begin(), sorted_entries_U.end(), 
std::pair<unsigned int, NumericT>(0, NumericT(0)));
-
-    //line 2: set up w
-    w_in->resize_if_bigger(row_buffer_A[i+1] - row_buffer_A[i]);
-    NumericT row_norm = 0;
-    unsigned int k = 0;
-    for (unsigned int j = row_buffer_A[i]; j < row_buffer_A[i+1]; ++j, ++k)
-    {
-      w_in->col_indices_[k] = col_buffer_A[j];
-      NumericT entry = elements_A[j];
-      w_in->elements_[k] = entry;
-      row_norm += entry * entry;
-    }
-    row_norm = std::sqrt(row_norm);
-    NumericT tau_i = static_cast<NumericT>(tag.get_drop_tolerance()) * 
row_norm;
-
-    //line 3: Iterate over lower diagonal parts of A:
-    k = 0;
-    unsigned int current_col = (row_buffer_A[i+1] > row_buffer_A[i]) ? 
w_in->col_indices_[k] : static_cast<unsigned int>(i); // mind empty rows here!
-    while (current_col < i)
-    {
-      //line 4:
-      NumericT a_kk = diagonal_U[current_col];
-
-      NumericT w_k_entry = w_in->elements_[k] / a_kk;
-      w_in->elements_[k] = w_k_entry;
-
-      //lines 5,6: (dropping rule to w_k)
-      if ( std::fabs(w_k_entry) > tau_i)
-      {
-        //line 7:
-        unsigned int row_U_begin = row_buffer_U[current_col];
-        unsigned int row_U_end   = row_buffer_U[current_col + 1];
-
-        if (row_U_end > row_U_begin)
-        {
-          w_out->resize_if_bigger(w_in->size_ + (row_U_end - row_U_begin) - 1);
-          w_out->size_ = 
detail::merge_subtract_sparse_rows(&(w_in->col_indices_[0]), 
&(w_in->elements_[0]), static_cast<unsigned int>(w_in->size_),
-                                                            col_buffer_U + 
row_U_begin + 1, elements_U + row_U_begin + 1, (row_U_end - row_U_begin) - 1, 
w_k_entry,
-                                                            
&(w_out->col_indices_[0]), &(w_out->elements_[0])
-                                                           );
-          ++k;
-        }
-      }
-      else // drop element
-      {
-        w_out->resize_if_bigger(w_in->size_ - 1);
-        for (unsigned int r = 0; r < k; ++r)
-        {
-          w_out->col_indices_[r] = w_in->col_indices_[r];
-          w_out->elements_[r]    = w_in->elements_[r];
-        }
-        for (unsigned int r = k+1; r < w_in->size_; ++r)
-        {
-          w_out->col_indices_[r-1] = w_in->col_indices_[r];
-          w_out->elements_[r-1]    = w_in->elements_[r];
-        }
-
-        // Note: No increment to k here, element was dropped!
-      }
-
-      // swap pointers to w1 and w2
-      std::swap(w_in, w_out);
-
-      // process next entry:
-      current_col = (k < w_in->size_) ? w_in->col_indices_[k] : 
static_cast<unsigned int>(i);
-    } // while()
-
-    // Line 10: Apply a dropping rule to w
-    // To do so, we write values to a temporary array
-    for (unsigned int r = 0; r < w_in->size_; ++r)
-    {
-      unsigned int col   = w_in->col_indices_[r];
-      NumericT     value = w_in->elements_[r];
-
-      if (col < i) // entry for L:
-        detail::insert_with_value_sort(sorted_entries_L, col, value);
-      else if (col == i) // do not drop diagonal element
-      {
-        diagonal_U[i] = value;
-        if (value <= 0 && value >= 0)
-        {
-          std::cerr << "ViennaCL: FATAL ERROR in ILUT(): Diagonal entry 
computed to zero (" << value << ") in row " << i << "!" << std::endl;
-          throw zero_on_diagonal_exception("ILUT zero diagonal!");
-        }
-      }
-      else // entry for U:
-        detail::insert_with_value_sort(sorted_entries_U, col, value);
-    }
-
-    //Lines 10-12: Apply a dropping rule to w, write the largest p values to L 
and U
-    unsigned int offset_L = row_buffer_L[i];
-    std::sort(sorted_entries_L.begin(), sorted_entries_L.end());
-    for (unsigned int j=0; j<tag.get_entries_per_row(); ++j)
-      if (std::fabs(sorted_entries_L[j].second) > 0)
-      {
-        col_buffer_L[offset_L] = sorted_entries_L[j].first;
-        elements_L[offset_L]   = sorted_entries_L[j].second;
-        ++offset_L;
-      }
-    row_buffer_L[i+1] = offset_L;
-
-    unsigned int offset_U = row_buffer_U[i];
-    col_buffer_U[offset_U] = static_cast<unsigned int>(i);
-    elements_U[offset_U]   = diagonal_U[i];
-    ++offset_U;
-    std::sort(sorted_entries_U.begin(), sorted_entries_U.end());
-    for (unsigned int j=0; j<tag.get_entries_per_row(); ++j)
-      if (std::fabs(sorted_entries_U[j].second) > 0)
-      {
-        col_buffer_U[offset_U] = sorted_entries_U[j].first;
-        elements_U[offset_U]   = sorted_entries_U[j].second;
-        ++offset_U;
-      }
-    row_buffer_U[i+1] = offset_U;
-
-  } //for i
-}
-
-
-/** @brief ILUT preconditioner class, can be supplied to solve()-routines
-*/
-template<typename MatrixT>
-class ilut_precond
-{
-  typedef typename MatrixT::value_type      NumericType;
-
-public:
-  ilut_precond(MatrixT const & mat, ilut_tag const & tag) : tag_(tag), 
L_(mat.size1(), mat.size2()), U_(mat.size1(), mat.size2())
-  {
-    //initialize preconditioner:
-    //std::cout << "Start CPU precond" << std::endl;
-    init(mat);
-    //std::cout << "End CPU precond" << std::endl;
-  }
-
-  template<typename VectorT>
-  void apply(VectorT & vec) const
-  {
-    //Note: Since vec can be a rather arbitrary vector type, we call the more 
generic version in the backend manually:
-    {
-      unsigned int const * row_buffer = 
viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned 
int>(L_.handle1());
-      unsigned int const * col_buffer = 
viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned 
int>(L_.handle2());
-      NumericType  const * elements   = 
viennacl::linalg::host_based::detail::extract_raw_pointer<NumericType>(L_.handle());
-
-      
viennacl::linalg::host_based::detail::csr_inplace_solve<NumericType>(row_buffer,
 col_buffer, elements, vec, L_.size2(), unit_lower_tag());
-    }
-    {
-      unsigned int const * row_buffer = 
viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned 
int>(U_.handle1());
-      unsigned int const * col_buffer = 
viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned 
int>(U_.handle2());
-      NumericType  const * elements   = 
viennacl::linalg::host_based::detail::extract_raw_pointer<NumericType>(U_.handle());
-
-      
viennacl::linalg::host_based::detail::csr_inplace_solve<NumericType>(row_buffer,
 col_buffer, elements, vec, U_.size2(), upper_tag());
-    }
-  }
-
-private:
-  void init(MatrixT const & mat)
-  {
-    viennacl::context host_context(viennacl::MAIN_MEMORY);
-    viennacl::compressed_matrix<NumericType> temp;
-    viennacl::switch_memory_context(temp, host_context);
-    viennacl::switch_memory_context(L_, host_context);
-    viennacl::switch_memory_context(U_, host_context);
-
-    viennacl::copy(mat, temp);
-
-    viennacl::linalg::precondition(temp, L_, U_, tag_);
-  }
-
-  ilut_tag tag_;
-  viennacl::compressed_matrix<NumericType> L_;
-  viennacl::compressed_matrix<NumericType> U_;
-};
-
-
-/** @brief ILUT preconditioner class, can be supplied to solve()-routines.
-*
-*  Specialization for compressed_matrix
-*/
-template<typename NumericT, unsigned int AlignmentV>
-class ilut_precond< viennacl::compressed_matrix<NumericT, AlignmentV> >
-{
-typedef viennacl::compressed_matrix<NumericT, AlignmentV>   MatrixType;
-
-public:
-  ilut_precond(MatrixType const & mat, ilut_tag const & tag)
-    : tag_(tag),
-      L_(mat.size1(), mat.size2(), viennacl::traits::context(mat)),
-      U_(mat.size1(), mat.size2(), viennacl::traits::context(mat))
-  {
-    //initialize preconditioner:
-    //std::cout << "Start GPU precond" << std::endl;
-    init(mat);
-    //std::cout << "End GPU precond" << std::endl;
-  }
-
-  void apply(viennacl::vector<NumericT> & vec) const
-  {
-    if (vec.handle().get_active_handle_id() != viennacl::MAIN_MEMORY)
-    {
-      if (tag_.use_level_scheduling())
-      {
-        //std::cout << "Using multifrontal on GPU..." << std::endl;
-        detail::level_scheduling_substitute(vec,
-                                            multifrontal_L_row_index_arrays_,
-                                            multifrontal_L_row_buffers_,
-                                            multifrontal_L_col_buffers_,
-                                            multifrontal_L_element_buffers_,
-                                            
multifrontal_L_row_elimination_num_list_);
-
-        vec = viennacl::linalg::element_div(vec, multifrontal_U_diagonal_);
-
-        detail::level_scheduling_substitute(vec,
-                                            multifrontal_U_row_index_arrays_,
-                                            multifrontal_U_row_buffers_,
-                                            multifrontal_U_col_buffers_,
-                                            multifrontal_U_element_buffers_,
-                                            
multifrontal_U_row_elimination_num_list_);
-
-      }
-      else
-      {
-        viennacl::context host_context(viennacl::MAIN_MEMORY);
-        viennacl::context old_context = viennacl::traits::context(vec);
-        viennacl::switch_memory_context(vec, host_context);
-        viennacl::linalg::inplace_solve(L_, vec, unit_lower_tag());
-        viennacl::linalg::inplace_solve(U_, vec, upper_tag());
-        viennacl::switch_memory_context(vec, old_context);
-      }
-    }
-    else //apply ILUT directly:
-    {
-      viennacl::linalg::inplace_solve(L_, vec, unit_lower_tag());
-      viennacl::linalg::inplace_solve(U_, vec, upper_tag());
-    }
-  }
-
-private:
-  void init(MatrixType const & mat)
-  {
-    viennacl::context host_context(viennacl::MAIN_MEMORY);
-    viennacl::switch_memory_context(L_, host_context);
-    viennacl::switch_memory_context(U_, host_context);
-
-    if (viennacl::traits::context(mat).memory_type() == viennacl::MAIN_MEMORY)
-    {
-      viennacl::linalg::precondition(mat, L_, U_, tag_);
-    }
-    else //we need to copy to CPU
-    {
-      viennacl::compressed_matrix<NumericT> cpu_mat(mat.size1(), mat.size2(), 
viennacl::traits::context(mat));
-      viennacl::switch_memory_context(cpu_mat, host_context);
-
-      cpu_mat = mat;
-
-      viennacl::linalg::precondition(cpu_mat, L_, U_, tag_);
-    }
-
-    if (!tag_.use_level_scheduling())
-      return;
-
-    //
-    // multifrontal part:
-    //
-
-    viennacl::switch_memory_context(multifrontal_U_diagonal_, host_context);
-    multifrontal_U_diagonal_.resize(U_.size1(), false);
-    host_based::detail::row_info(U_, multifrontal_U_diagonal_, 
viennacl::linalg::detail::SPARSE_ROW_DIAGONAL);
-
-    detail::level_scheduling_setup_L(L_,
-                                     multifrontal_U_diagonal_, //dummy
-                                     multifrontal_L_row_index_arrays_,
-                                     multifrontal_L_row_buffers_,
-                                     multifrontal_L_col_buffers_,
-                                     multifrontal_L_element_buffers_,
-                                     multifrontal_L_row_elimination_num_list_);
-
-
-    detail::level_scheduling_setup_U(U_,
-                                     multifrontal_U_diagonal_,
-                                     multifrontal_U_row_index_arrays_,
-                                     multifrontal_U_row_buffers_,
-                                     multifrontal_U_col_buffers_,
-                                     multifrontal_U_element_buffers_,
-                                     multifrontal_U_row_elimination_num_list_);
-
-    //
-    // Bring to device if necessary:
-    //
-
-    // L:
-
-    for (typename std::list< viennacl::backend::mem_handle >::iterator it  = 
multifrontal_L_row_index_arrays_.begin();
-                                                                       it != 
multifrontal_L_row_index_arrays_.end();
-                                                                     ++it)
-      viennacl::backend::switch_memory_context<unsigned int>(*it, 
viennacl::traits::context(mat));
-
-    for (typename std::list< viennacl::backend::mem_handle >::iterator it  = 
multifrontal_L_row_buffers_.begin();
-                                                                       it != 
multifrontal_L_row_buffers_.end();
-                                                                     ++it)
-      viennacl::backend::switch_memory_context<unsigned int>(*it, 
viennacl::traits::context(mat));
-
-    for (typename std::list< viennacl::backend::mem_handle >::iterator it  = 
multifrontal_L_col_buffers_.begin();
-                                                                       it != 
multifrontal_L_col_buffers_.end();
-                                                                     ++it)
-      viennacl::backend::switch_memory_context<unsigned int>(*it, 
viennacl::traits::context(mat));
-
-    for (typename std::list< viennacl::backend::mem_handle >::iterator it  = 
multifrontal_L_element_buffers_.begin();
-                                                                       it != 
multifrontal_L_element_buffers_.end();
-                                                                     ++it)
-      viennacl::backend::switch_memory_context<NumericT>(*it, 
viennacl::traits::context(mat));
-
-
-    // U:
-
-    viennacl::switch_memory_context(multifrontal_U_diagonal_, 
viennacl::traits::context(mat));
-
-    for (typename std::list< viennacl::backend::mem_handle >::iterator it  = 
multifrontal_U_row_index_arrays_.begin();
-                                                                       it != 
multifrontal_U_row_index_arrays_.end();
-                                                                     ++it)
-      viennacl::backend::switch_memory_context<unsigned int>(*it, 
viennacl::traits::context(mat));
-
-    for (typename std::list< viennacl::backend::mem_handle >::iterator it  = 
multifrontal_U_row_buffers_.begin();
-                                                                       it != 
multifrontal_U_row_buffers_.end();
-                                                                     ++it)
-      viennacl::backend::switch_memory_context<unsigned int>(*it, 
viennacl::traits::context(mat));
-
-    for (typename std::list< viennacl::backend::mem_handle >::iterator it  = 
multifrontal_U_col_buffers_.begin();
-                                                                       it != 
multifrontal_U_col_buffers_.end();
-                                                                     ++it)
-      viennacl::backend::switch_memory_context<unsigned int>(*it, 
viennacl::traits::context(mat));
-
-    for (typename std::list< viennacl::backend::mem_handle >::iterator it  = 
multifrontal_U_element_buffers_.begin();
-                                                                       it != 
multifrontal_U_element_buffers_.end();
-                                                                     ++it)
-      viennacl::backend::switch_memory_context<NumericT>(*it, 
viennacl::traits::context(mat));
-
-
-  }
-
-  ilut_tag tag_;
-  viennacl::compressed_matrix<NumericT> L_;
-  viennacl::compressed_matrix<NumericT> U_;
-
-  std::list<viennacl::backend::mem_handle> multifrontal_L_row_index_arrays_;
-  std::list<viennacl::backend::mem_handle> multifrontal_L_row_buffers_;
-  std::list<viennacl::backend::mem_handle> multifrontal_L_col_buffers_;
-  std::list<viennacl::backend::mem_handle> multifrontal_L_element_buffers_;
-  std::list<vcl_size_t > multifrontal_L_row_elimination_num_list_;
-
-  viennacl::vector<NumericT> multifrontal_U_diagonal_;
-  std::list<viennacl::backend::mem_handle> multifrontal_U_row_index_arrays_;
-  std::list<viennacl::backend::mem_handle> multifrontal_U_row_buffers_;
-  std::list<viennacl::backend::mem_handle> multifrontal_U_col_buffers_;
-  std::list<viennacl::backend::mem_handle> multifrontal_U_element_buffers_;
-  std::list<vcl_size_t > multifrontal_U_row_elimination_num_list_;
-};
-
-} // namespace linalg
-} // namespace viennacl
-
-
-
-
-#endif
-
-
-

http://git-wip-us.apache.org/repos/asf/mahout/blob/7ae549fa/native-viennaCL/src/main/cpp/viennacl/linalg/detail/op_applier.hpp
----------------------------------------------------------------------
diff --git a/native-viennaCL/src/main/cpp/viennacl/linalg/detail/op_applier.hpp 
b/native-viennaCL/src/main/cpp/viennacl/linalg/detail/op_applier.hpp
deleted file mode 100644
index 0e2abb0..0000000
--- a/native-viennaCL/src/main/cpp/viennacl/linalg/detail/op_applier.hpp
+++ /dev/null
@@ -1,103 +0,0 @@
-#ifndef VIENNACL_LINALG_DETAIL_OP_APPLIER_HPP
-#define VIENNACL_LINALG_DETAIL_OP_APPLIER_HPP
-
-/* =========================================================================
-   Copyright (c) 2010-2016, Institute for Microelectronics,
-                            Institute for Analysis and Scientific Computing,
-                            TU Wien.
-   Portions of this software are copyright by UChicago Argonne, LLC.
-
-                            -----------------
-                  ViennaCL - The Vienna Computing Library
-                            -----------------
-
-   Project Head:    Karl Rupp                   [email protected]
-
-   (A list of authors and contributors can be found in the manual)
-
-   License:         MIT (X11), see file LICENSE in the base directory
-============================================================================= 
*/
-
-/** @file viennacl/linalg/detail/op_applier.hpp
- *
- * @brief Defines the action of certain unary and binary operators and its 
arguments (for host execution).
-*/
-
-#include "viennacl/forwards.h"
-#include <cmath>
-
-namespace viennacl
-{
-namespace linalg
-{
-namespace detail
-{
-
-/** @brief Worker class for decomposing expression templates.
-  *
-  * @tparam A    Type to which is assigned to
-  * @tparam OP   One out of {op_assign, op_inplace_add, op_inplace_sub}
-  @ @tparam T    Right hand side of the assignment
-*/
-template<typename OpT>
-struct op_applier
-{
-  typedef typename OpT::ERROR_UNKNOWN_OP_TAG_PROVIDED    error_type;
-};
-
-/** \cond */
-template<>
-struct op_applier<op_element_binary<op_prod> >
-{
-  template<typename T>
-  static void apply(T & result, T const & x, T const & y) { result = x * y; }
-};
-
-template<>
-struct op_applier<op_element_binary<op_div> >
-{
-  template<typename T>
-  static void apply(T & result, T const & x, T const & y) { result = x / y; }
-};
-
-template<>
-struct op_applier<op_element_binary<op_pow> >
-{
-  template<typename T>
-  static void apply(T & result, T const & x, T const & y) { result = 
std::pow(x, y); }
-};
-
-#define VIENNACL_MAKE_UNARY_OP_APPLIER(funcname)  \
-template<> \
-struct op_applier<op_element_unary<op_##funcname> > \
-{ \
-  template<typename T> \
-  static void apply(T & result, T const & x) { using namespace std; result = 
funcname(x); } \
-}
-
-VIENNACL_MAKE_UNARY_OP_APPLIER(abs);
-VIENNACL_MAKE_UNARY_OP_APPLIER(acos);
-VIENNACL_MAKE_UNARY_OP_APPLIER(asin);
-VIENNACL_MAKE_UNARY_OP_APPLIER(atan);
-VIENNACL_MAKE_UNARY_OP_APPLIER(ceil);
-VIENNACL_MAKE_UNARY_OP_APPLIER(cos);
-VIENNACL_MAKE_UNARY_OP_APPLIER(cosh);
-VIENNACL_MAKE_UNARY_OP_APPLIER(exp);
-VIENNACL_MAKE_UNARY_OP_APPLIER(fabs);
-VIENNACL_MAKE_UNARY_OP_APPLIER(floor);
-VIENNACL_MAKE_UNARY_OP_APPLIER(log);
-VIENNACL_MAKE_UNARY_OP_APPLIER(log10);
-VIENNACL_MAKE_UNARY_OP_APPLIER(sin);
-VIENNACL_MAKE_UNARY_OP_APPLIER(sinh);
-VIENNACL_MAKE_UNARY_OP_APPLIER(sqrt);
-VIENNACL_MAKE_UNARY_OP_APPLIER(tan);
-VIENNACL_MAKE_UNARY_OP_APPLIER(tanh);
-
-#undef VIENNACL_MAKE_UNARY_OP_APPLIER
-/** \endcond */
-
-}
-}
-}
-
-#endif // VIENNACL_LINALG_DETAIL_OP_EXECUTOR_HPP

http://git-wip-us.apache.org/repos/asf/mahout/blob/7ae549fa/native-viennaCL/src/main/cpp/viennacl/linalg/detail/op_executor.hpp
----------------------------------------------------------------------
diff --git 
a/native-viennaCL/src/main/cpp/viennacl/linalg/detail/op_executor.hpp 
b/native-viennaCL/src/main/cpp/viennacl/linalg/detail/op_executor.hpp
deleted file mode 100644
index bd49b3b..0000000
--- a/native-viennaCL/src/main/cpp/viennacl/linalg/detail/op_executor.hpp
+++ /dev/null
@@ -1,86 +0,0 @@
-#ifndef VIENNACL_LINALG_DETAIL_OP_EXECUTOR_HPP
-#define VIENNACL_LINALG_DETAIL_OP_EXECUTOR_HPP
-
-/* =========================================================================
-   Copyright (c) 2010-2016, Institute for Microelectronics,
-                            Institute for Analysis and Scientific Computing,
-                            TU Wien.
-   Portions of this software are copyright by UChicago Argonne, LLC.
-
-                            -----------------
-                  ViennaCL - The Vienna Computing Library
-                            -----------------
-
-   Project Head:    Karl Rupp                   [email protected]
-
-   (A list of authors and contributors can be found in the manual)
-
-   License:         MIT (X11), see file LICENSE in the base directory
-============================================================================= 
*/
-
-/** @file viennacl/linalg/detail/op_executor.hpp
- *
- * @brief Defines the worker class for decomposing an expression tree into 
small chunks, which can be processed by the predefined operations in ViennaCL.
-*/
-
-#include "viennacl/forwards.h"
-
-namespace viennacl
-{
-namespace linalg
-{
-namespace detail
-{
-
-template<typename NumericT, typename B>
-bool op_aliasing(vector_base<NumericT> const & /*lhs*/, B const & /*b*/)
-{
-  return false;
-}
-
-template<typename NumericT>
-bool op_aliasing(vector_base<NumericT> const & lhs, vector_base<NumericT> 
const & b)
-{
-  return lhs.handle() == b.handle();
-}
-
-template<typename NumericT, typename LhsT, typename RhsT, typename OpT>
-bool op_aliasing(vector_base<NumericT> const & lhs, vector_expression<const 
LhsT, const RhsT, OpT> const & rhs)
-{
-  return op_aliasing(lhs, rhs.lhs()) || op_aliasing(lhs, rhs.rhs());
-}
-
-
-template<typename NumericT, typename B>
-bool op_aliasing(matrix_base<NumericT> const & /*lhs*/, B const & /*b*/)
-{
-  return false;
-}
-
-template<typename NumericT>
-bool op_aliasing(matrix_base<NumericT> const & lhs, matrix_base<NumericT> 
const & b)
-{
-  return lhs.handle() == b.handle();
-}
-
-template<typename NumericT, typename LhsT, typename RhsT, typename OpT>
-bool op_aliasing(matrix_base<NumericT> const & lhs, matrix_expression<const 
LhsT, const RhsT, OpT> const & rhs)
-{
-  return op_aliasing(lhs, rhs.lhs()) || op_aliasing(lhs, rhs.rhs());
-}
-
-
-/** @brief Worker class for decomposing expression templates.
-  *
-  * @tparam A    Type to which is assigned to
-  * @tparam OP   One out of {op_assign, op_inplace_add, op_inplace_sub}
-  @ @tparam T    Right hand side of the assignment
-*/
-template<typename A, typename OP, typename T>
-struct op_executor {};
-
-}
-}
-}
-
-#endif // VIENNACL_LINALG_DETAIL_OP_EXECUTOR_HPP

http://git-wip-us.apache.org/repos/asf/mahout/blob/7ae549fa/native-viennaCL/src/main/cpp/viennacl/linalg/detail/spai/block_matrix.hpp
----------------------------------------------------------------------
diff --git 
a/native-viennaCL/src/main/cpp/viennacl/linalg/detail/spai/block_matrix.hpp 
b/native-viennaCL/src/main/cpp/viennacl/linalg/detail/spai/block_matrix.hpp
deleted file mode 100644
index 12ff77b..0000000
--- a/native-viennaCL/src/main/cpp/viennacl/linalg/detail/spai/block_matrix.hpp
+++ /dev/null
@@ -1,86 +0,0 @@
-#ifndef VIENNACL_LINALG_DETAIL_SPAI_BLOCK_MATRIX_HPP
-#define VIENNACL_LINALG_DETAIL_SPAI_BLOCK_MATRIX_HPP
-
-/* =========================================================================
-   Copyright (c) 2010-2016, Institute for Microelectronics,
-                            Institute for Analysis and Scientific Computing,
-                            TU Wien.
-   Portions of this software are copyright by UChicago Argonne, LLC.
-
-                            -----------------
-                  ViennaCL - The Vienna Computing Library
-                            -----------------
-
-   Project Head:    Karl Rupp                   [email protected]
-
-   (A list of authors and contributors can be found in the manual)
-
-   License:         MIT (X11), see file LICENSE in the base directory
-============================================================================= 
*/
-
-#include <utility>
-#include <iostream>
-#include <fstream>
-#include <string>
-#include <algorithm>
-#include <vector>
-#include "viennacl/ocl/backend.hpp"
-#include "viennacl/tools/tools.hpp"
-
-/** @file viennacl/linalg/detail/spai/block_matrix.hpp
-    @brief Implementation of a bunch of (small) matrices on GPU. Experimental.
-
-    SPAI code contributed by Nikolay Lukash
-*/
-
-namespace viennacl
-{
-namespace linalg
-{
-namespace detail
-{
-namespace spai
-{
-
-/**
-* @brief Represents contigious matrices on GPU
-*/
-
-class block_matrix
-{
-public:
-
-  ////////// non-const
-
-  /** @brief Returns a handle to the elements */
-  viennacl::ocl::handle<cl_mem>& handle(){ return elements_; }
-
-  /** @brief Returns a handle to the matrix dimensions */
-  viennacl::ocl::handle<cl_mem>& handle1() { return matrix_dimensions_; }
-
-  /** @brief Returns a handle to the start indices of matrix */
-  viennacl::ocl::handle<cl_mem>& handle2() { return start_block_inds_; }
-
-  ////////// const
-
-  /** @brief Returns a handle to the const elements */
-  const viennacl::ocl::handle<cl_mem>& handle() const { return elements_; }
-
-  /** @brief Returns a handle to the const matrix dimensions */
-  const viennacl::ocl::handle<cl_mem>& handle1() const { return 
matrix_dimensions_; }
-
-  /** @brief Returns a handle to the const start indices of matrix */
-  const viennacl::ocl::handle<cl_mem>& handle2() const { return 
start_block_inds_; }
-
-private:
-  viennacl::ocl::handle<cl_mem> elements_;
-  viennacl::ocl::handle<cl_mem> matrix_dimensions_;
-  viennacl::ocl::handle<cl_mem> start_block_inds_;
-};
-
-
-}
-}
-}
-}
-#endif

http://git-wip-us.apache.org/repos/asf/mahout/blob/7ae549fa/native-viennaCL/src/main/cpp/viennacl/linalg/detail/spai/block_vector.hpp
----------------------------------------------------------------------
diff --git 
a/native-viennaCL/src/main/cpp/viennacl/linalg/detail/spai/block_vector.hpp 
b/native-viennaCL/src/main/cpp/viennacl/linalg/detail/spai/block_vector.hpp
deleted file mode 100644
index eee6aef..0000000
--- a/native-viennaCL/src/main/cpp/viennacl/linalg/detail/spai/block_vector.hpp
+++ /dev/null
@@ -1,77 +0,0 @@
-#ifndef VIENNACL_LINALG_DETAIL_SPAI_BLOCK_VECTOR_HPP
-#define VIENNACL_LINALG_DETAIL_SPAI_BLOCK_VECTOR_HPP
-
-/* =========================================================================
-   Copyright (c) 2010-2016, Institute for Microelectronics,
-                            Institute for Analysis and Scientific Computing,
-                            TU Wien.
-   Portions of this software are copyright by UChicago Argonne, LLC.
-
-                            -----------------
-                  ViennaCL - The Vienna Computing Library
-                            -----------------
-
-   Project Head:    Karl Rupp                   [email protected]
-
-   (A list of authors and contributors can be found in the manual)
-
-   License:         MIT (X11), see file LICENSE in the base directory
-============================================================================= 
*/
-
-#include <utility>
-#include <iostream>
-#include <fstream>
-#include <string>
-#include <algorithm>
-#include <vector>
-#include "viennacl/ocl/backend.hpp"
-#include "viennacl/tools/tools.hpp"
-
-/** @file viennacl/linalg/detail/spai/block_vector.hpp
-    @brief Implementation of a bunch of vectors on GPU. Experimental.
-
-    SPAI code contributed by Nikolay Lukash
-*/
-
-namespace viennacl
-{
-namespace linalg
-{
-namespace detail
-{
-namespace spai
-{
-
-/**
-* @brief Represents a contiguous vector on the GPU to represent a 
concatentation of small vectors
-*/
-class block_vector
-{
-public:
-
-  ///////////// non-const
-
-  /** @brief Return handle to the elements */
-  viennacl::ocl::handle<cl_mem> & handle(){ return elements_; }
-
-  /** @brief Return handle to start indices */
-  viennacl::ocl::handle<cl_mem> & handle1() { return start_block_inds_; }
-
-  ///////////// const
-
-  /** @brief Return handle to the const elements */
-  const viennacl::ocl::handle<cl_mem> & handle() const { return elements_; }
-
-  /** @brief Return handle to const start indices */
-  const viennacl::ocl::handle<cl_mem> & handle1() const { return 
start_block_inds_; }
-
-private:
-  viennacl::ocl::handle<cl_mem> elements_;
-  viennacl::ocl::handle<cl_mem> start_block_inds_;
-};
-
-}
-}
-}
-}
-#endif

http://git-wip-us.apache.org/repos/asf/mahout/blob/7ae549fa/native-viennaCL/src/main/cpp/viennacl/linalg/detail/spai/fspai.hpp
----------------------------------------------------------------------
diff --git a/native-viennaCL/src/main/cpp/viennacl/linalg/detail/spai/fspai.hpp 
b/native-viennaCL/src/main/cpp/viennacl/linalg/detail/spai/fspai.hpp
deleted file mode 100644
index fab81d7..0000000
--- a/native-viennaCL/src/main/cpp/viennacl/linalg/detail/spai/fspai.hpp
+++ /dev/null
@@ -1,402 +0,0 @@
-#ifndef VIENNACL_LINALG_DETAIL_SPAI_FSPAI_HPP
-#define VIENNACL_LINALG_DETAIL_SPAI_FSPAI_HPP
-
-/* =========================================================================
-   Copyright (c) 2010-2016, Institute for Microelectronics,
-                            Institute for Analysis and Scientific Computing,
-                            TU Wien.
-   Portions of this software are copyright by UChicago Argonne, LLC.
-
-                            -----------------
-                  ViennaCL - The Vienna Computing Library
-                            -----------------
-
-   Project Head:    Karl Rupp                   [email protected]
-
-   (A list of authors and contributors can be found in the manual)
-
-   License:         MIT (X11), see file LICENSE in the base directory
-============================================================================= 
*/
-
-#include <utility>
-#include <iostream>
-#include <fstream>
-#include <string>
-#include <algorithm>
-#include <vector>
-#include <math.h>
-#include <map>
-
-//boost includes
-#include "boost/numeric/ublas/vector.hpp"
-#include "boost/numeric/ublas/matrix.hpp"
-#include "boost/numeric/ublas/matrix_proxy.hpp"
-#include "boost/numeric/ublas/vector_proxy.hpp"
-#include "boost/numeric/ublas/storage.hpp"
-#include "boost/numeric/ublas/io.hpp"
-#include "boost/numeric/ublas/lu.hpp"
-#include "boost/numeric/ublas/triangular.hpp"
-#include "boost/numeric/ublas/matrix_expression.hpp"
-
-// ViennaCL includes
-#include "viennacl/linalg/prod.hpp"
-#include "viennacl/matrix.hpp"
-#include "viennacl/compressed_matrix.hpp"
-#include "viennacl/linalg/sparse_matrix_operations.hpp"
-#include "viennacl/linalg/matrix_operations.hpp"
-#include "viennacl/scalar.hpp"
-#include "viennacl/linalg/cg.hpp"
-#include "viennacl/linalg/inner_prod.hpp"
-#include "viennacl/linalg/ilu.hpp"
-//#include <omp.h>
-
-/** @file viennacl/linalg/detail/spai/fspai.hpp
-    @brief Implementation of FSPAI. Experimental.
-*/
-
-namespace viennacl
-{
-namespace linalg
-{
-namespace detail
-{
-namespace spai
-{
-
-/** @brief A tag for FSPAI. Experimental.
-*
-* Contains values for the algorithm.
-* Must be passed to spai_precond constructor
-*/
-class fspai_tag
-{
-public:
-  /** @brief Constructor
-   *
-   * @param residual_norm_threshold Calculate until the norm of the residual 
falls below this threshold
-   * @param iteration_limit maximum number of iterations
-   * @param is_static determines if static version of SPAI should be used
-   * @param is_right determines if left or right preconditioner should be used
-   */
-  fspai_tag(
-          double residual_norm_threshold = 1e-3,
-          unsigned int iteration_limit = 5,
-          bool is_static = false,
-          bool is_right = false)
-    : residual_norm_threshold_(residual_norm_threshold),
-      iteration_limit_(iteration_limit),
-      is_static_(is_static),
-      is_right_(is_right) {}
-
-  inline double getResidualNormThreshold() const { return 
residual_norm_threshold_; }
-  inline unsigned long getIterationLimit () const { return iteration_limit_; }
-  inline bool getIsStatic() const { return is_static_; }
-  inline bool getIsRight() const  { return is_right_; }
-  inline void setResidualNormThreshold(double residual_norm_threshold)
-  {
-    if (residual_norm_threshold > 0)
-      residual_norm_threshold_ = residual_norm_threshold;
-  }
-  inline void setIterationLimit(unsigned long iteration_limit)
-  {
-    if (iteration_limit > 0)
-      iteration_limit_ = iteration_limit;
-  }
-  inline void setIsRight(bool is_right)   { is_right_  = is_right; }
-  inline void setIsStatic(bool is_static) { is_static_ = is_static; }
-
-private:
-  double residual_norm_threshold_;
-  unsigned long iteration_limit_;
-  bool is_static_;
-  bool is_right_;
-};
-
-
-//
-// Helper: Store A in an STL container of type, exploiting symmetry
-// Reason: ublas interface does not allow to iterate over nonzeros of a 
particular row without starting an iterator1 from the very beginning of the 
matrix...
-//
-template<typename MatrixT, typename NumericT>
-void sym_sparse_matrix_to_stl(MatrixT const & A, std::vector<std::map<unsigned 
int, NumericT> > & STL_A)
-{
-  STL_A.resize(A.size1());
-  for (typename MatrixT::const_iterator1 row_it  = A.begin1();
-                                         row_it != A.end1();
-                                       ++row_it)
-  {
-    for (typename MatrixT::const_iterator2 col_it  = row_it.begin();
-                                           col_it != row_it.end();
-                                         ++col_it)
-    {
-      if (col_it.index1() >= col_it.index2())
-        STL_A[col_it.index1()][static_cast<unsigned int>(col_it.index2())] = 
*col_it;
-      else
-        break; //go to next row
-    }
-  }
-}
-
-
-//
-// Generate index sets J_k, k=0,...,N-1
-//
-template<typename MatrixT>
-void generateJ(MatrixT const & A, std::vector<std::vector<vcl_size_t> > & J)
-{
-  for (typename MatrixT::const_iterator1 row_it  = A.begin1();
-                                         row_it != A.end1();
-                                       ++row_it)
-  {
-    for (typename MatrixT::const_iterator2 col_it  = row_it.begin();
-                                           col_it != row_it.end();
-                                         ++col_it)
-    {
-      if (col_it.index1() > col_it.index2()) //Matrix is symmetric, thus only 
work on lower triangular part
-      {
-        J[col_it.index2()].push_back(col_it.index1());
-        J[col_it.index1()].push_back(col_it.index2());
-      }
-      else
-        break; //go to next row
-    }
-  }
-}
-
-
-//
-// Extracts the blocks A(\tilde{J}_k, \tilde{J}_k) from A
-// Sets up y_k = A(\tilde{J}_k, k) for the inplace-solution after 
Cholesky-factoriation
-//
-template<typename NumericT, typename MatrixT, typename VectorT>
-void fill_blocks(std::vector< std::map<unsigned int, NumericT> > & A,
-                 std::vector<MatrixT>                            & blocks,
-                 std::vector<std::vector<vcl_size_t> > const     & J,
-                 std::vector<VectorT>                            & Y)
-{
-  for (vcl_size_t k=0; k<A.size(); ++k)
-  {
-    std::vector<vcl_size_t> const & Jk = J[k];
-    VectorT & yk = Y[k];
-    MatrixT & block_k = blocks[k];
-
-    yk.resize(Jk.size());
-    block_k.resize(Jk.size(), Jk.size());
-    block_k.clear();
-
-    for (vcl_size_t i=0; i<Jk.size(); ++i)
-    {
-      vcl_size_t row_index = Jk[i];
-      std::map<unsigned int, NumericT> & A_row = A[row_index];
-
-      //fill y_k:
-      yk[i] = A_row[static_cast<unsigned int>(k)];
-
-      for (vcl_size_t j=0; j<Jk.size(); ++j)
-      {
-        vcl_size_t col_index = Jk[j];
-        if (col_index <= row_index && A_row.find(static_cast<unsigned 
int>(col_index)) != A_row.end()) //block is symmetric, thus store only lower 
triangular part
-          block_k(i, j) = A_row[static_cast<unsigned int>(col_index)];
-      }
-    }
-  }
-}
-
-
-//
-// Perform Cholesky factorization of A inplace. Cf. Schwarz: Numerische 
Mathematik, vol 5, p. 58
-//
-template<typename MatrixT>
-void cholesky_decompose(MatrixT & A)
-{
-  for (vcl_size_t k=0; k<A.size2(); ++k)
-  {
-    if (A(k,k) <= 0)
-    {
-      std::cout << "k: " << k << std::endl;
-      std::cout << "A(k,k): " << A(k,k) << std::endl;
-    }
-
-    assert(A(k,k) > 0 && bool("Matrix not positive definite in Cholesky 
factorization."));
-
-    A(k,k) = std::sqrt(A(k,k));
-
-    for (vcl_size_t i=k+1; i<A.size1(); ++i)
-    {
-      A(i,k) /= A(k,k);
-      for (vcl_size_t j=k+1; j<=i; ++j)
-        A(i,j) -= A(i,k) * A(j,k);
-    }
-  }
-}
-
-
-//
-// Compute x in Ax = b, where A is already Cholesky factored (A = L L^T)
-//
-template<typename MatrixT, typename VectorT>
-void cholesky_solve(MatrixT const & L, VectorT & b)
-{
-  // inplace forward solve L x = b
-  for (vcl_size_t i=0; i<L.size1(); ++i)
-  {
-    for (vcl_size_t j=0; j<i; ++j)
-      b[i] -= L(i,j) * b[j];
-    b[i] /= L(i,i);
-  }
-
-  // inplace backward solve L^T x = b:
-  for (vcl_size_t i=L.size1()-1;; --i)
-  {
-    for (vcl_size_t k=i+1; k<L.size1(); ++k)
-      b[i] -= L(k,i) * b[k];
-    b[i] /= L(i,i);
-
-    if (i==0) //vcl_size_t might be unsigned, therefore manual check for 
equality with zero here
-      break;
-  }
-}
-
-
-
-//
-// Compute the Cholesky factor L from the sparse vectors y_k
-//
-template<typename MatrixT, typename VectorT>
-void computeL(MatrixT const & A,
-              MatrixT       & L,
-              MatrixT       & L_trans,
-              std::vector<VectorT> & Y,
-              std::vector<std::vector<vcl_size_t> > & J)
-{
-  typedef typename VectorT::value_type                          NumericType;
-  typedef std::vector<std::map<unsigned int, NumericType> >     
STLSparseMatrixType;
-
-  STLSparseMatrixType L_temp(A.size1());
-
-  for (vcl_size_t k=0; k<A.size1(); ++k)
-  {
-    std::vector<vcl_size_t> const & Jk = J[k];
-    VectorT const & yk = Y[k];
-
-    //compute L(k,k):
-    NumericType Lkk = A(k,k);
-    for (vcl_size_t i=0; i<Jk.size(); ++i)
-      Lkk -= A(Jk[i],k) * yk[i];
-
-    Lkk = NumericType(1) / std::sqrt(Lkk);
-    L_temp[k][static_cast<unsigned int>(k)] = Lkk;
-    L_trans(k,k) = Lkk;
-
-    //write lower diagonal entries:
-    for (vcl_size_t i=0; i<Jk.size(); ++i)
-    {
-      L_temp[Jk[i]][static_cast<unsigned int>(k)] = -Lkk * yk[i];
-      L_trans(k, Jk[i]) = -Lkk * yk[i];
-    }
-  } //for k
-
-
-  //build L from L_temp
-  for (vcl_size_t i=0; i<L_temp.size(); ++i)
-    for (typename std::map<unsigned int, NumericType>::const_iterator it = 
L_temp[i].begin();
-           it != L_temp[i].end();
-         ++it)
-      L(i, it->first) = it->second;
-}
-
-
-//
-// Top level FSPAI function
-//
-template<typename MatrixT>
-void computeFSPAI(MatrixT const & A,
-                  MatrixT const & PatternA,
-                  MatrixT       & L,
-                  MatrixT       & L_trans,
-                  fspai_tag)
-{
-  typedef typename MatrixT::value_type                    NumericT;
-  typedef boost::numeric::ublas::matrix<NumericT>         DenseMatrixType;
-  typedef std::vector<std::map<unsigned int, NumericT> >  SparseMatrixType;
-
-  //
-  // preprocessing: Store A in a STL container:
-  //
-  //std::cout << "Transferring to STL container:" << std::endl;
-  std::vector<std::vector<NumericT> >    y_k(A.size1());
-  SparseMatrixType   STL_A(A.size1());
-  sym_sparse_matrix_to_stl(A, STL_A);
-
-
-  //
-  // Step 1: Generate pattern indices
-  //
-  //std::cout << "computeFSPAI(): Generating pattern..." << std::endl;
-  std::vector<std::vector<vcl_size_t> > J(A.size1());
-  generateJ(PatternA, J);
-
-  //
-  // Step 2: Set up matrix blocks
-  //
-  //std::cout << "computeFSPAI(): Setting up matrix blocks..." << std::endl;
-  std::vector<DenseMatrixType>  subblocks_A(A.size1());
-  fill_blocks(STL_A, subblocks_A, J, y_k);
-  STL_A.clear(); //not needed anymore
-
-  //
-  // Step 3: Cholesky-factor blocks
-  //
-  //std::cout << "computeFSPAI(): Cholesky-factorization..." << std::endl;
-  for (vcl_size_t i=0; i<subblocks_A.size(); ++i)
-  {
-    //std::cout << "Block before: " << subblocks_A[i] << std::endl;
-    cholesky_decompose(subblocks_A[i]);
-    //std::cout << "Block after: " << subblocks_A[i] << std::endl;
-  }
-
-
-  /*vcl_size_t num_bytes = 0;
-  for (vcl_size_t i=0; i<subblocks_A.size(); ++i)
-    num_bytes += 8*subblocks_A[i].size1()*subblocks_A[i].size2();*/
-  //std::cout << "Memory for FSPAI matrix: " << num_bytes / (1024.0 * 1024.0) 
<< " MB" << std::endl;
-
-  //
-  // Step 4: Solve for y_k
-  //
-  //std::cout << "computeFSPAI(): Cholesky-solve..." << std::endl;
-  for (vcl_size_t i=0; i<y_k.size(); ++i)
-  {
-    if (subblocks_A[i].size1() > 0) //block might be empty...
-    {
-      //y_k[i].resize(subblocks_A[i].size1());
-      //std::cout << "y_k[" << i << "]: ";
-      //for (vcl_size_t j=0; j<y_k[i].size(); ++j)
-      //  std::cout << y_k[i][j] << " ";
-      //std::cout << std::endl;
-      cholesky_solve(subblocks_A[i], y_k[i]);
-    }
-  }
-
-
-  //
-  // Step 5: Set up Cholesky factors L and L_trans
-  //
-  //std::cout << "computeFSPAI(): Computing L..." << std::endl;
-  L.resize(A.size1(), A.size2(), false);
-  L.reserve(A.nnz(), false);
-  L_trans.resize(A.size1(), A.size2(), false);
-  L_trans.reserve(A.nnz(), false);
-  computeL(A, L, L_trans, y_k, J);
-
-  //std::cout << "L: " << L << std::endl;
-}
-
-
-
-}
-}
-}
-}
-
-#endif

Reply via email to