http://git-wip-us.apache.org/repos/asf/mahout/blob/7ae549fa/native-viennaCL/src/main/cpp/viennacl/linalg/amg.hpp
----------------------------------------------------------------------
diff --git a/native-viennaCL/src/main/cpp/viennacl/linalg/amg.hpp 
b/native-viennaCL/src/main/cpp/viennacl/linalg/amg.hpp
deleted file mode 100644
index 0b81203..0000000
--- a/native-viennaCL/src/main/cpp/viennacl/linalg/amg.hpp
+++ /dev/null
@@ -1,398 +0,0 @@
-#ifndef VIENNACL_LINALG_AMG_HPP_
-#define VIENNACL_LINALG_AMG_HPP_
-
-/* =========================================================================
-   Copyright (c) 2010-2016, Institute for Microelectronics,
-                            Institute for Analysis and Scientific Computing,
-                            TU Wien.
-   Portions of this software are copyright by UChicago Argonne, LLC.
-
-                            -----------------
-                  ViennaCL - The Vienna Computing Library
-                            -----------------
-
-   Project Head:    Karl Rupp                   [email protected]
-
-   (A list of authors and contributors can be found in the manual)
-
-   License:         MIT (X11), see file LICENSE in the base directory
-============================================================================= 
*/
-
-/** @file viennacl/linalg/amg.hpp
-    @brief Main include file for algebraic multigrid (AMG) preconditioners.  
Experimental.
-
-    Implementation contributed by Markus Wagner
-*/
-
-#include <vector>
-#include <cmath>
-#include "viennacl/forwards.h"
-#include "viennacl/tools/tools.hpp"
-#include "viennacl/linalg/prod.hpp"
-#include "viennacl/linalg/direct_solve.hpp"
-#include "viennacl/compressed_matrix.hpp"
-
-#include "viennacl/linalg/detail/amg/amg_base.hpp"
-#include "viennacl/linalg/sparse_matrix_operations.hpp"
-#include "viennacl/linalg/amg_operations.hpp"
-#include "viennacl/tools/timer.hpp"
-#include "viennacl/linalg/direct_solve.hpp"
-#include "viennacl/linalg/lu.hpp"
-
-#include <map>
-
-#ifdef VIENNACL_WITH_OPENMP
- #include <omp.h>
-#endif
-
-#define VIENNACL_AMG_MAX_LEVELS 20
-
-namespace viennacl
-{
-namespace linalg
-{
-
-class amg_coarse_problem_too_large_exception : public std::runtime_error
-{
-public:
-  amg_coarse_problem_too_large_exception(std::string const & msg, vcl_size_t 
num_points) : std::runtime_error(msg), c_points_(num_points) {}
-
-  /** @brief Returns the number of coarse points for which no further 
coarsening could be applied */
-  vcl_size_t coarse_points() const { return c_points_; }
-
-private:
-  vcl_size_t c_points_;
-};
-
-
-namespace detail
-{
-  /** @brief Sparse Galerkin product: Calculates A_coarse = trans(P)*A_fine*P 
= R*A_fine*P
-    *
-    * @param A_fine    Operator matrix on fine grid (quadratic)
-    * @param P         Prolongation/Interpolation matrix
-    * @param R         Restriction matrix
-    * @param A_coarse  Result matrix on coarse grid (Galerkin operator)
-    */
-  template<typename NumericT>
-  void amg_galerkin_prod(compressed_matrix<NumericT> & A_fine,
-                         compressed_matrix<NumericT> & P,
-                         compressed_matrix<NumericT> & R, //P^T
-                         compressed_matrix<NumericT> & A_coarse)
-  {
-
-    compressed_matrix<NumericT> 
A_fine_times_P(viennacl::traits::context(A_fine));
-
-    // transpose P in memory (no known way of efficiently multiplying P^T * B 
for CSR-matrices P and B):
-    viennacl::linalg::detail::amg::amg_transpose(P, R);
-
-    // compute Galerkin product using a temporary for the result of A_fine * P
-    A_fine_times_P = viennacl::linalg::prod(A_fine, P);
-    A_coarse = viennacl::linalg::prod(R, A_fine_times_P);
-
-  }
-
-
-  /** @brief Setup AMG preconditioner
-  *
-  * @param list_of_A                  Operator matrices on all levels
-  * @param list_of_P                  Prolongation/Interpolation operators on 
all levels
-  * @param list_of_R                  Restriction operators on all levels
-  * @param list_of_amg_level_context  Auxiliary datastructures for managing 
the grid hierarchy (coarse nodes, etc.)
-  * @param tag                        AMG preconditioner tag
-  */
-  template<typename NumericT, typename AMGContextListT>
-  vcl_size_t amg_setup(std::vector<compressed_matrix<NumericT> > & list_of_A,
-                       std::vector<compressed_matrix<NumericT> > & list_of_P,
-                       std::vector<compressed_matrix<NumericT> > & list_of_R,
-                       AMGContextListT & list_of_amg_level_context,
-                       amg_tag & tag)
-  {
-    // Set number of iterations. If automatic coarse grid construction is 
chosen (0), then set a maximum size and stop during the process.
-    vcl_size_t iterations = tag.get_coarse_levels();
-    if (iterations == 0)
-      iterations = VIENNACL_AMG_MAX_LEVELS;
-
-    for (vcl_size_t i=0; i<iterations; ++i)
-    {
-      list_of_amg_level_context[i].switch_context(tag.get_setup_context());
-      list_of_amg_level_context[i].resize(list_of_A[i].size1(), 
list_of_A[i].nnz());
-
-      // Construct C and F points on coarse level (i is fine level, i+1 coarse 
level).
-      detail::amg::amg_coarse(list_of_A[i], list_of_amg_level_context[i], tag);
-
-      // Calculate number of C and F points on level i.
-      unsigned int c_points = list_of_amg_level_context[i].num_coarse_;
-      unsigned int f_points = static_cast<unsigned int>(list_of_A[i].size1()) 
- c_points;
-
-      if (f_points == 0 && c_points > tag.get_coarsening_cutoff())
-      {
-        std::stringstream ss;
-        ss << "No further coarsening possible (" << c_points << " coarse 
points). Consider changing the strong connection threshold or increasing the 
coarsening cutoff." << std::endl;
-        throw amg_coarse_problem_too_large_exception(ss.str(), c_points);
-      }
-
-      // Stop routine when the maximal coarse level is found (no C or F 
point). Coarsest level is level i.
-      if (c_points == 0 || f_points == 0)
-        break;
-
-      // Construct interpolation matrix for level i.
-      detail::amg::amg_interpol(list_of_A[i], list_of_P[i], 
list_of_amg_level_context[i], tag);
-
-      // Compute coarse grid operator (A[i+1] = R * A[i] * P) with R = 
trans(P).
-      amg_galerkin_prod(list_of_A[i], list_of_P[i], list_of_R[i], 
list_of_A[i+1]);
-
-      // send matrices to target context:
-      list_of_A[i].switch_memory_context(tag.get_target_context());
-      list_of_P[i].switch_memory_context(tag.get_target_context());
-      list_of_R[i].switch_memory_context(tag.get_target_context());
-
-      // If Limit of coarse points is reached then stop. Coarsest level is 
level i+1.
-      if (tag.get_coarse_levels() == 0 && c_points <= 
tag.get_coarsening_cutoff())
-        return i+1;
-    }
-
-    return iterations;
-  }
-
-
-  /** @brief Initialize AMG preconditioner
-  *
-  * @param mat                        System matrix
-  * @param list_of_A                  Operator matrices on all levels
-  * @param list_of_P                  Prolongation/Interpolation operators on 
all levels
-  * @param list_of_R                  Restriction operators on all levels
-  * @param list_of_amg_level_context  Auxiliary datastructures for managing 
the grid hierarchy (coarse nodes, etc.)
-  * @param tag                        AMG preconditioner tag
-  */
-  template<typename MatrixT, typename InternalT1, typename InternalT2>
-  void amg_init(MatrixT const & mat, InternalT1 & list_of_A, InternalT1 & 
list_of_P, InternalT1 & list_of_R, InternalT2 & list_of_amg_level_context, 
amg_tag & tag)
-  {
-    typedef typename InternalT1::value_type SparseMatrixType;
-
-    vcl_size_t num_levels = (tag.get_coarse_levels() > 0) ? 
tag.get_coarse_levels() : VIENNACL_AMG_MAX_LEVELS;
-
-    list_of_A.resize(num_levels+1, SparseMatrixType(tag.get_setup_context()));
-    list_of_P.resize(num_levels,   SparseMatrixType(tag.get_setup_context()));
-    list_of_R.resize(num_levels,   SparseMatrixType(tag.get_setup_context()));
-    list_of_amg_level_context.resize(num_levels);
-
-    // Insert operator matrix as operator for finest level.
-    //SparseMatrixType A0(mat);
-    //A.insert_element(0, A0);
-    list_of_A[0].switch_memory_context(viennacl::traits::context(mat));
-    list_of_A[0] = mat;
-    list_of_A[0].switch_memory_context(tag.get_setup_context());
-  }
-
-  /** @brief Setup data structures for precondition phase for later use on the 
GPU
-  *
-  * @param result          Result vector on all levels
-  * @param result_backup   Copy of result vector on all levels
-  * @param rhs             RHS vector on all levels
-  * @param residual        Residual vector on all levels
-  * @param A               Operators matrices on all levels from setup phase
-  * @param coarse_levels   Number of coarse levels for which the 
datastructures should be set up.
-  * @param tag             AMG preconditioner tag
-  */
-  template<typename InternalVectorT, typename SparseMatrixT>
-  void amg_setup_apply(InternalVectorT & result,
-                       InternalVectorT & result_backup,
-                       InternalVectorT & rhs,
-                       InternalVectorT & residual,
-                       SparseMatrixT const & A,
-                       vcl_size_t coarse_levels,
-                       amg_tag const & tag)
-  {
-    typedef typename InternalVectorT::value_type VectorType;
-
-    result.resize(coarse_levels + 1);
-    result_backup.resize(coarse_levels + 1);
-    rhs.resize(coarse_levels + 1);
-    residual.resize(coarse_levels);
-
-    for (vcl_size_t level=0; level <= coarse_levels; ++level)
-    {
-             result[level] = VectorType(A[level].size1(), 
tag.get_target_context());
-      result_backup[level] = VectorType(A[level].size1(), 
tag.get_target_context());
-                rhs[level] = VectorType(A[level].size1(), 
tag.get_target_context());
-    }
-    for (vcl_size_t level=0; level < coarse_levels; ++level)
-    {
-      residual[level] = VectorType(A[level].size1(), tag.get_target_context());
-    }
-  }
-
-
-  /** @brief Pre-compute LU factorization for direct solve (ublas library).
-  *
-  * Speeds up precondition phase as this is computed only once overall instead 
of once per iteration.
-  *
-  * @param op           Operator matrix for direct solve
-  * @param A            Operator matrix on coarsest level
-  * @param tag          AMG preconditioner tag
-  */
-  template<typename NumericT, typename SparseMatrixT>
-  void amg_lu(viennacl::matrix<NumericT> & op,
-              SparseMatrixT const & A,
-              amg_tag const & tag)
-  {
-    op.switch_memory_context(tag.get_setup_context());
-    op.resize(A.size1(), A.size2(), false);
-    viennacl::linalg::detail::amg::assign_to_dense(A, op);
-
-    viennacl::linalg::lu_factorize(op);
-    op.switch_memory_context(tag.get_target_context());
-  }
-
-}
-
-/** @brief AMG preconditioner class, can be supplied to solve()-routines
-*/
-template<typename MatrixT>
-class amg_precond;
-
-
-/** @brief AMG preconditioner class, can be supplied to solve()-routines.
-*
-*  Specialization for compressed_matrix
-*/
-template<typename NumericT, unsigned int AlignmentV>
-class amg_precond< compressed_matrix<NumericT, AlignmentV> >
-{
-  typedef viennacl::compressed_matrix<NumericT, AlignmentV> SparseMatrixType;
-  typedef viennacl::vector<NumericT>                        VectorType;
-  typedef detail::amg::amg_level_context                    AMGContextType;
-
-public:
-
-  amg_precond() {}
-
-  /** @brief The constructor. Builds data structures.
-  *
-  * @param mat  System matrix
-  * @param tag  The AMG tag
-  */
-  amg_precond(compressed_matrix<NumericT, AlignmentV> const & mat,
-              amg_tag const & tag)
-  {
-    tag_ = tag;
-
-    // Initialize data structures.
-    detail::amg_init(mat, A_list_, P_list_, R_list_, amg_context_list_, tag_);
-  }
-
-  /** @brief Start setup phase for this class and copy data structures.
-  */
-  void setup()
-  {
-    // Start setup phase.
-    vcl_size_t num_coarse_levels = detail::amg_setup(A_list_, P_list_, 
R_list_, amg_context_list_, tag_);
-
-    // Setup precondition phase (Data structures).
-    detail::amg_setup_apply(result_list_, result_backup_list_, rhs_list_, 
residual_list_, A_list_, num_coarse_levels, tag_);
-
-    // LU factorization for direct solve.
-    detail::amg_lu(coarsest_op_, A_list_[num_coarse_levels], tag_);
-  }
-
-
-  /** @brief Precondition Operation
-  *
-  * @param vec       The vector to which preconditioning is applied to
-  */
-  template<typename VectorT>
-  void apply(VectorT & vec) const
-  {
-    vcl_size_t level;
-
-    // Precondition operation (Yang, p.3).
-    rhs_list_[0] = vec;
-
-    // Part 1: Restrict down to coarsest level
-    for (level=0; level < residual_list_.size(); level++)
-    {
-      result_list_[level].clear();
-
-      // Apply Smoother presmooth_ times.
-      viennacl::linalg::detail::amg::smooth_jacobi(static_cast<unsigned 
int>(tag_.get_presmooth_steps()),
-                                                   A_list_[level],
-                                                   result_list_[level],
-                                                   result_backup_list_[level],
-                                                   rhs_list_[level],
-                                                   
static_cast<NumericT>(tag_.get_jacobi_weight()));
-
-      // Compute residual.
-      //residual[level] = rhs_[level] - viennacl::linalg::prod(A_[level], 
result_[level]);
-      residual_list_[level] = viennacl::linalg::prod(A_list_[level], 
result_list_[level]);
-      residual_list_[level] = rhs_list_[level] - residual_list_[level];
-
-      // Restrict to coarse level. Result is RHS of coarse level equation.
-      //residual_coarse[level] = 
viennacl::linalg::prod(R[level],residual[level]);
-      rhs_list_[level+1] = viennacl::linalg::prod(R_list_[level], 
residual_list_[level]);
-    }
-
-    // Part 2: On highest level use direct solve to solve equation (on the CPU)
-    result_list_[level] = rhs_list_[level];
-    viennacl::linalg::lu_substitute(coarsest_op_, result_list_[level]);
-
-    // Part 3: Prolongation to finest level
-    for (int level2 = static_cast<int>(residual_list_.size()-1); level2 >= 0; 
level2--)
-    {
-      level = static_cast<vcl_size_t>(level2);
-
-      // Interpolate error to fine level and correct solution.
-      result_backup_list_[level] = viennacl::linalg::prod(P_list_[level], 
result_list_[level+1]);
-      result_list_[level] += result_backup_list_[level];
-
-      // Apply Smoother postsmooth_ times.
-      viennacl::linalg::detail::amg::smooth_jacobi(static_cast<unsigned 
int>(tag_.get_postsmooth_steps()),
-                                                   A_list_[level],
-                                                   result_list_[level],
-                                                   result_backup_list_[level],
-                                                   rhs_list_[level],
-                                                   
static_cast<NumericT>(tag_.get_jacobi_weight()));
-    }
-    vec = result_list_[0];
-  }
-
-  /** @brief Returns the total number of multigrid levels in the hierarchy 
including the finest level. */
-  vcl_size_t levels() const { return residual_list_.size(); }
-
-
-  /** @brief Returns the problem/operator size at the respective multigrid 
level
-    *
-    * @param level     Index of the multigrid level. 0 is the finest level, 
levels() - 1 is the coarsest level.
-    */
-  vcl_size_t size(vcl_size_t level) const
-  {
-    assert(level < levels() && bool("Level index out of bounds!"));
-    return residual_list_[level].size();
-  }
-
-  /** @brief Returns the associated preconditioner tag containing the 
configuration for the multigrid preconditioner. */
-  amg_tag const & tag() const { return tag_; }
-
-private:
-  std::vector<SparseMatrixType> A_list_;
-  std::vector<SparseMatrixType> P_list_;
-  std::vector<SparseMatrixType> R_list_;
-  std::vector<AMGContextType>   amg_context_list_;
-
-  viennacl::matrix<NumericT>        coarsest_op_;
-
-  mutable std::vector<VectorType> result_list_;
-  mutable std::vector<VectorType> result_backup_list_;
-  mutable std::vector<VectorType> rhs_list_;
-  mutable std::vector<VectorType> residual_list_;
-
-  amg_tag tag_;
-};
-
-}
-}
-
-
-
-#endif
-

http://git-wip-us.apache.org/repos/asf/mahout/blob/7ae549fa/native-viennaCL/src/main/cpp/viennacl/linalg/amg_operations.hpp
----------------------------------------------------------------------
diff --git a/native-viennaCL/src/main/cpp/viennacl/linalg/amg_operations.hpp 
b/native-viennaCL/src/main/cpp/viennacl/linalg/amg_operations.hpp
deleted file mode 100644
index 9c7f79f..0000000
--- a/native-viennaCL/src/main/cpp/viennacl/linalg/amg_operations.hpp
+++ /dev/null
@@ -1,238 +0,0 @@
-#ifndef VIENNACL_LINALG_AMG_OPERATIONS_HPP_
-#define VIENNACL_LINALG_AMG_OPERATIONS_HPP_
-
-/* =========================================================================
-   Copyright (c) 2010-2016, Institute for Microelectronics,
-                            Institute for Analysis and Scientific Computing,
-                            TU Wien.
-   Portions of this software are copyright by UChicago Argonne, LLC.
-
-                            -----------------
-                  ViennaCL - The Vienna Computing Library
-                            -----------------
-
-   Project Head:    Karl Rupp                   [email protected]
-
-   (A list of authors and contributors can be found in the PDF manual)
-
-   License:         MIT (X11), see file LICENSE in the base directory
-============================================================================= 
*/
-
-/** @file viennacl/linalg/amg_operations.hpp
-    @brief Implementations of operations for algebraic multigrid
-*/
-
-#include "viennacl/forwards.h"
-#include "viennacl/scalar.hpp"
-#include "viennacl/vector.hpp"
-#include "viennacl/matrix.hpp"
-#include "viennacl/tools/tools.hpp"
-#include "viennacl/linalg/detail/amg/amg_base.hpp"
-#include "viennacl/linalg/host_based/amg_operations.hpp"
-
-#ifdef VIENNACL_WITH_OPENCL
-  #include "viennacl/linalg/opencl/amg_operations.hpp"
-#endif
-
-#ifdef VIENNACL_WITH_CUDA
-  #include "viennacl/linalg/cuda/amg_operations.hpp"
-#endif
-
-namespace viennacl
-{
-namespace linalg
-{
-namespace detail
-{
-namespace amg
-{
-
-template<typename NumericT, typename AMGContextT>
-void amg_influence(compressed_matrix<NumericT> const & A, AMGContextT & 
amg_context, amg_tag & tag)
-{
-  switch (viennacl::traits::handle(A).get_active_handle_id())
-  {
-    case viennacl::MAIN_MEMORY:
-      viennacl::linalg::host_based::amg::amg_influence(A, amg_context, tag);
-      break;
-#ifdef VIENNACL_WITH_OPENCL
-    case viennacl::OPENCL_MEMORY:
-      viennacl::linalg::opencl::amg::amg_influence(A, amg_context, tag);
-      break;
-#endif
-#ifdef VIENNACL_WITH_CUDA
-    case viennacl::CUDA_MEMORY:
-      viennacl::linalg::cuda::amg::amg_influence(A, amg_context, tag);
-      break;
-#endif
-    case viennacl::MEMORY_NOT_INITIALIZED:
-      throw memory_exception("not initialised!");
-    default:
-      throw memory_exception("not implemented");
-  }
-}
-
-
-template<typename NumericT, typename AMGContextT>
-void amg_coarse(compressed_matrix<NumericT> const & A, AMGContextT & 
amg_context, amg_tag & tag)
-{
-  switch (viennacl::traits::handle(A).get_active_handle_id())
-  {
-    case viennacl::MAIN_MEMORY:
-      viennacl::linalg::host_based::amg::amg_coarse(A, amg_context, tag);
-      break;
-#ifdef VIENNACL_WITH_OPENCL
-    case viennacl::OPENCL_MEMORY:
-      viennacl::linalg::opencl::amg::amg_coarse(A, amg_context, tag);
-      break;
-#endif
-#ifdef VIENNACL_WITH_CUDA
-    case viennacl::CUDA_MEMORY:
-      viennacl::linalg::cuda::amg::amg_coarse(A, amg_context, tag);
-      break;
-#endif
-    case viennacl::MEMORY_NOT_INITIALIZED:
-      throw memory_exception("not initialised!");
-    default:
-      throw memory_exception("not implemented");
-  }
-}
-
-
-template<typename NumericT, typename AMGContextT>
-void amg_interpol(compressed_matrix<NumericT> const & A,
-                  compressed_matrix<NumericT>       & P,
-                  AMGContextT & amg_context,
-                  amg_tag & tag)
-{
-  switch (viennacl::traits::handle(A).get_active_handle_id())
-  {
-    case viennacl::MAIN_MEMORY:
-      viennacl::linalg::host_based::amg::amg_interpol(A, P, amg_context, tag);
-      break;
-#ifdef VIENNACL_WITH_OPENCL
-    case viennacl::OPENCL_MEMORY:
-      viennacl::linalg::opencl::amg::amg_interpol(A, P, amg_context, tag);
-      break;
-#endif
-#ifdef VIENNACL_WITH_CUDA
-    case viennacl::CUDA_MEMORY:
-      viennacl::linalg::cuda::amg::amg_interpol(A, P, amg_context, tag);
-      break;
-#endif
-    case viennacl::MEMORY_NOT_INITIALIZED:
-      throw memory_exception("not initialised!");
-    default:
-      throw memory_exception("not implemented");
-  }
-}
-
-
-template<typename NumericT>
-void amg_transpose(compressed_matrix<NumericT> & A,
-                   compressed_matrix<NumericT> & B)
-{
-  viennacl::context orig_ctx = viennacl::traits::context(A);
-  viennacl::context cpu_ctx(viennacl::MAIN_MEMORY);
-  (void)orig_ctx;
-  (void)cpu_ctx;
-
-  switch (viennacl::traits::handle(A).get_active_handle_id())
-  {
-    case viennacl::MAIN_MEMORY:
-      viennacl::linalg::host_based::amg::amg_transpose(A, B);
-      break;
-#ifdef VIENNACL_WITH_OPENCL
-    case viennacl::OPENCL_MEMORY:
-      A.switch_memory_context(cpu_ctx);
-      B.switch_memory_context(cpu_ctx);
-      viennacl::linalg::host_based::amg::amg_transpose(A, B);
-      A.switch_memory_context(orig_ctx);
-      B.switch_memory_context(orig_ctx);
-      break;
-#endif
-#ifdef VIENNACL_WITH_CUDA
-    case viennacl::CUDA_MEMORY:
-      A.switch_memory_context(cpu_ctx);
-      B.switch_memory_context(cpu_ctx);
-      viennacl::linalg::host_based::amg::amg_transpose(A, B);
-      A.switch_memory_context(orig_ctx);
-      B.switch_memory_context(orig_ctx);
-      //viennacl::linalg::cuda::amg_transpose(A, B);
-      break;
-#endif
-    case viennacl::MEMORY_NOT_INITIALIZED:
-      throw memory_exception("not initialised!");
-    default:
-      throw memory_exception("not implemented");
-  }
-}
-
-/** Assign sparse matrix A to dense matrix B */
-template<typename SparseMatrixType, typename NumericT>
-typename viennacl::enable_if< 
viennacl::is_any_sparse_matrix<SparseMatrixType>::value>::type
-assign_to_dense(SparseMatrixType const & A,
-                viennacl::matrix_base<NumericT> & B)
-{
-  assert( (A.size1() == B.size1()) && bool("Size check failed for assignment 
to dense matrix: size1(A) != size1(B)"));
-  assert( (A.size2() == B.size1()) && bool("Size check failed for assignment 
to dense matrix: size2(A) != size2(B)"));
-
-  switch (viennacl::traits::handle(A).get_active_handle_id())
-  {
-    case viennacl::MAIN_MEMORY:
-      viennacl::linalg::host_based::amg::assign_to_dense(A, B);
-      break;
-#ifdef VIENNACL_WITH_OPENCL
-    case viennacl::OPENCL_MEMORY:
-      viennacl::linalg::opencl::amg::assign_to_dense(A, B);
-      break;
-#endif
-#ifdef VIENNACL_WITH_CUDA
-    case viennacl::CUDA_MEMORY:
-      viennacl::linalg::cuda::amg::assign_to_dense(A, B);
-      break;
-#endif
-    case viennacl::MEMORY_NOT_INITIALIZED:
-      throw memory_exception("not initialised!");
-    default:
-      throw memory_exception("not implemented");
-  }
-}
-
-template<typename NumericT>
-void smooth_jacobi(unsigned int iterations,
-                   compressed_matrix<NumericT> const & A,
-                   vector<NumericT> & x,
-                   vector<NumericT> & x_backup,
-                   vector<NumericT> const & rhs_smooth,
-                   NumericT weight)
-{
-  switch (viennacl::traits::handle(A).get_active_handle_id())
-  {
-    case viennacl::MAIN_MEMORY:
-      viennacl::linalg::host_based::amg::smooth_jacobi(iterations, A, x, 
x_backup, rhs_smooth, weight);
-      break;
-#ifdef VIENNACL_WITH_OPENCL
-    case viennacl::OPENCL_MEMORY:
-      viennacl::linalg::opencl::amg::smooth_jacobi(iterations, A, x, x_backup, 
rhs_smooth, weight);
-      break;
-#endif
-#ifdef VIENNACL_WITH_CUDA
-    case viennacl::CUDA_MEMORY:
-      viennacl::linalg::cuda::amg::smooth_jacobi(iterations, A, x, x_backup, 
rhs_smooth, weight);
-      break;
-#endif
-    case viennacl::MEMORY_NOT_INITIALIZED:
-      throw memory_exception("not initialised!");
-    default:
-      throw memory_exception("not implemented");
-  }
-}
-
-} //namespace amg
-} //namespace detail
-} //namespace linalg
-} //namespace viennacl
-
-
-#endif

http://git-wip-us.apache.org/repos/asf/mahout/blob/7ae549fa/native-viennaCL/src/main/cpp/viennacl/linalg/bicgstab.hpp
----------------------------------------------------------------------
diff --git a/native-viennaCL/src/main/cpp/viennacl/linalg/bicgstab.hpp 
b/native-viennaCL/src/main/cpp/viennacl/linalg/bicgstab.hpp
deleted file mode 100644
index 57bc89a..0000000
--- a/native-viennaCL/src/main/cpp/viennacl/linalg/bicgstab.hpp
+++ /dev/null
@@ -1,598 +0,0 @@
-#ifndef VIENNACL_LINALG_BICGSTAB_HPP_
-#define VIENNACL_LINALG_BICGSTAB_HPP_
-
-/* =========================================================================
-   Copyright (c) 2010-2016, Institute for Microelectronics,
-                            Institute for Analysis and Scientific Computing,
-                            TU Wien.
-   Portions of this software are copyright by UChicago Argonne, LLC.
-
-                            -----------------
-                  ViennaCL - The Vienna Computing Library
-                            -----------------
-
-   Project Head:    Karl Rupp                   [email protected]
-
-   (A list of authors and contributors can be found in the manual)
-
-   License:         MIT (X11), see file LICENSE in the base directory
-============================================================================= 
*/
-
-/** @file bicgstab.hpp
-    @brief The stabilized bi-conjugate gradient method is implemented here
-*/
-
-#include <vector>
-#include <cmath>
-#include <numeric>
-
-#include "viennacl/forwards.h"
-#include "viennacl/tools/tools.hpp"
-#include "viennacl/linalg/prod.hpp"
-#include "viennacl/linalg/inner_prod.hpp"
-#include "viennacl/linalg/norm_2.hpp"
-#include "viennacl/traits/clear.hpp"
-#include "viennacl/traits/size.hpp"
-#include "viennacl/traits/context.hpp"
-#include "viennacl/meta/result_of.hpp"
-#include "viennacl/linalg/iterative_operations.hpp"
-
-namespace viennacl
-{
-namespace linalg
-{
-
-/** @brief A tag for the stabilized Bi-conjugate gradient solver. Used for 
supplying solver parameters and for dispatching the solve() function
-*/
-class bicgstab_tag
-{
-public:
-  /** @brief The constructor
-  *
-  * @param tol              Relative tolerance for the residual (solver quits 
if ||r|| < tol * ||r_initial||)
-  * @param max_iters        The maximum number of iterations
-  * @param max_iters_before_restart   The maximum number of iterations before 
BiCGStab is reinitialized (to avoid accumulation of round-off errors)
-  */
-  bicgstab_tag(double tol = 1e-8, vcl_size_t max_iters = 400, vcl_size_t 
max_iters_before_restart = 200)
-    : tol_(tol), abs_tol_(0), iterations_(max_iters), 
iterations_before_restart_(max_iters_before_restart) {}
-
-  /** @brief Returns the relative tolerance */
-  double tolerance() const { return tol_; }
-
-  /** @brief Returns the absolute tolerance */
-  double abs_tolerance() const { return abs_tol_; }
-  /** @brief Sets the absolute tolerance */
-  void abs_tolerance(double new_tol) { if (new_tol >= 0) abs_tol_ = new_tol; }
-
-  /** @brief Returns the maximum number of iterations */
-  vcl_size_t max_iterations() const { return iterations_; }
-  /** @brief Returns the maximum number of iterations before a restart*/
-  vcl_size_t max_iterations_before_restart() const { return 
iterations_before_restart_; }
-
-  /** @brief Return the number of solver iterations: */
-  vcl_size_t iters() const { return iters_taken_; }
-  void iters(vcl_size_t i) const { iters_taken_ = i; }
-
-  /** @brief Returns the estimated relative error at the end of the solver run 
*/
-  double error() const { return last_error_; }
-  /** @brief Sets the estimated relative error at the end of the solver run */
-  void error(double e) const { last_error_ = e; }
-
-private:
-  double tol_;
-  double abs_tol_;
-  vcl_size_t iterations_;
-  vcl_size_t iterations_before_restart_;
-
-  //return values from solver
-  mutable vcl_size_t iters_taken_;
-  mutable double last_error_;
-};
-
-
-
-namespace detail
-{
-  /** @brief Implementation of a pipelined stabilized Bi-conjugate gradient 
solver */
-  template<typename MatrixT, typename NumericT>
-  viennacl::vector<NumericT> pipelined_solve(MatrixT const & A, //MatrixType 
const & A,
-                                             viennacl::vector_base<NumericT> 
const & rhs,
-                                             bicgstab_tag const & tag,
-                                             viennacl::linalg::no_precond,
-                                             bool 
(*monitor)(viennacl::vector<NumericT> const &, NumericT, void*) = NULL,
-                                             void *monitor_data = NULL)
-  {
-    viennacl::vector<NumericT> result = 
viennacl::zero_vector<NumericT>(rhs.size(), viennacl::traits::context(rhs));
-
-    viennacl::vector<NumericT> residual = rhs;
-    viennacl::vector<NumericT> p = rhs;
-    viennacl::vector<NumericT> r0star = rhs;
-    viennacl::vector<NumericT> Ap = rhs;
-    viennacl::vector<NumericT> s  = rhs;
-    viennacl::vector<NumericT> As = rhs;
-
-    // Layout of temporary buffer:
-    //  chunk 0: <residual, r_0^*>
-    //  chunk 1: <As, As>
-    //  chunk 2: <As, s>
-    //  chunk 3: <Ap, r_0^*>
-    //  chunk 4: <As, r_0^*>
-    //  chunk 5: <s, s>
-    vcl_size_t buffer_size_per_vector = 256;
-    vcl_size_t num_buffer_chunks = 6;
-    viennacl::vector<NumericT> inner_prod_buffer = 
viennacl::zero_vector<NumericT>(num_buffer_chunks*buffer_size_per_vector, 
viennacl::traits::context(rhs)); // temporary buffer
-    std::vector<NumericT>      
host_inner_prod_buffer(inner_prod_buffer.size());
-
-    NumericT norm_rhs_host = viennacl::linalg::norm_2(residual);
-    NumericT beta;
-    NumericT alpha;
-    NumericT omega;
-    NumericT residual_norm = norm_rhs_host;
-    inner_prod_buffer[0] = norm_rhs_host * norm_rhs_host;
-
-    NumericT  r_dot_r0 = 0;
-    NumericT As_dot_As = 0;
-    NumericT As_dot_s  = 0;
-    NumericT Ap_dot_r0 = 0;
-    NumericT As_dot_r0 = 0;
-    NumericT  s_dot_s  = 0;
-
-    if (norm_rhs_host <= tag.abs_tolerance()) //solution is zero if RHS norm 
is zero
-      return result;
-
-    for (vcl_size_t i = 0; i < tag.max_iterations(); ++i)
-    {
-      tag.iters(i+1);
-      // Ap = A*p_j
-      // Ap_dot_r0 = <Ap, r_0^*>
-      viennacl::linalg::pipelined_bicgstab_prod(A, p, Ap, r0star,
-                                                inner_prod_buffer, 
buffer_size_per_vector, 3*buffer_size_per_vector);
-
-      //////// first (weak) synchronization point ////
-
-      ///// method 1: compute alpha on host:
-      //
-      //// we only need the second chunk of the buffer for computing Ap_dot_r0:
-      //viennacl::fast_copy(inner_prod_buffer.begin(), 
inner_prod_buffer.end(), host_inner_prod_buffer.begin());
-      //Ap_dot_r0 = std::accumulate(host_inner_prod_buffer.begin() +     
buffer_size_per_vector, host_inner_prod_buffer.begin() + 2 * 
buffer_size_per_vector, ScalarType(0));
-
-      //alpha = residual_dot_r0 / Ap_dot_r0;
-
-      //// s_j = r_j - alpha_j q_j
-      //s = residual - alpha * Ap;
-
-      ///// method 2: compute alpha on device:
-      // s = r - alpha * Ap
-      // <s, s> first stage
-      // dump alpha at end of inner_prod_buffer
-      viennacl::linalg::pipelined_bicgstab_update_s(s, residual, Ap,
-                                                    inner_prod_buffer, 
buffer_size_per_vector, 5*buffer_size_per_vector);
-
-      // As = A*s_j
-      // As_dot_As = <As, As>
-      // As_dot_s  = <As, s>
-      // As_dot_r0 = <As, r_0^*>
-      viennacl::linalg::pipelined_bicgstab_prod(A, s, As, r0star,
-                                                inner_prod_buffer, 
buffer_size_per_vector, 4*buffer_size_per_vector);
-
-      //////// second (strong) synchronization point ////
-
-      viennacl::fast_copy(inner_prod_buffer.begin(), inner_prod_buffer.end(), 
host_inner_prod_buffer.begin());
-
-      typedef typename std::vector<NumericT>::difference_type       
difference_type;
-
-       r_dot_r0 = std::accumulate(host_inner_prod_buffer.begin(),              
                                 host_inner_prod_buffer.begin() + 
difference_type(    buffer_size_per_vector), NumericT(0));
-      As_dot_As = std::accumulate(host_inner_prod_buffer.begin() + 
difference_type(    buffer_size_per_vector), host_inner_prod_buffer.begin() + 
difference_type(2 * buffer_size_per_vector), NumericT(0));
-      As_dot_s  = std::accumulate(host_inner_prod_buffer.begin() + 
difference_type(2 * buffer_size_per_vector), host_inner_prod_buffer.begin() + 
difference_type(3 * buffer_size_per_vector), NumericT(0));
-      Ap_dot_r0 = std::accumulate(host_inner_prod_buffer.begin() + 
difference_type(3 * buffer_size_per_vector), host_inner_prod_buffer.begin() + 
difference_type(4 * buffer_size_per_vector), NumericT(0));
-      As_dot_r0 = std::accumulate(host_inner_prod_buffer.begin() + 
difference_type(4 * buffer_size_per_vector), host_inner_prod_buffer.begin() + 
difference_type(5 * buffer_size_per_vector), NumericT(0));
-       s_dot_s  = std::accumulate(host_inner_prod_buffer.begin() + 
difference_type(5 * buffer_size_per_vector), host_inner_prod_buffer.begin() + 
difference_type(6 * buffer_size_per_vector), NumericT(0));
-
-      alpha =   r_dot_r0 / Ap_dot_r0;
-      beta  = - As_dot_r0 / Ap_dot_r0;
-      omega =   As_dot_s  / As_dot_As;
-
-      residual_norm = std::sqrt(s_dot_s - NumericT(2.0) * omega * As_dot_s + 
omega * omega *  As_dot_As);
-      if (monitor && monitor(result, std::fabs(residual_norm / norm_rhs_host), 
monitor_data))
-        break;
-      if (std::fabs(residual_norm / norm_rhs_host) < tag.tolerance() || 
residual_norm < tag.abs_tolerance())
-        break;
-
-      // x_{j+1} = x_j + alpha * p_j + omega * s_j
-      // r_{j+1} = s_j - omega * t_j
-      // p_{j+1} = r_{j+1} + beta * (p_j - omega * q_j)
-      // and compute first stage of r_dot_r0 = <r_{j+1}, r_o^*> for use in 
next iteration
-       viennacl::linalg::pipelined_bicgstab_vector_update(result, alpha, p, 
omega, s,
-                                                          residual, As,
-                                                          beta, Ap,
-                                                          r0star, 
inner_prod_buffer, buffer_size_per_vector);
-    }
-
-    //store last error estimate:
-    tag.error(residual_norm / norm_rhs_host);
-
-    return result;
-  }
-
-  /** @brief Overload for the pipelined CG implementation for the ViennaCL 
sparse matrix types */
-  template<typename NumericT>
-  viennacl::vector<NumericT> solve_impl(viennacl::compressed_matrix<NumericT> 
const & A,
-                                        viennacl::vector<NumericT> const & rhs,
-                                        bicgstab_tag const & tag,
-                                        viennacl::linalg::no_precond,
-                                        bool 
(*monitor)(viennacl::vector<NumericT> const &, NumericT, void*) = NULL,
-                                        void *monitor_data = NULL)
-  {
-    return pipelined_solve(A, rhs, tag, viennacl::linalg::no_precond(), 
monitor, monitor_data);
-  }
-
-
-  /** @brief Overload for the pipelined CG implementation for the ViennaCL 
sparse matrix types */
-  template<typename NumericT>
-  viennacl::vector<NumericT> solve_impl(viennacl::coordinate_matrix<NumericT> 
const & A,
-                                        viennacl::vector<NumericT> const & rhs,
-                                        bicgstab_tag const & tag,
-                                        viennacl::linalg::no_precond,
-                                        bool 
(*monitor)(viennacl::vector<NumericT> const &, NumericT, void*) = NULL,
-                                        void *monitor_data = NULL)
-  {
-    return detail::pipelined_solve(A, rhs, tag, 
viennacl::linalg::no_precond(), monitor, monitor_data);
-  }
-
-
-
-  /** @brief Overload for the pipelined CG implementation for the ViennaCL 
sparse matrix types */
-  template<typename NumericT>
-  viennacl::vector<NumericT> solve_impl(viennacl::ell_matrix<NumericT> const & 
A,
-                                        viennacl::vector<NumericT> const & rhs,
-                                        bicgstab_tag const & tag,
-                                        viennacl::linalg::no_precond,
-                                        bool 
(*monitor)(viennacl::vector<NumericT> const &, NumericT, void*) = NULL,
-                                        void *monitor_data = NULL)
-  {
-    return detail::pipelined_solve(A, rhs, tag, 
viennacl::linalg::no_precond(), monitor, monitor_data);
-  }
-
-
-
-  /** @brief Overload for the pipelined CG implementation for the ViennaCL 
sparse matrix types */
-  template<typename NumericT>
-  viennacl::vector<NumericT> solve_impl(viennacl::sliced_ell_matrix<NumericT> 
const & A,
-                                        viennacl::vector<NumericT> const & rhs,
-                                        bicgstab_tag const & tag,
-                                        viennacl::linalg::no_precond,
-                                        bool 
(*monitor)(viennacl::vector<NumericT> const &, NumericT, void*) = NULL,
-                                        void *monitor_data = NULL)
-  {
-    return detail::pipelined_solve(A, rhs, tag, 
viennacl::linalg::no_precond(), monitor, monitor_data);
-  }
-
-
-  /** @brief Overload for the pipelined CG implementation for the ViennaCL 
sparse matrix types */
-  template<typename NumericT>
-  viennacl::vector<NumericT> solve_impl(viennacl::hyb_matrix<NumericT> const & 
A,
-                                        viennacl::vector<NumericT> const & rhs,
-                                        bicgstab_tag const & tag,
-                                        viennacl::linalg::no_precond,
-                                        bool 
(*monitor)(viennacl::vector<NumericT> const &, NumericT, void*) = NULL,
-                                        void *monitor_data = NULL)
-  {
-    return detail::pipelined_solve(A, rhs, tag, 
viennacl::linalg::no_precond(), monitor, monitor_data);
-  }
-
-
-  /** @brief Implementation of the unpreconditioned stabilized Bi-conjugate 
gradient solver
-  *
-  * Following the description in "Iterative Methods for Sparse Linear Systems" 
by Y. Saad
-  *
-  * @param matrix       The system matrix
-  * @param rhs          The load vector
-  * @param tag          Solver configuration tag
-  * @param monitor      A callback routine which is called at each GMRES 
restart
-  * @param monitor_data Data pointer to be passed to the callback routine to 
pass on user-specific data
-  * @return The result vector
-  */
-  template<typename MatrixT, typename VectorT>
-  VectorT solve_impl(MatrixT const & matrix,
-                     VectorT const & rhs,
-                     bicgstab_tag const & tag,
-                     viennacl::linalg::no_precond,
-                     bool (*monitor)(VectorT const &, typename 
viennacl::result_of::cpu_value_type<typename 
viennacl::result_of::value_type<VectorT>::type>::type, void*) = NULL,
-                     void *monitor_data = NULL)
-  {
-    typedef typename viennacl::result_of::value_type<VectorT>::type            
NumericType;
-    typedef typename viennacl::result_of::cpu_value_type<NumericType>::type    
CPU_NumericType;
-    VectorT result = rhs;
-    viennacl::traits::clear(result);
-
-    VectorT residual = rhs;
-    VectorT p = rhs;
-    VectorT r0star = rhs;
-    VectorT tmp0 = rhs;
-    VectorT tmp1 = rhs;
-    VectorT s = rhs;
-
-    CPU_NumericType norm_rhs_host = viennacl::linalg::norm_2(residual);
-    CPU_NumericType ip_rr0star = norm_rhs_host * norm_rhs_host;
-    CPU_NumericType beta;
-    CPU_NumericType alpha;
-    CPU_NumericType omega;
-    //ScalarType inner_prod_temp; //temporary variable for inner product 
computation
-    CPU_NumericType new_ip_rr0star = 0;
-    CPU_NumericType residual_norm = norm_rhs_host;
-
-    if (norm_rhs_host <= tag.abs_tolerance()) //solution is zero if RHS norm 
is zero
-      return result;
-
-    bool restart_flag = true;
-    vcl_size_t last_restart = 0;
-    for (vcl_size_t i = 0; i < tag.max_iterations(); ++i)
-    {
-      if (restart_flag)
-      {
-        residual = viennacl::linalg::prod(matrix, result);
-        residual = rhs - residual;
-        p = residual;
-        r0star = residual;
-        ip_rr0star = viennacl::linalg::norm_2(residual);
-        ip_rr0star *= ip_rr0star;
-        restart_flag = false;
-        last_restart = i;
-      }
-
-      tag.iters(i+1);
-      tmp0 = viennacl::linalg::prod(matrix, p);
-      alpha = ip_rr0star / viennacl::linalg::inner_prod(tmp0, r0star);
-
-      s = residual - alpha*tmp0;
-
-      tmp1 = viennacl::linalg::prod(matrix, s);
-      CPU_NumericType norm_tmp1 = viennacl::linalg::norm_2(tmp1);
-      omega = viennacl::linalg::inner_prod(tmp1, s) / (norm_tmp1 * norm_tmp1);
-
-      result += alpha * p + omega * s;
-      residual = s - omega * tmp1;
-
-      new_ip_rr0star = viennacl::linalg::inner_prod(residual, r0star);
-      residual_norm = viennacl::linalg::norm_2(residual);
-      if (monitor && monitor(result, std::fabs(residual_norm / norm_rhs_host), 
monitor_data))
-        break;
-      if (std::fabs(residual_norm / norm_rhs_host) < tag.tolerance() || 
residual_norm < tag.abs_tolerance())
-        break;
-
-      beta = new_ip_rr0star / ip_rr0star * alpha/omega;
-      ip_rr0star = new_ip_rr0star;
-
-      if (    (ip_rr0star <= 0 && ip_rr0star >= 0)
-           || (omega <= 0 && omega >= 0)
-           || (i - last_restart > tag.max_iterations_before_restart())
-         ) //search direction degenerate. A restart might help
-        restart_flag = true;
-
-      // Execution of
-      //  p = residual + beta * (p - omega*tmp0);
-      // without introducing temporary vectors:
-      p -= omega * tmp0;
-      p = residual + beta * p;
-    }
-
-    //store last error estimate:
-    tag.error(residual_norm / norm_rhs_host);
-
-    return result;
-  }
-
-
-  /** @brief Implementation of the preconditioned stabilized Bi-conjugate 
gradient solver
-  *
-  * Following the description of the unpreconditioned case in "Iterative 
Methods for Sparse Linear Systems" by Y. Saad
-  *
-  * @param matrix       The system matrix
-  * @param rhs          The load vector
-  * @param tag          Solver configuration tag
-  * @param precond      A preconditioner. Precondition operation is done via 
member function apply()
-  * @param monitor      A callback routine which is called at each GMRES 
restart
-  * @param monitor_data Data pointer to be passed to the callback routine to 
pass on user-specific data
-  * @return The result vector
-  */
-  template<typename MatrixT, typename VectorT, typename PreconditionerT>
-  VectorT solve_impl(MatrixT const & matrix,
-                     VectorT const & rhs,
-                     bicgstab_tag const & tag,
-                     PreconditionerT const & precond,
-                     bool (*monitor)(VectorT const &, typename 
viennacl::result_of::cpu_value_type<typename 
viennacl::result_of::value_type<VectorT>::type>::type, void*) = NULL,
-                     void *monitor_data = NULL)
-  {
-    typedef typename viennacl::result_of::value_type<VectorT>::type            
NumericType;
-    typedef typename viennacl::result_of::cpu_value_type<NumericType>::type    
CPU_NumericType;
-    VectorT result = rhs;
-    viennacl::traits::clear(result);
-
-    VectorT residual = rhs;
-    VectorT r0star = residual;  //can be chosen arbitrarily in fact
-    VectorT tmp0 = rhs;
-    VectorT tmp1 = rhs;
-    VectorT s = rhs;
-
-    VectorT p = residual;
-
-    CPU_NumericType ip_rr0star = viennacl::linalg::norm_2(residual);
-    CPU_NumericType norm_rhs_host = viennacl::linalg::norm_2(residual);
-    CPU_NumericType beta;
-    CPU_NumericType alpha;
-    CPU_NumericType omega;
-    CPU_NumericType new_ip_rr0star = 0;
-    CPU_NumericType residual_norm = norm_rhs_host;
-
-    if (norm_rhs_host <= tag.abs_tolerance()) //solution is zero if RHS norm 
is zero
-      return result;
-
-    bool restart_flag = true;
-    vcl_size_t last_restart = 0;
-    for (unsigned int i = 0; i < tag.max_iterations(); ++i)
-    {
-      if (restart_flag)
-      {
-        residual = viennacl::linalg::prod(matrix, result);
-        residual = rhs - residual;
-        precond.apply(residual);
-        p = residual;
-        r0star = residual;
-        ip_rr0star = viennacl::linalg::norm_2(residual);
-        ip_rr0star *= ip_rr0star;
-        restart_flag = false;
-        last_restart = i;
-      }
-
-      tag.iters(i+1);
-      tmp0 = viennacl::linalg::prod(matrix, p);
-      precond.apply(tmp0);
-      alpha = ip_rr0star / viennacl::linalg::inner_prod(tmp0, r0star);
-
-      s = residual - alpha*tmp0;
-
-      tmp1 = viennacl::linalg::prod(matrix, s);
-      precond.apply(tmp1);
-      CPU_NumericType norm_tmp1 = viennacl::linalg::norm_2(tmp1);
-      omega = viennacl::linalg::inner_prod(tmp1, s) / (norm_tmp1 * norm_tmp1);
-
-      result += alpha * p + omega * s;
-      residual = s - omega * tmp1;
-
-      residual_norm = viennacl::linalg::norm_2(residual);
-      if (monitor && monitor(result, std::fabs(residual_norm / norm_rhs_host), 
monitor_data))
-        break;
-      if (residual_norm / norm_rhs_host < tag.tolerance() || residual_norm < 
tag.abs_tolerance())
-        break;
-
-      new_ip_rr0star = viennacl::linalg::inner_prod(residual, r0star);
-
-      beta = new_ip_rr0star / ip_rr0star * alpha/omega;
-      ip_rr0star = new_ip_rr0star;
-
-      if ( (ip_rr0star >= 0 && ip_rr0star <= 0) || (omega >=0 && omega <= 0) 
|| i - last_restart > tag.max_iterations_before_restart()) //search direction 
degenerate. A restart might help
-        restart_flag = true;
-
-      // Execution of
-      //  p = residual + beta * (p - omega*tmp0);
-      // without introducing temporary vectors:
-      p -= omega * tmp0;
-      p = residual + beta * p;
-
-      //std::cout << "Rel. Residual in current step: " << 
std::sqrt(std::fabs(viennacl::linalg::inner_prod(residual, residual) / 
norm_rhs_host)) << std::endl;
-    }
-
-    //store last error estimate:
-    tag.error(residual_norm / norm_rhs_host);
-
-    return result;
-  }
-
-}
-
-
-
-template<typename MatrixT, typename VectorT, typename PreconditionerT>
-VectorT solve(MatrixT const & matrix, VectorT const & rhs, bicgstab_tag const 
& tag, PreconditionerT const & precond)
-{
-  return detail::solve_impl(matrix, rhs, tag, precond);
-}
-
-
-/** @brief Convenience overload for calling the preconditioned BiCGStab solver 
using types from the C++ STL.
-  *
-  * A std::vector<std::map<T, U> > matrix is convenient for e.g. finite 
element assembly.
-  * It is not the fastest option for setting up a system, but often it is fast 
enough - particularly for just trying things out.
-  */
-template<typename IndexT, typename NumericT, typename PreconditionerT>
-std::vector<NumericT> solve(std::vector< std::map<IndexT, NumericT> > const & 
A, std::vector<NumericT> const & rhs, bicgstab_tag const & tag, PreconditionerT 
const & precond)
-{
-  viennacl::compressed_matrix<NumericT> vcl_A;
-  viennacl::copy(A, vcl_A);
-
-  viennacl::vector<NumericT> vcl_rhs(rhs.size());
-  viennacl::copy(rhs, vcl_rhs);
-
-  viennacl::vector<NumericT> vcl_result = solve(vcl_A, vcl_rhs, tag, precond);
-
-  std::vector<NumericT> result(vcl_result.size());
-  viennacl::copy(vcl_result, result);
-  return result;
-}
-
-/** @brief Entry point for the unpreconditioned BiCGStab method.
- *
- *  @param matrix    The system matrix
- *  @param rhs       Right hand side vector (load vector)
- *  @param tag       A BiCGStab tag providing relative tolerances, etc.
- */
-template<typename MatrixT, typename VectorT>
-VectorT solve(MatrixT const & matrix, VectorT const & rhs, bicgstab_tag const 
& tag)
-{
-  return solve(matrix, rhs, tag, viennacl::linalg::no_precond());
-}
-
-
-
-template<typename VectorT>
-class bicgstab_solver
-{
-public:
-  typedef typename viennacl::result_of::cpu_value_type<VectorT>::type   
numeric_type;
-
-  bicgstab_solver(bicgstab_tag const & tag) : tag_(tag), 
monitor_callback_(NULL), user_data_(NULL) {}
-
-  template<typename MatrixT, typename PreconditionerT>
-  VectorT operator()(MatrixT const & A, VectorT const & b, PreconditionerT 
const & precond) const
-  {
-    if (viennacl::traits::size(init_guess_) > 0) // take initial guess into 
account
-    {
-      VectorT mod_rhs = viennacl::linalg::prod(A, init_guess_);
-      mod_rhs = b - mod_rhs;
-      VectorT y = detail::solve_impl(A, mod_rhs, tag_, precond, 
monitor_callback_, user_data_);
-      return init_guess_ + y;
-    }
-    return detail::solve_impl(A, b, tag_, precond, monitor_callback_, 
user_data_);
-  }
-
-
-  template<typename MatrixT>
-  VectorT operator()(MatrixT const & A, VectorT const & b) const
-  {
-    return operator()(A, b, viennacl::linalg::no_precond());
-  }
-
-  /** @brief Specifies an initial guess for the iterative solver.
-    *
-    * An iterative solver for Ax = b with initial guess x_0 is equivalent to 
an iterative solver for Ay = b' := b - Ax_0, where x = x_0 + y.
-    */
-  void set_initial_guess(VectorT const & x) { init_guess_ = x; }
-
-  /** @brief Sets a monitor function pointer to be called in each iteration. 
Set to NULL to run without monitor.
-   *
-   *  The monitor function is called with the current guess for the result as 
first argument and the current relative residual estimate as second argument.
-   *  The third argument is a pointer to user-defined data, through which 
additional information can be passed.
-   *  This pointer needs to be set with set_monitor_data. If not set, NULL is 
passed.
-   *  If the montior function returns true, the solver terminates (either 
convergence or divergence).
-   */
-  void set_monitor(bool (*monitor_fun)(VectorT const &, numeric_type, void *), 
void *user_data)
-  {
-    monitor_callback_ = monitor_fun;
-    user_data_ = user_data;
-  }
-
-  /** @brief Returns the solver tag containing basic configuration such as 
tolerances, etc. */
-  bicgstab_tag const & tag() const { return tag_; }
-
-private:
-  bicgstab_tag  tag_;
-  VectorT       init_guess_;
-  bool          (*monitor_callback_)(VectorT const &, numeric_type, void *);
-  void          *user_data_;
-};
-
-
-}
-}
-
-#endif

http://git-wip-us.apache.org/repos/asf/mahout/blob/7ae549fa/native-viennaCL/src/main/cpp/viennacl/linalg/bisect.hpp
----------------------------------------------------------------------
diff --git a/native-viennaCL/src/main/cpp/viennacl/linalg/bisect.hpp 
b/native-viennaCL/src/main/cpp/viennacl/linalg/bisect.hpp
deleted file mode 100644
index a2daf5e..0000000
--- a/native-viennaCL/src/main/cpp/viennacl/linalg/bisect.hpp
+++ /dev/null
@@ -1,179 +0,0 @@
-#ifndef VIENNACL_LINALG_BISECT_HPP_
-#define VIENNACL_LINALG_BISECT_HPP_
-
-/* =========================================================================
-   Copyright (c) 2010-2016, Institute for Microelectronics,
-                            Institute for Analysis and Scientific Computing,
-                            TU Wien.
-   Portions of this software are copyright by UChicago Argonne, LLC.
-
-                            -----------------
-                  ViennaCL - The Vienna Computing Library
-                            -----------------
-
-   Project Head:    Karl Rupp                   [email protected]
-
-   (A list of authors and contributors can be found in the manual)
-
-   License:         MIT (X11), see file LICENSE in the base directory
-============================================================================= 
*/
-
-/** @file viennacl/linalg/bisect.hpp
-*   @brief Implementation of the algorithm for finding eigenvalues of a 
tridiagonal matrix.
-*
-*   Contributed by Guenther Mader and Astrid Rupp.
-*/
-
-#include <vector>
-#include <cmath>
-#include <limits>
-#include <cstddef>
-#include "viennacl/meta/result_of.hpp"
-
-namespace viennacl
-{
-namespace linalg
-{
-
-namespace detail
-{
-  /**
-  *    @brief overloaded function for copying vectors
-  */
-  template<typename NumericT, typename OtherVectorT>
-  void copy_vec_to_vec(viennacl::vector<NumericT> const & src, OtherVectorT & 
dest)
-  {
-    viennacl::copy(src, dest);
-  }
-
-  template<typename OtherVectorT, typename NumericT>
-  void copy_vec_to_vec(OtherVectorT const & src, viennacl::vector<NumericT> & 
dest)
-  {
-    viennacl::copy(src, dest);
-  }
-
-  template<typename VectorT1, typename VectorT2>
-  void copy_vec_to_vec(VectorT1 const & src, VectorT2 & dest)
-  {
-    for (vcl_size_t i=0; i<src.size(); ++i)
-      dest[i] = src[i];
-  }
-
-} //namespace detail
-
-/**
-*   @brief Implementation of the bisect-algorithm for the calculation of the 
eigenvalues of a tridiagonal matrix. Experimental - interface might change.
-*
-*   Refer to "Calculation of the Eigenvalues of a Symmetric Tridiagonal Matrix 
by the Method of Bisection" in the Handbook Series Linear Algebra, contributed 
by Barth, Martin, and Wilkinson.
-*   http://www.maths.ed.ac.uk/~aar/papers/bamawi.pdf
-*
-*   @param alphas       Elements of the main diagonal
-*   @param betas        Elements of the secondary diagonal
-*   @return             Returns the eigenvalues of the tridiagonal matrix 
defined by alpha and beta
-*/
-template<typename VectorT>
-std::vector<
-        typename viennacl::result_of::cpu_value_type<typename 
VectorT::value_type>::type
-        >
-bisect(VectorT const & alphas, VectorT const & betas)
-{
-  typedef typename viennacl::result_of::value_type<VectorT>::type           
NumericType;
-  typedef typename viennacl::result_of::cpu_value_type<NumericType>::type   
CPU_NumericType;
-
-  vcl_size_t size = betas.size();
-  std::vector<CPU_NumericType>  x_temp(size);
-
-
-  std::vector<CPU_NumericType> beta_bisect;
-  std::vector<CPU_NumericType> wu;
-
-  double rel_error = std::numeric_limits<CPU_NumericType>::epsilon();
-  beta_bisect.push_back(0);
-
-  for (vcl_size_t i = 1; i < size; i++)
-    beta_bisect.push_back(betas[i] * betas[i]);
-
-  double xmin = alphas[size - 1] - std::fabs(betas[size - 1]);
-  double xmax = alphas[size - 1] + std::fabs(betas[size - 1]);
-
-  for (vcl_size_t i = 0; i < size - 1; i++)
-  {
-    double h = std::fabs(betas[i]) + std::fabs(betas[i + 1]);
-    if (alphas[i] + h > xmax)
-      xmax = alphas[i] + h;
-    if (alphas[i] - h < xmin)
-      xmin = alphas[i] - h;
-  }
-
-
-  double eps1 = 1e-6;
-  /*double eps2 = (xmin + xmax > 0) ? (rel_error * xmax) : (-rel_error * xmin);
-  if (eps1 <= 0)
-    eps1 = eps2;
-  else
-    eps2 = 0.5 * eps1 + 7.0 * eps2; */
-
-  double x0 = xmax;
-
-  for (vcl_size_t i = 0; i < size; i++)
-  {
-    x_temp[i] = xmax;
-    wu.push_back(xmin);
-  }
-
-  for (long k = static_cast<long>(size) - 1; k >= 0; --k)
-  {
-    double xu = xmin;
-    for (long i = k; i >= 0; --i)
-    {
-      if (xu < wu[vcl_size_t(k-i)])
-      {
-        xu = wu[vcl_size_t(i)];
-        break;
-      }
-    }
-
-    if (x0 > x_temp[vcl_size_t(k)])
-      x0 = x_temp[vcl_size_t(k)];
-
-    double x1 = (xu + x0) / 2.0;
-    while (x0 - xu > 2.0 * rel_error * (std::fabs(xu) + std::fabs(x0)) + eps1)
-    {
-      vcl_size_t a = 0;
-      double q = 1;
-      for (vcl_size_t i = 0; i < size; i++)
-      {
-        if (q > 0 || q < 0)
-          q = alphas[i] - x1 - beta_bisect[i] / q;
-        else
-          q = alphas[i] - x1 - std::fabs(betas[i] / rel_error);
-
-        if (q < 0)
-          a++;
-      }
-
-      if (a <= static_cast<vcl_size_t>(k))
-      {
-        xu = x1;
-        if (a < 1)
-          wu[0] = x1;
-        else
-        {
-          wu[a] = x1;
-          if (x_temp[a - 1] > x1)
-              x_temp[a - 1] = x1;
-        }
-      }
-      else
-        x0 = x1;
-
-      x1 = (xu + x0) / 2.0;
-    }
-    x_temp[vcl_size_t(k)] = x1;
-  }
-  return x_temp;
-}
-
-} // end namespace linalg
-} // end namespace viennacl
-#endif

http://git-wip-us.apache.org/repos/asf/mahout/blob/7ae549fa/native-viennaCL/src/main/cpp/viennacl/linalg/bisect_gpu.hpp
----------------------------------------------------------------------
diff --git a/native-viennaCL/src/main/cpp/viennacl/linalg/bisect_gpu.hpp 
b/native-viennaCL/src/main/cpp/viennacl/linalg/bisect_gpu.hpp
deleted file mode 100644
index 6918b14..0000000
--- a/native-viennaCL/src/main/cpp/viennacl/linalg/bisect_gpu.hpp
+++ /dev/null
@@ -1,173 +0,0 @@
-#ifndef VIENNACL_LINALG_BISECT_GPU
-#define VIENNACL_LINALG_BISECT_GPU
-
-/* =========================================================================
-   Copyright (c) 2010-2016, Institute for Microelectronics,
-                            Institute for Analysis and Scientific Computing,
-                            TU Wien.
-   Portions of this software are copyright by UChicago Argonne, LLC.
-
-                            -----------------
-                  ViennaCL - The Vienna Computing Library
-                            -----------------
-
-   Project Head:    Karl Rupp                   [email protected]
-
-   (A list of authors and contributors can be found in the manual)
-
-   License:         MIT (X11), see file LICENSE in the base directory
-============================================================================= 
*/
-
-
-/** @file viennacl/linalg/bisect_gpu.hpp
-    @brief Implementation of an bisection algorithm for eigenvalues
-
-    Implementation based on the sample provided with the CUDA 6.0 SDK, for 
which
-    the creation of derivative works is allowed by including the following 
statement:
-    "This software contains source code provided by NVIDIA Corporation."
-*/
-// includes, system
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-
-#include "viennacl/scalar.hpp"
-#include "viennacl/vector.hpp"
-#include "viennacl/matrix.hpp"
-
-// includes, project
-#include "viennacl/linalg/detail/bisect/structs.hpp"
-#include "viennacl/linalg/detail/bisect/gerschgorin.hpp"
-#include "viennacl/linalg/detail/bisect/bisect_large.hpp"
-#include "viennacl/linalg/detail/bisect/bisect_small.hpp"
-
-
-namespace viennacl
-{
-namespace linalg
-{
-///////////////////////////////////////////////////////////////////////////
-//! @brief bisect           The bisection algorithm computes the eigevalues
-//!                         of a symmetric tridiagonal matrix.
-//! @param diagonal         diagonal elements of the matrix
-//! @param superdiagonal    superdiagonal elements of the matrix
-//! @param eigenvalues      Vectors with the eigenvalues in ascending order
-//! @return                 return false if any errors occured
-///
-//! overloaded function template: std::vectors as parameters
-template<typename NumericT>
-bool
-bisect(const std::vector<NumericT> & diagonal, const std::vector<NumericT> & 
superdiagonal, std::vector<NumericT> & eigenvalues)
-{
-  assert(diagonal.size() == superdiagonal.size() &&
-         diagonal.size() == eigenvalues.size()   &&
-         bool("Input vectors do not have the same sizes!"));
-  bool bResult = false;
-  // flag if the matrix size is due to explicit user request
-  // desired precision of eigenvalues
-  NumericT  precision = static_cast<NumericT>(0.00001);
-  const unsigned int mat_size = static_cast<unsigned int>(diagonal.size());
-
-  // set up input
-  viennacl::linalg::detail::InputData<NumericT> input(diagonal, superdiagonal, 
mat_size);
-
-  NumericT lg =  FLT_MAX;
-  NumericT ug = -FLT_MAX;
-  // compute Gerschgorin interval
-  viennacl::linalg::detail::computeGerschgorin(input.std_a, input.std_b, 
mat_size, lg, ug);
-
-  // decide wheter the algorithm for small or for large matrices will be 
started
-  if (mat_size <= VIENNACL_BISECT_MAX_SMALL_MATRIX)
-  {
-    // initialize memory for result
-    viennacl::linalg::detail::ResultDataSmall<NumericT> result(mat_size);
-
-    // run the kernel
-    viennacl::linalg::detail::computeEigenvaluesSmallMatrix(input, result, 
mat_size, lg, ug, precision);
-
-    // get the result from the device and do some sanity checks,
-    viennacl::linalg::detail::processResultSmallMatrix(result, mat_size);
-    eigenvalues = result.std_eigenvalues;
-    bResult = true;
-  }
-
-  else
-  {
-    // initialize memory for result
-    viennacl::linalg::detail::ResultDataLarge<NumericT> result(mat_size);
-
-    // run the kernel
-    viennacl::linalg::detail::computeEigenvaluesLargeMatrix(input, result, 
mat_size, lg, ug, precision);
-
-    // get the result from the device and do some sanity checks
-    bResult = viennacl::linalg::detail::processResultDataLargeMatrix(result, 
mat_size);
-
-    eigenvalues = result.std_eigenvalues;
-  }
-  return bResult;
-}
-
-
-///////////////////////////////////////////////////////////////////////////
-//! @brief bisect           The bisection algorithm computes the eigevalues
-//!                         of a symmetric tridiagonal matrix.
-//! @param diagonal         diagonal elements of the matrix
-//! @param superdiagonal    superdiagonal elements of the matrix
-//! @param eigenvalues      Vectors with the eigenvalues in ascending order
-//! @return                 return false if any errors occured
-///
-//! overloaded function template: viennacl::vectors as parameters
-template<typename NumericT>
-bool
-bisect(const viennacl::vector<NumericT> & diagonal, const 
viennacl::vector<NumericT> & superdiagonal, viennacl::vector<NumericT> & 
eigenvalues)
-{
-  assert(diagonal.size() == superdiagonal.size() &&
-         diagonal.size() == eigenvalues.size()   &&
-         bool("Input vectors do not have the same sizes!"));
-  bool bResult = false;
-  // flag if the matrix size is due to explicit user request
-  // desired precision of eigenvalues
-  NumericT  precision = static_cast<NumericT>(0.00001);
-  const unsigned int mat_size = static_cast<unsigned int>(diagonal.size());
-
-  // set up input
-  viennacl::linalg::detail::InputData<NumericT> input(diagonal, superdiagonal, 
mat_size);
-
-  NumericT lg =  FLT_MAX;
-  NumericT ug = -FLT_MAX;
-  // compute Gerschgorin interval
-  viennacl::linalg::detail::computeGerschgorin(input.std_a, input.std_b, 
mat_size, lg, ug);
-
-  // decide wheter the algorithm for small or for large matrices will be 
started
-  if (mat_size <= VIENNACL_BISECT_MAX_SMALL_MATRIX)
-  {
-    // initialize memory for result
-    viennacl::linalg::detail::ResultDataSmall<NumericT> result(mat_size);
-
-    // run the kernel
-    viennacl::linalg::detail::computeEigenvaluesSmallMatrix(input, result, 
mat_size, lg, ug, precision);
-
-    // get the result from the device and do some sanity checks,
-    viennacl::linalg::detail::processResultSmallMatrix(result, mat_size);
-    copy(result.std_eigenvalues, eigenvalues);
-    bResult = true;
-  }
-
-  else
-  {
-    // initialize memory for result
-    viennacl::linalg::detail::ResultDataLarge<NumericT> result(mat_size);
-
-    // run the kernel
-    viennacl::linalg::detail::computeEigenvaluesLargeMatrix(input, result, 
mat_size, lg, ug, precision);
-
-    // get the result from the device and do some sanity checks
-    bResult = viennacl::linalg::detail::processResultDataLargeMatrix(result, 
mat_size);
-
-    copy(result.std_eigenvalues, eigenvalues);
-  }
-  return bResult;
-}
-} // namespace linalg
-} // namespace viennacl
-#endif

http://git-wip-us.apache.org/repos/asf/mahout/blob/7ae549fa/native-viennaCL/src/main/cpp/viennacl/linalg/cg.hpp
----------------------------------------------------------------------
diff --git a/native-viennaCL/src/main/cpp/viennacl/linalg/cg.hpp 
b/native-viennaCL/src/main/cpp/viennacl/linalg/cg.hpp
deleted file mode 100644
index 93aae81..0000000
--- a/native-viennaCL/src/main/cpp/viennacl/linalg/cg.hpp
+++ /dev/null
@@ -1,440 +0,0 @@
-#ifndef VIENNACL_LINALG_CG_HPP_
-#define VIENNACL_LINALG_CG_HPP_
-
-/* =========================================================================
-   Copyright (c) 2010-2016, Institute for Microelectronics,
-                            Institute for Analysis and Scientific Computing,
-                            TU Wien.
-   Portions of this software are copyright by UChicago Argonne, LLC.
-
-                            -----------------
-                  ViennaCL - The Vienna Computing Library
-                            -----------------
-
-   Project Head:    Karl Rupp                   [email protected]
-
-   (A list of authors and contributors can be found in the manual)
-
-   License:         MIT (X11), see file LICENSE in the base directory
-============================================================================= 
*/
-
-/** @file viennacl/linalg/cg.hpp
-    @brief The conjugate gradient method is implemented here
-*/
-
-#include <vector>
-#include <map>
-#include <cmath>
-#include <numeric>
-
-#include "viennacl/forwards.h"
-#include "viennacl/tools/tools.hpp"
-#include "viennacl/linalg/ilu.hpp"
-#include "viennacl/linalg/prod.hpp"
-#include "viennacl/linalg/inner_prod.hpp"
-#include "viennacl/linalg/norm_2.hpp"
-#include "viennacl/traits/clear.hpp"
-#include "viennacl/traits/size.hpp"
-#include "viennacl/meta/result_of.hpp"
-#include "viennacl/linalg/iterative_operations.hpp"
-
-namespace viennacl
-{
-namespace linalg
-{
-
-/** @brief A tag for the conjugate gradient Used for supplying solver 
parameters and for dispatching the solve() function
-*/
-class cg_tag
-{
-public:
-  /** @brief The constructor
-  *
-  * @param tol              Relative tolerance for the residual (solver quits 
if ||r|| < tol * ||r_initial||)
-  * @param max_iterations   The maximum number of iterations
-  */
-  cg_tag(double tol = 1e-8, unsigned int max_iterations = 300) : tol_(tol), 
abs_tol_(0), iterations_(max_iterations) {}
-
-  /** @brief Returns the relative tolerance */
-  double tolerance() const { return tol_; }
-
-  /** @brief Returns the absolute tolerance */
-  double abs_tolerance() const { return abs_tol_; }
-  /** @brief Sets the absolute tolerance */
-  void abs_tolerance(double new_tol) { if (new_tol >= 0) abs_tol_ = new_tol; }
-
-  /** @brief Returns the maximum number of iterations */
-  unsigned int max_iterations() const { return iterations_; }
-
-  /** @brief Return the number of solver iterations: */
-  unsigned int iters() const { return iters_taken_; }
-  void iters(unsigned int i) const { iters_taken_ = i; }
-
-  /** @brief Returns the estimated relative error at the end of the solver run 
*/
-  double error() const { return last_error_; }
-  /** @brief Sets the estimated relative error at the end of the solver run */
-  void error(double e) const { last_error_ = e; }
-
-
-private:
-  double tol_;
-  double abs_tol_;
-  unsigned int iterations_;
-
-  //return values from solver
-  mutable unsigned int iters_taken_;
-  mutable double last_error_;
-};
-
-namespace detail
-{
-
-  /** @brief handles the no_precond case at minimal overhead */
-  template<typename VectorT, typename PreconditionerT>
-  class z_handler{
-  public:
-    z_handler(VectorT & residual) : z_(residual){ }
-    VectorT & get() { return z_; }
-  private:
-    VectorT z_;
-  };
-
-  template<typename VectorT>
-  class z_handler<VectorT, viennacl::linalg::no_precond>{
-  public:
-    z_handler(VectorT & residual) : presidual_(&residual){ }
-    VectorT & get() { return *presidual_; }
-  private:
-    VectorT * presidual_;
-  };
-
-}
-
-namespace detail
-{
-
-  /** @brief Implementation of a pipelined conjugate gradient algorithm (no 
preconditioner), specialized for ViennaCL types.
-  *
-  * Pipelined version from A. T. Chronopoulos and C. W. Gear, J. Comput. Appl. 
Math. 25(2), 153–168 (1989)
-  *
-  * @param A            The system matrix
-  * @param rhs          The load vector
-  * @param tag          Solver configuration tag
-  * @param monitor      A callback routine which is called at each GMRES 
restart
-  * @param monitor_data Data pointer to be passed to the callback routine to 
pass on user-specific data
-  * @return The result vector
-  */
-  //template<typename MatrixType, typename ScalarType>
-  template<typename MatrixT, typename NumericT>
-  viennacl::vector<NumericT> pipelined_solve(MatrixT const & A, //MatrixType 
const & A,
-                                             viennacl::vector<NumericT> const 
& rhs,
-                                             cg_tag const & tag,
-                                             viennacl::linalg::no_precond,
-                                             bool 
(*monitor)(viennacl::vector<NumericT> const &, NumericT, void*) = NULL,
-                                             void *monitor_data = NULL)
-  {
-    typedef typename viennacl::vector<NumericT>::difference_type   
difference_type;
-
-    viennacl::vector<NumericT> result(rhs);
-    viennacl::traits::clear(result);
-
-    viennacl::vector<NumericT> residual(rhs);
-    viennacl::vector<NumericT> p(rhs);
-    viennacl::vector<NumericT> Ap = viennacl::linalg::prod(A, p);
-    viennacl::vector<NumericT> inner_prod_buffer = 
viennacl::zero_vector<NumericT>(3*256, viennacl::traits::context(rhs)); // 
temporary buffer
-    std::vector<NumericT>      
host_inner_prod_buffer(inner_prod_buffer.size());
-    vcl_size_t                 buffer_size_per_vector = 
inner_prod_buffer.size() / 3;
-    difference_type            buffer_offset_per_vector = 
static_cast<difference_type>(buffer_size_per_vector);
-
-    NumericT norm_rhs_squared = viennacl::linalg::norm_2(residual); 
norm_rhs_squared *= norm_rhs_squared;
-
-    if (norm_rhs_squared <= tag.abs_tolerance() * tag.abs_tolerance()) //check 
for early convergence of A*x = 0
-      return result;
-
-    NumericT inner_prod_rr = norm_rhs_squared;
-    NumericT alpha = inner_prod_rr / viennacl::linalg::inner_prod(p, Ap);
-    NumericT beta  = viennacl::linalg::norm_2(Ap); beta = (alpha * alpha * 
beta * beta - inner_prod_rr) / inner_prod_rr;
-    NumericT inner_prod_ApAp = 0;
-    NumericT inner_prod_pAp  = 0;
-
-    for (unsigned int i = 0; i < tag.max_iterations(); ++i)
-    {
-      tag.iters(i+1);
-
-      viennacl::linalg::pipelined_cg_vector_update(result, alpha, p, residual, 
Ap, beta, inner_prod_buffer);
-      viennacl::linalg::pipelined_cg_prod(A, p, Ap, inner_prod_buffer);
-
-      // bring back the partial results to the host:
-      viennacl::fast_copy(inner_prod_buffer.begin(), inner_prod_buffer.end(), 
host_inner_prod_buffer.begin());
-
-      inner_prod_rr   = std::accumulate(host_inner_prod_buffer.begin(),        
                        host_inner_prod_buffer.begin() +     
buffer_offset_per_vector, NumericT(0));
-      inner_prod_ApAp = std::accumulate(host_inner_prod_buffer.begin() +     
buffer_offset_per_vector, host_inner_prod_buffer.begin() + 2 * 
buffer_offset_per_vector, NumericT(0));
-      inner_prod_pAp  = std::accumulate(host_inner_prod_buffer.begin() + 2 * 
buffer_offset_per_vector, host_inner_prod_buffer.begin() + 3 * 
buffer_offset_per_vector, NumericT(0));
-
-      if (monitor && monitor(result, std::sqrt(std::fabs(inner_prod_rr / 
norm_rhs_squared)), monitor_data))
-        break;
-      if (std::fabs(inner_prod_rr / norm_rhs_squared) < tag.tolerance() *  
tag.tolerance() || std::fabs(inner_prod_rr) < tag.abs_tolerance() * 
tag.abs_tolerance())    //squared norms involved here
-        break;
-
-      alpha = inner_prod_rr / inner_prod_pAp;
-      beta  = (alpha*alpha*inner_prod_ApAp - inner_prod_rr) / inner_prod_rr;
-    }
-
-    //store last error estimate:
-    tag.error(std::sqrt(std::fabs(inner_prod_rr) / norm_rhs_squared));
-
-    return result;
-  }
-
-
-  /** @brief Overload for the pipelined CG implementation for the ViennaCL 
sparse matrix types */
-  template<typename NumericT>
-  viennacl::vector<NumericT> solve_impl(viennacl::compressed_matrix<NumericT> 
const & A,
-                                        viennacl::vector<NumericT> const & rhs,
-                                        cg_tag const & tag,
-                                        viennacl::linalg::no_precond,
-                                        bool 
(*monitor)(viennacl::vector<NumericT> const &, NumericT, void*) = NULL,
-                                        void *monitor_data = NULL)
-  {
-    return pipelined_solve(A, rhs, tag, viennacl::linalg::no_precond(), 
monitor, monitor_data);
-  }
-
-
-  /** @brief Overload for the pipelined CG implementation for the ViennaCL 
sparse matrix types */
-  template<typename NumericT>
-  viennacl::vector<NumericT> solve_impl(viennacl::coordinate_matrix<NumericT> 
const & A,
-                                        viennacl::vector<NumericT> const & rhs,
-                                        cg_tag const & tag,
-                                        viennacl::linalg::no_precond,
-                                        bool 
(*monitor)(viennacl::vector<NumericT> const &, NumericT, void*) = NULL,
-                                        void *monitor_data = NULL)
-  {
-    return detail::pipelined_solve(A, rhs, tag, 
viennacl::linalg::no_precond(), monitor, monitor_data);
-  }
-
-
-
-  /** @brief Overload for the pipelined CG implementation for the ViennaCL 
sparse matrix types */
-  template<typename NumericT>
-  viennacl::vector<NumericT> solve_impl(viennacl::ell_matrix<NumericT> const & 
A,
-                                        viennacl::vector<NumericT> const & rhs,
-                                        cg_tag const & tag,
-                                        viennacl::linalg::no_precond,
-                                        bool 
(*monitor)(viennacl::vector<NumericT> const &, NumericT, void*) = NULL,
-                                        void *monitor_data = NULL)
-  {
-    return detail::pipelined_solve(A, rhs, tag, 
viennacl::linalg::no_precond(), monitor, monitor_data);
-  }
-
-
-
-  /** @brief Overload for the pipelined CG implementation for the ViennaCL 
sparse matrix types */
-  template<typename NumericT>
-  viennacl::vector<NumericT> solve_impl(viennacl::sliced_ell_matrix<NumericT> 
const & A,
-                                        viennacl::vector<NumericT> const & rhs,
-                                        cg_tag const & tag,
-                                        viennacl::linalg::no_precond,
-                                        bool 
(*monitor)(viennacl::vector<NumericT> const &, NumericT, void*) = NULL,
-                                        void *monitor_data = NULL)
-  {
-    return detail::pipelined_solve(A, rhs, tag, 
viennacl::linalg::no_precond(), monitor, monitor_data);
-  }
-
-
-  /** @brief Overload for the pipelined CG implementation for the ViennaCL 
sparse matrix types */
-  template<typename NumericT>
-  viennacl::vector<NumericT> solve_impl(viennacl::hyb_matrix<NumericT> const & 
A,
-                                        viennacl::vector<NumericT> const & rhs,
-                                        cg_tag const & tag,
-                                        viennacl::linalg::no_precond,
-                                        bool 
(*monitor)(viennacl::vector<NumericT> const &, NumericT, void*) = NULL,
-                                        void *monitor_data = NULL)
-  {
-    return detail::pipelined_solve(A, rhs, tag, 
viennacl::linalg::no_precond(), monitor, monitor_data);
-  }
-
-
-  template<typename MatrixT, typename VectorT, typename PreconditionerT>
-  VectorT solve_impl(MatrixT const & matrix,
-                     VectorT const & rhs,
-                     cg_tag const & tag,
-                     PreconditionerT const & precond,
-                     bool (*monitor)(VectorT const &, typename 
viennacl::result_of::cpu_value_type<typename 
viennacl::result_of::value_type<VectorT>::type>::type, void*) = NULL,
-                     void *monitor_data = NULL)
-  {
-    typedef typename viennacl::result_of::value_type<VectorT>::type           
NumericType;
-    typedef typename viennacl::result_of::cpu_value_type<NumericType>::type   
CPU_NumericType;
-
-    VectorT result = rhs;
-    viennacl::traits::clear(result);
-
-    VectorT residual = rhs;
-    VectorT tmp = rhs;
-    detail::z_handler<VectorT, PreconditionerT> zhandler(residual);
-    VectorT & z = zhandler.get();
-
-    precond.apply(z);
-    VectorT p = z;
-
-    CPU_NumericType ip_rr = viennacl::linalg::inner_prod(residual, z);
-    CPU_NumericType alpha;
-    CPU_NumericType new_ip_rr = 0;
-    CPU_NumericType beta;
-    CPU_NumericType norm_rhs_squared = ip_rr;
-    CPU_NumericType new_ipp_rr_over_norm_rhs;
-
-    if (norm_rhs_squared <= tag.abs_tolerance() * tag.abs_tolerance()) 
//solution is zero if RHS norm (squared) is zero
-      return result;
-
-    for (unsigned int i = 0; i < tag.max_iterations(); ++i)
-    {
-      tag.iters(i+1);
-      tmp = viennacl::linalg::prod(matrix, p);
-
-      alpha = ip_rr / viennacl::linalg::inner_prod(tmp, p);
-
-      result += alpha * p;
-      residual -= alpha * tmp;
-      z = residual;
-      precond.apply(z);
-
-      if (static_cast<VectorT*>(&residual)==static_cast<VectorT*>(&z))
-        new_ip_rr = std::pow(viennacl::linalg::norm_2(residual),2);
-      else
-        new_ip_rr = viennacl::linalg::inner_prod(residual, z);
-
-      new_ipp_rr_over_norm_rhs = new_ip_rr / norm_rhs_squared;
-      if (monitor && monitor(result, 
std::sqrt(std::fabs(new_ipp_rr_over_norm_rhs)), monitor_data))
-        break;
-      if (std::fabs(new_ipp_rr_over_norm_rhs) < tag.tolerance() *  
tag.tolerance() || std::fabs(new_ip_rr) < tag.abs_tolerance() * 
tag.abs_tolerance())    //squared norms involved here
-        break;
-
-      beta = new_ip_rr / ip_rr;
-      ip_rr = new_ip_rr;
-
-      p = z + beta*p;
-    }
-
-    //store last error estimate:
-    tag.error(std::sqrt(std::fabs(new_ip_rr / norm_rhs_squared)));
-
-    return result;
-  }
-
-}
-
-
-
-/** @brief Implementation of the preconditioned conjugate gradient solver, 
generic implementation for non-ViennaCL types.
-*
-* Following Algorithm 9.1 in "Iterative Methods for Sparse Linear Systems" by 
Y. Saad
-*
-* @param matrix     The system matrix
-* @param rhs        The load vector
-* @param tag        Solver configuration tag
-* @param precond    A preconditioner. Precondition operation is done via 
member function apply()
-* @return The result vector
-*/
-template<typename MatrixT, typename VectorT, typename PreconditionerT>
-VectorT solve(MatrixT const & matrix, VectorT const & rhs, cg_tag const & tag, 
PreconditionerT const & precond)
-{
-  return detail::solve_impl(matrix, rhs, tag, precond);
-}
-
-/** @brief Convenience overload for calling the CG solver using types from the 
C++ STL.
-  *
-  * A std::vector<std::map<T, U> > matrix is convenient for e.g. finite 
element assembly.
-  * It is not the fastest option for setting up a system, but often it is fast 
enough - particularly for just trying things out.
-  */
-template<typename IndexT, typename NumericT, typename PreconditionerT>
-std::vector<NumericT> solve(std::vector< std::map<IndexT, NumericT> > const & 
A, std::vector<NumericT> const & rhs, cg_tag const & tag, PreconditionerT const 
& precond)
-{
-  viennacl::compressed_matrix<NumericT> vcl_A;
-  viennacl::copy(A, vcl_A);
-
-  viennacl::vector<NumericT> vcl_rhs(rhs.size());
-  viennacl::copy(rhs, vcl_rhs);
-
-  viennacl::vector<NumericT> vcl_result = solve(vcl_A, vcl_rhs, tag, precond);
-
-  std::vector<NumericT> result(vcl_result.size());
-  viennacl::copy(vcl_result, result);
-  return result;
-}
-
-/** @brief Entry point for the unpreconditioned CG method.
- *
- *  @param matrix    The system matrix
- *  @param rhs       Right hand side vector (load vector)
- *  @param tag       A BiCGStab tag providing relative tolerances, etc.
- */
-template<typename MatrixT, typename VectorT>
-VectorT solve(MatrixT const & matrix, VectorT const & rhs, cg_tag const & tag)
-{
-  return solve(matrix, rhs, tag, viennacl::linalg::no_precond());
-}
-
-
-
-template<typename VectorT>
-class cg_solver
-{
-public:
-  typedef typename viennacl::result_of::cpu_value_type<VectorT>::type   
numeric_type;
-
-  cg_solver(cg_tag const & tag) : tag_(tag), monitor_callback_(NULL), 
user_data_(NULL) {}
-
-  template<typename MatrixT, typename PreconditionerT>
-  VectorT operator()(MatrixT const & A, VectorT const & b, PreconditionerT 
const & precond) const
-  {
-    if (viennacl::traits::size(init_guess_) > 0) // take initial guess into 
account
-    {
-      VectorT mod_rhs = viennacl::linalg::prod(A, init_guess_);
-      mod_rhs = b - mod_rhs;
-      VectorT y = detail::solve_impl(A, mod_rhs, tag_, precond, 
monitor_callback_, user_data_);
-      return init_guess_ + y;
-    }
-    return detail::solve_impl(A, b, tag_, precond, monitor_callback_, 
user_data_);
-  }
-
-
-  template<typename MatrixT>
-  VectorT operator()(MatrixT const & A, VectorT const & b) const
-  {
-    return operator()(A, b, viennacl::linalg::no_precond());
-  }
-
-  /** @brief Specifies an initial guess for the iterative solver.
-    *
-    * An iterative solver for Ax = b with initial guess x_0 is equivalent to 
an iterative solver for Ay = b' := b - Ax_0, where x = x_0 + y.
-    */
-  void set_initial_guess(VectorT const & x) { init_guess_ = x; }
-
-  /** @brief Sets a monitor function pointer to be called in each iteration. 
Set to NULL to run without monitor.
-   *
-   *  The monitor function is called with the current guess for the result as 
first argument and the current relative residual estimate as second argument.
-   *  The third argument is a pointer to user-defined data, through which 
additional information can be passed.
-   *  This pointer needs to be set with set_monitor_data. If not set, NULL is 
passed.
-   *  If the montior function returns true, the solver terminates (either 
convergence or divergence).
-   */
-  void set_monitor(bool (*monitor_fun)(VectorT const &, numeric_type, void *), 
void *user_data)
-  {
-    monitor_callback_ = monitor_fun;
-    user_data_ = user_data;
-  }
-
-  /** @brief Returns the solver tag containing basic configuration such as 
tolerances, etc. */
-  cg_tag const & tag() const { return tag_; }
-
-private:
-  cg_tag   tag_;
-  VectorT  init_guess_;
-  bool     (*monitor_callback_)(VectorT const &, numeric_type, void *);
-  void     *user_data_;
-};
-
-
-}
-}
-
-#endif

http://git-wip-us.apache.org/repos/asf/mahout/blob/7ae549fa/native-viennaCL/src/main/cpp/viennacl/linalg/circulant_matrix_operations.hpp
----------------------------------------------------------------------
diff --git 
a/native-viennaCL/src/main/cpp/viennacl/linalg/circulant_matrix_operations.hpp 
b/native-viennaCL/src/main/cpp/viennacl/linalg/circulant_matrix_operations.hpp
deleted file mode 100644
index 5325b7b..0000000
--- 
a/native-viennaCL/src/main/cpp/viennacl/linalg/circulant_matrix_operations.hpp
+++ /dev/null
@@ -1,75 +0,0 @@
-#ifndef VIENNACL_LINALG_CIRCULANT_MATRIX_OPERATIONS_HPP_
-#define VIENNACL_LINALG_CIRCULANT_MATRIX_OPERATIONS_HPP_
-
-/* =========================================================================
-   Copyright (c) 2010-2016, Institute for Microelectronics,
-                            Institute for Analysis and Scientific Computing,
-                            TU Wien.
-   Portions of this software are copyright by UChicago Argonne, LLC.
-
-                            -----------------
-                  ViennaCL - The Vienna Computing Library
-                            -----------------
-
-   Project Head:    Karl Rupp                   [email protected]
-
-   (A list of authors and contributors can be found in the manual)
-
-   License:         MIT (X11), see file LICENSE in the base directory
-============================================================================= 
*/
-
-/** @file viennacl/linalg/circulant_matrix_operations.hpp
-    @brief Implementations of operations using circulant_matrix. Experimental.
-*/
-
-#include "viennacl/forwards.h"
-#include "viennacl/ocl/backend.hpp"
-#include "viennacl/scalar.hpp"
-#include "viennacl/vector.hpp"
-#include "viennacl/tools/tools.hpp"
-#include "viennacl/fft.hpp"
-//#include "viennacl/linalg/kernels/coordinate_matrix_kernels.h"
-
-namespace viennacl
-{
-namespace linalg
-{
-
-// A * x
-
-/** @brief Carries out matrix-vector multiplication with a circulant_matrix
-*
-* Implementation of the convenience expression result = prod(mat, vec);
-*
-* @param mat    The matrix
-* @param vec    The vector
-* @param result The result vector
-*/
-template<typename NumericT, unsigned int AlignmentV>
-void prod_impl(viennacl::circulant_matrix<NumericT, AlignmentV> const & mat,
-               viennacl::vector_base<NumericT> const & vec,
-               viennacl::vector_base<NumericT>       & result)
-{
-  assert(mat.size1() == result.size() && bool("Dimension mismatch"));
-  assert(mat.size2() == vec.size() && bool("Dimension mismatch"));
-  //result.clear();
-
-  //std::cout << "prod(circulant_matrix" << ALIGNMENT << ", vector) called 
with internal_nnz=" << mat.internal_nnz() << std::endl;
-
-  viennacl::vector<NumericT> circ(mat.elements().size() * 2);
-  viennacl::linalg::real_to_complex(mat.elements(), circ, 
mat.elements().size());
-
-  viennacl::vector<NumericT> tmp(vec.size() * 2);
-  viennacl::vector<NumericT> tmp2(vec.size() * 2);
-
-  viennacl::linalg::real_to_complex(vec, tmp, vec.size());
-  viennacl::linalg::convolve(circ, tmp, tmp2);
-  viennacl::linalg::complex_to_real(tmp2, result, vec.size());
-
-}
-
-} //namespace linalg
-} //namespace viennacl
-
-
-#endif

Reply via email to