http://git-wip-us.apache.org/repos/asf/mahout/blob/7ae549fa/native-viennaCL/src/main/cpp/viennacl/linalg/amg.hpp ---------------------------------------------------------------------- diff --git a/native-viennaCL/src/main/cpp/viennacl/linalg/amg.hpp b/native-viennaCL/src/main/cpp/viennacl/linalg/amg.hpp deleted file mode 100644 index 0b81203..0000000 --- a/native-viennaCL/src/main/cpp/viennacl/linalg/amg.hpp +++ /dev/null @@ -1,398 +0,0 @@ -#ifndef VIENNACL_LINALG_AMG_HPP_ -#define VIENNACL_LINALG_AMG_HPP_ - -/* ========================================================================= - Copyright (c) 2010-2016, Institute for Microelectronics, - Institute for Analysis and Scientific Computing, - TU Wien. - Portions of this software are copyright by UChicago Argonne, LLC. - - ----------------- - ViennaCL - The Vienna Computing Library - ----------------- - - Project Head: Karl Rupp [email protected] - - (A list of authors and contributors can be found in the manual) - - License: MIT (X11), see file LICENSE in the base directory -============================================================================= */ - -/** @file viennacl/linalg/amg.hpp - @brief Main include file for algebraic multigrid (AMG) preconditioners. Experimental. - - Implementation contributed by Markus Wagner -*/ - -#include <vector> -#include <cmath> -#include "viennacl/forwards.h" -#include "viennacl/tools/tools.hpp" -#include "viennacl/linalg/prod.hpp" -#include "viennacl/linalg/direct_solve.hpp" -#include "viennacl/compressed_matrix.hpp" - -#include "viennacl/linalg/detail/amg/amg_base.hpp" -#include "viennacl/linalg/sparse_matrix_operations.hpp" -#include "viennacl/linalg/amg_operations.hpp" -#include "viennacl/tools/timer.hpp" -#include "viennacl/linalg/direct_solve.hpp" -#include "viennacl/linalg/lu.hpp" - -#include <map> - -#ifdef VIENNACL_WITH_OPENMP - #include <omp.h> -#endif - -#define VIENNACL_AMG_MAX_LEVELS 20 - -namespace viennacl -{ -namespace linalg -{ - -class amg_coarse_problem_too_large_exception : public std::runtime_error -{ -public: - amg_coarse_problem_too_large_exception(std::string const & msg, vcl_size_t num_points) : std::runtime_error(msg), c_points_(num_points) {} - - /** @brief Returns the number of coarse points for which no further coarsening could be applied */ - vcl_size_t coarse_points() const { return c_points_; } - -private: - vcl_size_t c_points_; -}; - - -namespace detail -{ - /** @brief Sparse Galerkin product: Calculates A_coarse = trans(P)*A_fine*P = R*A_fine*P - * - * @param A_fine Operator matrix on fine grid (quadratic) - * @param P Prolongation/Interpolation matrix - * @param R Restriction matrix - * @param A_coarse Result matrix on coarse grid (Galerkin operator) - */ - template<typename NumericT> - void amg_galerkin_prod(compressed_matrix<NumericT> & A_fine, - compressed_matrix<NumericT> & P, - compressed_matrix<NumericT> & R, //P^T - compressed_matrix<NumericT> & A_coarse) - { - - compressed_matrix<NumericT> A_fine_times_P(viennacl::traits::context(A_fine)); - - // transpose P in memory (no known way of efficiently multiplying P^T * B for CSR-matrices P and B): - viennacl::linalg::detail::amg::amg_transpose(P, R); - - // compute Galerkin product using a temporary for the result of A_fine * P - A_fine_times_P = viennacl::linalg::prod(A_fine, P); - A_coarse = viennacl::linalg::prod(R, A_fine_times_P); - - } - - - /** @brief Setup AMG preconditioner - * - * @param list_of_A Operator matrices on all levels - * @param list_of_P Prolongation/Interpolation operators on all levels - * @param list_of_R Restriction operators on all levels - * @param list_of_amg_level_context Auxiliary datastructures for managing the grid hierarchy (coarse nodes, etc.) - * @param tag AMG preconditioner tag - */ - template<typename NumericT, typename AMGContextListT> - vcl_size_t amg_setup(std::vector<compressed_matrix<NumericT> > & list_of_A, - std::vector<compressed_matrix<NumericT> > & list_of_P, - std::vector<compressed_matrix<NumericT> > & list_of_R, - AMGContextListT & list_of_amg_level_context, - amg_tag & tag) - { - // Set number of iterations. If automatic coarse grid construction is chosen (0), then set a maximum size and stop during the process. - vcl_size_t iterations = tag.get_coarse_levels(); - if (iterations == 0) - iterations = VIENNACL_AMG_MAX_LEVELS; - - for (vcl_size_t i=0; i<iterations; ++i) - { - list_of_amg_level_context[i].switch_context(tag.get_setup_context()); - list_of_amg_level_context[i].resize(list_of_A[i].size1(), list_of_A[i].nnz()); - - // Construct C and F points on coarse level (i is fine level, i+1 coarse level). - detail::amg::amg_coarse(list_of_A[i], list_of_amg_level_context[i], tag); - - // Calculate number of C and F points on level i. - unsigned int c_points = list_of_amg_level_context[i].num_coarse_; - unsigned int f_points = static_cast<unsigned int>(list_of_A[i].size1()) - c_points; - - if (f_points == 0 && c_points > tag.get_coarsening_cutoff()) - { - std::stringstream ss; - ss << "No further coarsening possible (" << c_points << " coarse points). Consider changing the strong connection threshold or increasing the coarsening cutoff." << std::endl; - throw amg_coarse_problem_too_large_exception(ss.str(), c_points); - } - - // Stop routine when the maximal coarse level is found (no C or F point). Coarsest level is level i. - if (c_points == 0 || f_points == 0) - break; - - // Construct interpolation matrix for level i. - detail::amg::amg_interpol(list_of_A[i], list_of_P[i], list_of_amg_level_context[i], tag); - - // Compute coarse grid operator (A[i+1] = R * A[i] * P) with R = trans(P). - amg_galerkin_prod(list_of_A[i], list_of_P[i], list_of_R[i], list_of_A[i+1]); - - // send matrices to target context: - list_of_A[i].switch_memory_context(tag.get_target_context()); - list_of_P[i].switch_memory_context(tag.get_target_context()); - list_of_R[i].switch_memory_context(tag.get_target_context()); - - // If Limit of coarse points is reached then stop. Coarsest level is level i+1. - if (tag.get_coarse_levels() == 0 && c_points <= tag.get_coarsening_cutoff()) - return i+1; - } - - return iterations; - } - - - /** @brief Initialize AMG preconditioner - * - * @param mat System matrix - * @param list_of_A Operator matrices on all levels - * @param list_of_P Prolongation/Interpolation operators on all levels - * @param list_of_R Restriction operators on all levels - * @param list_of_amg_level_context Auxiliary datastructures for managing the grid hierarchy (coarse nodes, etc.) - * @param tag AMG preconditioner tag - */ - template<typename MatrixT, typename InternalT1, typename InternalT2> - void amg_init(MatrixT const & mat, InternalT1 & list_of_A, InternalT1 & list_of_P, InternalT1 & list_of_R, InternalT2 & list_of_amg_level_context, amg_tag & tag) - { - typedef typename InternalT1::value_type SparseMatrixType; - - vcl_size_t num_levels = (tag.get_coarse_levels() > 0) ? tag.get_coarse_levels() : VIENNACL_AMG_MAX_LEVELS; - - list_of_A.resize(num_levels+1, SparseMatrixType(tag.get_setup_context())); - list_of_P.resize(num_levels, SparseMatrixType(tag.get_setup_context())); - list_of_R.resize(num_levels, SparseMatrixType(tag.get_setup_context())); - list_of_amg_level_context.resize(num_levels); - - // Insert operator matrix as operator for finest level. - //SparseMatrixType A0(mat); - //A.insert_element(0, A0); - list_of_A[0].switch_memory_context(viennacl::traits::context(mat)); - list_of_A[0] = mat; - list_of_A[0].switch_memory_context(tag.get_setup_context()); - } - - /** @brief Setup data structures for precondition phase for later use on the GPU - * - * @param result Result vector on all levels - * @param result_backup Copy of result vector on all levels - * @param rhs RHS vector on all levels - * @param residual Residual vector on all levels - * @param A Operators matrices on all levels from setup phase - * @param coarse_levels Number of coarse levels for which the datastructures should be set up. - * @param tag AMG preconditioner tag - */ - template<typename InternalVectorT, typename SparseMatrixT> - void amg_setup_apply(InternalVectorT & result, - InternalVectorT & result_backup, - InternalVectorT & rhs, - InternalVectorT & residual, - SparseMatrixT const & A, - vcl_size_t coarse_levels, - amg_tag const & tag) - { - typedef typename InternalVectorT::value_type VectorType; - - result.resize(coarse_levels + 1); - result_backup.resize(coarse_levels + 1); - rhs.resize(coarse_levels + 1); - residual.resize(coarse_levels); - - for (vcl_size_t level=0; level <= coarse_levels; ++level) - { - result[level] = VectorType(A[level].size1(), tag.get_target_context()); - result_backup[level] = VectorType(A[level].size1(), tag.get_target_context()); - rhs[level] = VectorType(A[level].size1(), tag.get_target_context()); - } - for (vcl_size_t level=0; level < coarse_levels; ++level) - { - residual[level] = VectorType(A[level].size1(), tag.get_target_context()); - } - } - - - /** @brief Pre-compute LU factorization for direct solve (ublas library). - * - * Speeds up precondition phase as this is computed only once overall instead of once per iteration. - * - * @param op Operator matrix for direct solve - * @param A Operator matrix on coarsest level - * @param tag AMG preconditioner tag - */ - template<typename NumericT, typename SparseMatrixT> - void amg_lu(viennacl::matrix<NumericT> & op, - SparseMatrixT const & A, - amg_tag const & tag) - { - op.switch_memory_context(tag.get_setup_context()); - op.resize(A.size1(), A.size2(), false); - viennacl::linalg::detail::amg::assign_to_dense(A, op); - - viennacl::linalg::lu_factorize(op); - op.switch_memory_context(tag.get_target_context()); - } - -} - -/** @brief AMG preconditioner class, can be supplied to solve()-routines -*/ -template<typename MatrixT> -class amg_precond; - - -/** @brief AMG preconditioner class, can be supplied to solve()-routines. -* -* Specialization for compressed_matrix -*/ -template<typename NumericT, unsigned int AlignmentV> -class amg_precond< compressed_matrix<NumericT, AlignmentV> > -{ - typedef viennacl::compressed_matrix<NumericT, AlignmentV> SparseMatrixType; - typedef viennacl::vector<NumericT> VectorType; - typedef detail::amg::amg_level_context AMGContextType; - -public: - - amg_precond() {} - - /** @brief The constructor. Builds data structures. - * - * @param mat System matrix - * @param tag The AMG tag - */ - amg_precond(compressed_matrix<NumericT, AlignmentV> const & mat, - amg_tag const & tag) - { - tag_ = tag; - - // Initialize data structures. - detail::amg_init(mat, A_list_, P_list_, R_list_, amg_context_list_, tag_); - } - - /** @brief Start setup phase for this class and copy data structures. - */ - void setup() - { - // Start setup phase. - vcl_size_t num_coarse_levels = detail::amg_setup(A_list_, P_list_, R_list_, amg_context_list_, tag_); - - // Setup precondition phase (Data structures). - detail::amg_setup_apply(result_list_, result_backup_list_, rhs_list_, residual_list_, A_list_, num_coarse_levels, tag_); - - // LU factorization for direct solve. - detail::amg_lu(coarsest_op_, A_list_[num_coarse_levels], tag_); - } - - - /** @brief Precondition Operation - * - * @param vec The vector to which preconditioning is applied to - */ - template<typename VectorT> - void apply(VectorT & vec) const - { - vcl_size_t level; - - // Precondition operation (Yang, p.3). - rhs_list_[0] = vec; - - // Part 1: Restrict down to coarsest level - for (level=0; level < residual_list_.size(); level++) - { - result_list_[level].clear(); - - // Apply Smoother presmooth_ times. - viennacl::linalg::detail::amg::smooth_jacobi(static_cast<unsigned int>(tag_.get_presmooth_steps()), - A_list_[level], - result_list_[level], - result_backup_list_[level], - rhs_list_[level], - static_cast<NumericT>(tag_.get_jacobi_weight())); - - // Compute residual. - //residual[level] = rhs_[level] - viennacl::linalg::prod(A_[level], result_[level]); - residual_list_[level] = viennacl::linalg::prod(A_list_[level], result_list_[level]); - residual_list_[level] = rhs_list_[level] - residual_list_[level]; - - // Restrict to coarse level. Result is RHS of coarse level equation. - //residual_coarse[level] = viennacl::linalg::prod(R[level],residual[level]); - rhs_list_[level+1] = viennacl::linalg::prod(R_list_[level], residual_list_[level]); - } - - // Part 2: On highest level use direct solve to solve equation (on the CPU) - result_list_[level] = rhs_list_[level]; - viennacl::linalg::lu_substitute(coarsest_op_, result_list_[level]); - - // Part 3: Prolongation to finest level - for (int level2 = static_cast<int>(residual_list_.size()-1); level2 >= 0; level2--) - { - level = static_cast<vcl_size_t>(level2); - - // Interpolate error to fine level and correct solution. - result_backup_list_[level] = viennacl::linalg::prod(P_list_[level], result_list_[level+1]); - result_list_[level] += result_backup_list_[level]; - - // Apply Smoother postsmooth_ times. - viennacl::linalg::detail::amg::smooth_jacobi(static_cast<unsigned int>(tag_.get_postsmooth_steps()), - A_list_[level], - result_list_[level], - result_backup_list_[level], - rhs_list_[level], - static_cast<NumericT>(tag_.get_jacobi_weight())); - } - vec = result_list_[0]; - } - - /** @brief Returns the total number of multigrid levels in the hierarchy including the finest level. */ - vcl_size_t levels() const { return residual_list_.size(); } - - - /** @brief Returns the problem/operator size at the respective multigrid level - * - * @param level Index of the multigrid level. 0 is the finest level, levels() - 1 is the coarsest level. - */ - vcl_size_t size(vcl_size_t level) const - { - assert(level < levels() && bool("Level index out of bounds!")); - return residual_list_[level].size(); - } - - /** @brief Returns the associated preconditioner tag containing the configuration for the multigrid preconditioner. */ - amg_tag const & tag() const { return tag_; } - -private: - std::vector<SparseMatrixType> A_list_; - std::vector<SparseMatrixType> P_list_; - std::vector<SparseMatrixType> R_list_; - std::vector<AMGContextType> amg_context_list_; - - viennacl::matrix<NumericT> coarsest_op_; - - mutable std::vector<VectorType> result_list_; - mutable std::vector<VectorType> result_backup_list_; - mutable std::vector<VectorType> rhs_list_; - mutable std::vector<VectorType> residual_list_; - - amg_tag tag_; -}; - -} -} - - - -#endif -
http://git-wip-us.apache.org/repos/asf/mahout/blob/7ae549fa/native-viennaCL/src/main/cpp/viennacl/linalg/amg_operations.hpp ---------------------------------------------------------------------- diff --git a/native-viennaCL/src/main/cpp/viennacl/linalg/amg_operations.hpp b/native-viennaCL/src/main/cpp/viennacl/linalg/amg_operations.hpp deleted file mode 100644 index 9c7f79f..0000000 --- a/native-viennaCL/src/main/cpp/viennacl/linalg/amg_operations.hpp +++ /dev/null @@ -1,238 +0,0 @@ -#ifndef VIENNACL_LINALG_AMG_OPERATIONS_HPP_ -#define VIENNACL_LINALG_AMG_OPERATIONS_HPP_ - -/* ========================================================================= - Copyright (c) 2010-2016, Institute for Microelectronics, - Institute for Analysis and Scientific Computing, - TU Wien. - Portions of this software are copyright by UChicago Argonne, LLC. - - ----------------- - ViennaCL - The Vienna Computing Library - ----------------- - - Project Head: Karl Rupp [email protected] - - (A list of authors and contributors can be found in the PDF manual) - - License: MIT (X11), see file LICENSE in the base directory -============================================================================= */ - -/** @file viennacl/linalg/amg_operations.hpp - @brief Implementations of operations for algebraic multigrid -*/ - -#include "viennacl/forwards.h" -#include "viennacl/scalar.hpp" -#include "viennacl/vector.hpp" -#include "viennacl/matrix.hpp" -#include "viennacl/tools/tools.hpp" -#include "viennacl/linalg/detail/amg/amg_base.hpp" -#include "viennacl/linalg/host_based/amg_operations.hpp" - -#ifdef VIENNACL_WITH_OPENCL - #include "viennacl/linalg/opencl/amg_operations.hpp" -#endif - -#ifdef VIENNACL_WITH_CUDA - #include "viennacl/linalg/cuda/amg_operations.hpp" -#endif - -namespace viennacl -{ -namespace linalg -{ -namespace detail -{ -namespace amg -{ - -template<typename NumericT, typename AMGContextT> -void amg_influence(compressed_matrix<NumericT> const & A, AMGContextT & amg_context, amg_tag & tag) -{ - switch (viennacl::traits::handle(A).get_active_handle_id()) - { - case viennacl::MAIN_MEMORY: - viennacl::linalg::host_based::amg::amg_influence(A, amg_context, tag); - break; -#ifdef VIENNACL_WITH_OPENCL - case viennacl::OPENCL_MEMORY: - viennacl::linalg::opencl::amg::amg_influence(A, amg_context, tag); - break; -#endif -#ifdef VIENNACL_WITH_CUDA - case viennacl::CUDA_MEMORY: - viennacl::linalg::cuda::amg::amg_influence(A, amg_context, tag); - break; -#endif - case viennacl::MEMORY_NOT_INITIALIZED: - throw memory_exception("not initialised!"); - default: - throw memory_exception("not implemented"); - } -} - - -template<typename NumericT, typename AMGContextT> -void amg_coarse(compressed_matrix<NumericT> const & A, AMGContextT & amg_context, amg_tag & tag) -{ - switch (viennacl::traits::handle(A).get_active_handle_id()) - { - case viennacl::MAIN_MEMORY: - viennacl::linalg::host_based::amg::amg_coarse(A, amg_context, tag); - break; -#ifdef VIENNACL_WITH_OPENCL - case viennacl::OPENCL_MEMORY: - viennacl::linalg::opencl::amg::amg_coarse(A, amg_context, tag); - break; -#endif -#ifdef VIENNACL_WITH_CUDA - case viennacl::CUDA_MEMORY: - viennacl::linalg::cuda::amg::amg_coarse(A, amg_context, tag); - break; -#endif - case viennacl::MEMORY_NOT_INITIALIZED: - throw memory_exception("not initialised!"); - default: - throw memory_exception("not implemented"); - } -} - - -template<typename NumericT, typename AMGContextT> -void amg_interpol(compressed_matrix<NumericT> const & A, - compressed_matrix<NumericT> & P, - AMGContextT & amg_context, - amg_tag & tag) -{ - switch (viennacl::traits::handle(A).get_active_handle_id()) - { - case viennacl::MAIN_MEMORY: - viennacl::linalg::host_based::amg::amg_interpol(A, P, amg_context, tag); - break; -#ifdef VIENNACL_WITH_OPENCL - case viennacl::OPENCL_MEMORY: - viennacl::linalg::opencl::amg::amg_interpol(A, P, amg_context, tag); - break; -#endif -#ifdef VIENNACL_WITH_CUDA - case viennacl::CUDA_MEMORY: - viennacl::linalg::cuda::amg::amg_interpol(A, P, amg_context, tag); - break; -#endif - case viennacl::MEMORY_NOT_INITIALIZED: - throw memory_exception("not initialised!"); - default: - throw memory_exception("not implemented"); - } -} - - -template<typename NumericT> -void amg_transpose(compressed_matrix<NumericT> & A, - compressed_matrix<NumericT> & B) -{ - viennacl::context orig_ctx = viennacl::traits::context(A); - viennacl::context cpu_ctx(viennacl::MAIN_MEMORY); - (void)orig_ctx; - (void)cpu_ctx; - - switch (viennacl::traits::handle(A).get_active_handle_id()) - { - case viennacl::MAIN_MEMORY: - viennacl::linalg::host_based::amg::amg_transpose(A, B); - break; -#ifdef VIENNACL_WITH_OPENCL - case viennacl::OPENCL_MEMORY: - A.switch_memory_context(cpu_ctx); - B.switch_memory_context(cpu_ctx); - viennacl::linalg::host_based::amg::amg_transpose(A, B); - A.switch_memory_context(orig_ctx); - B.switch_memory_context(orig_ctx); - break; -#endif -#ifdef VIENNACL_WITH_CUDA - case viennacl::CUDA_MEMORY: - A.switch_memory_context(cpu_ctx); - B.switch_memory_context(cpu_ctx); - viennacl::linalg::host_based::amg::amg_transpose(A, B); - A.switch_memory_context(orig_ctx); - B.switch_memory_context(orig_ctx); - //viennacl::linalg::cuda::amg_transpose(A, B); - break; -#endif - case viennacl::MEMORY_NOT_INITIALIZED: - throw memory_exception("not initialised!"); - default: - throw memory_exception("not implemented"); - } -} - -/** Assign sparse matrix A to dense matrix B */ -template<typename SparseMatrixType, typename NumericT> -typename viennacl::enable_if< viennacl::is_any_sparse_matrix<SparseMatrixType>::value>::type -assign_to_dense(SparseMatrixType const & A, - viennacl::matrix_base<NumericT> & B) -{ - assert( (A.size1() == B.size1()) && bool("Size check failed for assignment to dense matrix: size1(A) != size1(B)")); - assert( (A.size2() == B.size1()) && bool("Size check failed for assignment to dense matrix: size2(A) != size2(B)")); - - switch (viennacl::traits::handle(A).get_active_handle_id()) - { - case viennacl::MAIN_MEMORY: - viennacl::linalg::host_based::amg::assign_to_dense(A, B); - break; -#ifdef VIENNACL_WITH_OPENCL - case viennacl::OPENCL_MEMORY: - viennacl::linalg::opencl::amg::assign_to_dense(A, B); - break; -#endif -#ifdef VIENNACL_WITH_CUDA - case viennacl::CUDA_MEMORY: - viennacl::linalg::cuda::amg::assign_to_dense(A, B); - break; -#endif - case viennacl::MEMORY_NOT_INITIALIZED: - throw memory_exception("not initialised!"); - default: - throw memory_exception("not implemented"); - } -} - -template<typename NumericT> -void smooth_jacobi(unsigned int iterations, - compressed_matrix<NumericT> const & A, - vector<NumericT> & x, - vector<NumericT> & x_backup, - vector<NumericT> const & rhs_smooth, - NumericT weight) -{ - switch (viennacl::traits::handle(A).get_active_handle_id()) - { - case viennacl::MAIN_MEMORY: - viennacl::linalg::host_based::amg::smooth_jacobi(iterations, A, x, x_backup, rhs_smooth, weight); - break; -#ifdef VIENNACL_WITH_OPENCL - case viennacl::OPENCL_MEMORY: - viennacl::linalg::opencl::amg::smooth_jacobi(iterations, A, x, x_backup, rhs_smooth, weight); - break; -#endif -#ifdef VIENNACL_WITH_CUDA - case viennacl::CUDA_MEMORY: - viennacl::linalg::cuda::amg::smooth_jacobi(iterations, A, x, x_backup, rhs_smooth, weight); - break; -#endif - case viennacl::MEMORY_NOT_INITIALIZED: - throw memory_exception("not initialised!"); - default: - throw memory_exception("not implemented"); - } -} - -} //namespace amg -} //namespace detail -} //namespace linalg -} //namespace viennacl - - -#endif http://git-wip-us.apache.org/repos/asf/mahout/blob/7ae549fa/native-viennaCL/src/main/cpp/viennacl/linalg/bicgstab.hpp ---------------------------------------------------------------------- diff --git a/native-viennaCL/src/main/cpp/viennacl/linalg/bicgstab.hpp b/native-viennaCL/src/main/cpp/viennacl/linalg/bicgstab.hpp deleted file mode 100644 index 57bc89a..0000000 --- a/native-viennaCL/src/main/cpp/viennacl/linalg/bicgstab.hpp +++ /dev/null @@ -1,598 +0,0 @@ -#ifndef VIENNACL_LINALG_BICGSTAB_HPP_ -#define VIENNACL_LINALG_BICGSTAB_HPP_ - -/* ========================================================================= - Copyright (c) 2010-2016, Institute for Microelectronics, - Institute for Analysis and Scientific Computing, - TU Wien. - Portions of this software are copyright by UChicago Argonne, LLC. - - ----------------- - ViennaCL - The Vienna Computing Library - ----------------- - - Project Head: Karl Rupp [email protected] - - (A list of authors and contributors can be found in the manual) - - License: MIT (X11), see file LICENSE in the base directory -============================================================================= */ - -/** @file bicgstab.hpp - @brief The stabilized bi-conjugate gradient method is implemented here -*/ - -#include <vector> -#include <cmath> -#include <numeric> - -#include "viennacl/forwards.h" -#include "viennacl/tools/tools.hpp" -#include "viennacl/linalg/prod.hpp" -#include "viennacl/linalg/inner_prod.hpp" -#include "viennacl/linalg/norm_2.hpp" -#include "viennacl/traits/clear.hpp" -#include "viennacl/traits/size.hpp" -#include "viennacl/traits/context.hpp" -#include "viennacl/meta/result_of.hpp" -#include "viennacl/linalg/iterative_operations.hpp" - -namespace viennacl -{ -namespace linalg -{ - -/** @brief A tag for the stabilized Bi-conjugate gradient solver. Used for supplying solver parameters and for dispatching the solve() function -*/ -class bicgstab_tag -{ -public: - /** @brief The constructor - * - * @param tol Relative tolerance for the residual (solver quits if ||r|| < tol * ||r_initial||) - * @param max_iters The maximum number of iterations - * @param max_iters_before_restart The maximum number of iterations before BiCGStab is reinitialized (to avoid accumulation of round-off errors) - */ - bicgstab_tag(double tol = 1e-8, vcl_size_t max_iters = 400, vcl_size_t max_iters_before_restart = 200) - : tol_(tol), abs_tol_(0), iterations_(max_iters), iterations_before_restart_(max_iters_before_restart) {} - - /** @brief Returns the relative tolerance */ - double tolerance() const { return tol_; } - - /** @brief Returns the absolute tolerance */ - double abs_tolerance() const { return abs_tol_; } - /** @brief Sets the absolute tolerance */ - void abs_tolerance(double new_tol) { if (new_tol >= 0) abs_tol_ = new_tol; } - - /** @brief Returns the maximum number of iterations */ - vcl_size_t max_iterations() const { return iterations_; } - /** @brief Returns the maximum number of iterations before a restart*/ - vcl_size_t max_iterations_before_restart() const { return iterations_before_restart_; } - - /** @brief Return the number of solver iterations: */ - vcl_size_t iters() const { return iters_taken_; } - void iters(vcl_size_t i) const { iters_taken_ = i; } - - /** @brief Returns the estimated relative error at the end of the solver run */ - double error() const { return last_error_; } - /** @brief Sets the estimated relative error at the end of the solver run */ - void error(double e) const { last_error_ = e; } - -private: - double tol_; - double abs_tol_; - vcl_size_t iterations_; - vcl_size_t iterations_before_restart_; - - //return values from solver - mutable vcl_size_t iters_taken_; - mutable double last_error_; -}; - - - -namespace detail -{ - /** @brief Implementation of a pipelined stabilized Bi-conjugate gradient solver */ - template<typename MatrixT, typename NumericT> - viennacl::vector<NumericT> pipelined_solve(MatrixT const & A, //MatrixType const & A, - viennacl::vector_base<NumericT> const & rhs, - bicgstab_tag const & tag, - viennacl::linalg::no_precond, - bool (*monitor)(viennacl::vector<NumericT> const &, NumericT, void*) = NULL, - void *monitor_data = NULL) - { - viennacl::vector<NumericT> result = viennacl::zero_vector<NumericT>(rhs.size(), viennacl::traits::context(rhs)); - - viennacl::vector<NumericT> residual = rhs; - viennacl::vector<NumericT> p = rhs; - viennacl::vector<NumericT> r0star = rhs; - viennacl::vector<NumericT> Ap = rhs; - viennacl::vector<NumericT> s = rhs; - viennacl::vector<NumericT> As = rhs; - - // Layout of temporary buffer: - // chunk 0: <residual, r_0^*> - // chunk 1: <As, As> - // chunk 2: <As, s> - // chunk 3: <Ap, r_0^*> - // chunk 4: <As, r_0^*> - // chunk 5: <s, s> - vcl_size_t buffer_size_per_vector = 256; - vcl_size_t num_buffer_chunks = 6; - viennacl::vector<NumericT> inner_prod_buffer = viennacl::zero_vector<NumericT>(num_buffer_chunks*buffer_size_per_vector, viennacl::traits::context(rhs)); // temporary buffer - std::vector<NumericT> host_inner_prod_buffer(inner_prod_buffer.size()); - - NumericT norm_rhs_host = viennacl::linalg::norm_2(residual); - NumericT beta; - NumericT alpha; - NumericT omega; - NumericT residual_norm = norm_rhs_host; - inner_prod_buffer[0] = norm_rhs_host * norm_rhs_host; - - NumericT r_dot_r0 = 0; - NumericT As_dot_As = 0; - NumericT As_dot_s = 0; - NumericT Ap_dot_r0 = 0; - NumericT As_dot_r0 = 0; - NumericT s_dot_s = 0; - - if (norm_rhs_host <= tag.abs_tolerance()) //solution is zero if RHS norm is zero - return result; - - for (vcl_size_t i = 0; i < tag.max_iterations(); ++i) - { - tag.iters(i+1); - // Ap = A*p_j - // Ap_dot_r0 = <Ap, r_0^*> - viennacl::linalg::pipelined_bicgstab_prod(A, p, Ap, r0star, - inner_prod_buffer, buffer_size_per_vector, 3*buffer_size_per_vector); - - //////// first (weak) synchronization point //// - - ///// method 1: compute alpha on host: - // - //// we only need the second chunk of the buffer for computing Ap_dot_r0: - //viennacl::fast_copy(inner_prod_buffer.begin(), inner_prod_buffer.end(), host_inner_prod_buffer.begin()); - //Ap_dot_r0 = std::accumulate(host_inner_prod_buffer.begin() + buffer_size_per_vector, host_inner_prod_buffer.begin() + 2 * buffer_size_per_vector, ScalarType(0)); - - //alpha = residual_dot_r0 / Ap_dot_r0; - - //// s_j = r_j - alpha_j q_j - //s = residual - alpha * Ap; - - ///// method 2: compute alpha on device: - // s = r - alpha * Ap - // <s, s> first stage - // dump alpha at end of inner_prod_buffer - viennacl::linalg::pipelined_bicgstab_update_s(s, residual, Ap, - inner_prod_buffer, buffer_size_per_vector, 5*buffer_size_per_vector); - - // As = A*s_j - // As_dot_As = <As, As> - // As_dot_s = <As, s> - // As_dot_r0 = <As, r_0^*> - viennacl::linalg::pipelined_bicgstab_prod(A, s, As, r0star, - inner_prod_buffer, buffer_size_per_vector, 4*buffer_size_per_vector); - - //////// second (strong) synchronization point //// - - viennacl::fast_copy(inner_prod_buffer.begin(), inner_prod_buffer.end(), host_inner_prod_buffer.begin()); - - typedef typename std::vector<NumericT>::difference_type difference_type; - - r_dot_r0 = std::accumulate(host_inner_prod_buffer.begin(), host_inner_prod_buffer.begin() + difference_type( buffer_size_per_vector), NumericT(0)); - As_dot_As = std::accumulate(host_inner_prod_buffer.begin() + difference_type( buffer_size_per_vector), host_inner_prod_buffer.begin() + difference_type(2 * buffer_size_per_vector), NumericT(0)); - As_dot_s = std::accumulate(host_inner_prod_buffer.begin() + difference_type(2 * buffer_size_per_vector), host_inner_prod_buffer.begin() + difference_type(3 * buffer_size_per_vector), NumericT(0)); - Ap_dot_r0 = std::accumulate(host_inner_prod_buffer.begin() + difference_type(3 * buffer_size_per_vector), host_inner_prod_buffer.begin() + difference_type(4 * buffer_size_per_vector), NumericT(0)); - As_dot_r0 = std::accumulate(host_inner_prod_buffer.begin() + difference_type(4 * buffer_size_per_vector), host_inner_prod_buffer.begin() + difference_type(5 * buffer_size_per_vector), NumericT(0)); - s_dot_s = std::accumulate(host_inner_prod_buffer.begin() + difference_type(5 * buffer_size_per_vector), host_inner_prod_buffer.begin() + difference_type(6 * buffer_size_per_vector), NumericT(0)); - - alpha = r_dot_r0 / Ap_dot_r0; - beta = - As_dot_r0 / Ap_dot_r0; - omega = As_dot_s / As_dot_As; - - residual_norm = std::sqrt(s_dot_s - NumericT(2.0) * omega * As_dot_s + omega * omega * As_dot_As); - if (monitor && monitor(result, std::fabs(residual_norm / norm_rhs_host), monitor_data)) - break; - if (std::fabs(residual_norm / norm_rhs_host) < tag.tolerance() || residual_norm < tag.abs_tolerance()) - break; - - // x_{j+1} = x_j + alpha * p_j + omega * s_j - // r_{j+1} = s_j - omega * t_j - // p_{j+1} = r_{j+1} + beta * (p_j - omega * q_j) - // and compute first stage of r_dot_r0 = <r_{j+1}, r_o^*> for use in next iteration - viennacl::linalg::pipelined_bicgstab_vector_update(result, alpha, p, omega, s, - residual, As, - beta, Ap, - r0star, inner_prod_buffer, buffer_size_per_vector); - } - - //store last error estimate: - tag.error(residual_norm / norm_rhs_host); - - return result; - } - - /** @brief Overload for the pipelined CG implementation for the ViennaCL sparse matrix types */ - template<typename NumericT> - viennacl::vector<NumericT> solve_impl(viennacl::compressed_matrix<NumericT> const & A, - viennacl::vector<NumericT> const & rhs, - bicgstab_tag const & tag, - viennacl::linalg::no_precond, - bool (*monitor)(viennacl::vector<NumericT> const &, NumericT, void*) = NULL, - void *monitor_data = NULL) - { - return pipelined_solve(A, rhs, tag, viennacl::linalg::no_precond(), monitor, monitor_data); - } - - - /** @brief Overload for the pipelined CG implementation for the ViennaCL sparse matrix types */ - template<typename NumericT> - viennacl::vector<NumericT> solve_impl(viennacl::coordinate_matrix<NumericT> const & A, - viennacl::vector<NumericT> const & rhs, - bicgstab_tag const & tag, - viennacl::linalg::no_precond, - bool (*monitor)(viennacl::vector<NumericT> const &, NumericT, void*) = NULL, - void *monitor_data = NULL) - { - return detail::pipelined_solve(A, rhs, tag, viennacl::linalg::no_precond(), monitor, monitor_data); - } - - - - /** @brief Overload for the pipelined CG implementation for the ViennaCL sparse matrix types */ - template<typename NumericT> - viennacl::vector<NumericT> solve_impl(viennacl::ell_matrix<NumericT> const & A, - viennacl::vector<NumericT> const & rhs, - bicgstab_tag const & tag, - viennacl::linalg::no_precond, - bool (*monitor)(viennacl::vector<NumericT> const &, NumericT, void*) = NULL, - void *monitor_data = NULL) - { - return detail::pipelined_solve(A, rhs, tag, viennacl::linalg::no_precond(), monitor, monitor_data); - } - - - - /** @brief Overload for the pipelined CG implementation for the ViennaCL sparse matrix types */ - template<typename NumericT> - viennacl::vector<NumericT> solve_impl(viennacl::sliced_ell_matrix<NumericT> const & A, - viennacl::vector<NumericT> const & rhs, - bicgstab_tag const & tag, - viennacl::linalg::no_precond, - bool (*monitor)(viennacl::vector<NumericT> const &, NumericT, void*) = NULL, - void *monitor_data = NULL) - { - return detail::pipelined_solve(A, rhs, tag, viennacl::linalg::no_precond(), monitor, monitor_data); - } - - - /** @brief Overload for the pipelined CG implementation for the ViennaCL sparse matrix types */ - template<typename NumericT> - viennacl::vector<NumericT> solve_impl(viennacl::hyb_matrix<NumericT> const & A, - viennacl::vector<NumericT> const & rhs, - bicgstab_tag const & tag, - viennacl::linalg::no_precond, - bool (*monitor)(viennacl::vector<NumericT> const &, NumericT, void*) = NULL, - void *monitor_data = NULL) - { - return detail::pipelined_solve(A, rhs, tag, viennacl::linalg::no_precond(), monitor, monitor_data); - } - - - /** @brief Implementation of the unpreconditioned stabilized Bi-conjugate gradient solver - * - * Following the description in "Iterative Methods for Sparse Linear Systems" by Y. Saad - * - * @param matrix The system matrix - * @param rhs The load vector - * @param tag Solver configuration tag - * @param monitor A callback routine which is called at each GMRES restart - * @param monitor_data Data pointer to be passed to the callback routine to pass on user-specific data - * @return The result vector - */ - template<typename MatrixT, typename VectorT> - VectorT solve_impl(MatrixT const & matrix, - VectorT const & rhs, - bicgstab_tag const & tag, - viennacl::linalg::no_precond, - bool (*monitor)(VectorT const &, typename viennacl::result_of::cpu_value_type<typename viennacl::result_of::value_type<VectorT>::type>::type, void*) = NULL, - void *monitor_data = NULL) - { - typedef typename viennacl::result_of::value_type<VectorT>::type NumericType; - typedef typename viennacl::result_of::cpu_value_type<NumericType>::type CPU_NumericType; - VectorT result = rhs; - viennacl::traits::clear(result); - - VectorT residual = rhs; - VectorT p = rhs; - VectorT r0star = rhs; - VectorT tmp0 = rhs; - VectorT tmp1 = rhs; - VectorT s = rhs; - - CPU_NumericType norm_rhs_host = viennacl::linalg::norm_2(residual); - CPU_NumericType ip_rr0star = norm_rhs_host * norm_rhs_host; - CPU_NumericType beta; - CPU_NumericType alpha; - CPU_NumericType omega; - //ScalarType inner_prod_temp; //temporary variable for inner product computation - CPU_NumericType new_ip_rr0star = 0; - CPU_NumericType residual_norm = norm_rhs_host; - - if (norm_rhs_host <= tag.abs_tolerance()) //solution is zero if RHS norm is zero - return result; - - bool restart_flag = true; - vcl_size_t last_restart = 0; - for (vcl_size_t i = 0; i < tag.max_iterations(); ++i) - { - if (restart_flag) - { - residual = viennacl::linalg::prod(matrix, result); - residual = rhs - residual; - p = residual; - r0star = residual; - ip_rr0star = viennacl::linalg::norm_2(residual); - ip_rr0star *= ip_rr0star; - restart_flag = false; - last_restart = i; - } - - tag.iters(i+1); - tmp0 = viennacl::linalg::prod(matrix, p); - alpha = ip_rr0star / viennacl::linalg::inner_prod(tmp0, r0star); - - s = residual - alpha*tmp0; - - tmp1 = viennacl::linalg::prod(matrix, s); - CPU_NumericType norm_tmp1 = viennacl::linalg::norm_2(tmp1); - omega = viennacl::linalg::inner_prod(tmp1, s) / (norm_tmp1 * norm_tmp1); - - result += alpha * p + omega * s; - residual = s - omega * tmp1; - - new_ip_rr0star = viennacl::linalg::inner_prod(residual, r0star); - residual_norm = viennacl::linalg::norm_2(residual); - if (monitor && monitor(result, std::fabs(residual_norm / norm_rhs_host), monitor_data)) - break; - if (std::fabs(residual_norm / norm_rhs_host) < tag.tolerance() || residual_norm < tag.abs_tolerance()) - break; - - beta = new_ip_rr0star / ip_rr0star * alpha/omega; - ip_rr0star = new_ip_rr0star; - - if ( (ip_rr0star <= 0 && ip_rr0star >= 0) - || (omega <= 0 && omega >= 0) - || (i - last_restart > tag.max_iterations_before_restart()) - ) //search direction degenerate. A restart might help - restart_flag = true; - - // Execution of - // p = residual + beta * (p - omega*tmp0); - // without introducing temporary vectors: - p -= omega * tmp0; - p = residual + beta * p; - } - - //store last error estimate: - tag.error(residual_norm / norm_rhs_host); - - return result; - } - - - /** @brief Implementation of the preconditioned stabilized Bi-conjugate gradient solver - * - * Following the description of the unpreconditioned case in "Iterative Methods for Sparse Linear Systems" by Y. Saad - * - * @param matrix The system matrix - * @param rhs The load vector - * @param tag Solver configuration tag - * @param precond A preconditioner. Precondition operation is done via member function apply() - * @param monitor A callback routine which is called at each GMRES restart - * @param monitor_data Data pointer to be passed to the callback routine to pass on user-specific data - * @return The result vector - */ - template<typename MatrixT, typename VectorT, typename PreconditionerT> - VectorT solve_impl(MatrixT const & matrix, - VectorT const & rhs, - bicgstab_tag const & tag, - PreconditionerT const & precond, - bool (*monitor)(VectorT const &, typename viennacl::result_of::cpu_value_type<typename viennacl::result_of::value_type<VectorT>::type>::type, void*) = NULL, - void *monitor_data = NULL) - { - typedef typename viennacl::result_of::value_type<VectorT>::type NumericType; - typedef typename viennacl::result_of::cpu_value_type<NumericType>::type CPU_NumericType; - VectorT result = rhs; - viennacl::traits::clear(result); - - VectorT residual = rhs; - VectorT r0star = residual; //can be chosen arbitrarily in fact - VectorT tmp0 = rhs; - VectorT tmp1 = rhs; - VectorT s = rhs; - - VectorT p = residual; - - CPU_NumericType ip_rr0star = viennacl::linalg::norm_2(residual); - CPU_NumericType norm_rhs_host = viennacl::linalg::norm_2(residual); - CPU_NumericType beta; - CPU_NumericType alpha; - CPU_NumericType omega; - CPU_NumericType new_ip_rr0star = 0; - CPU_NumericType residual_norm = norm_rhs_host; - - if (norm_rhs_host <= tag.abs_tolerance()) //solution is zero if RHS norm is zero - return result; - - bool restart_flag = true; - vcl_size_t last_restart = 0; - for (unsigned int i = 0; i < tag.max_iterations(); ++i) - { - if (restart_flag) - { - residual = viennacl::linalg::prod(matrix, result); - residual = rhs - residual; - precond.apply(residual); - p = residual; - r0star = residual; - ip_rr0star = viennacl::linalg::norm_2(residual); - ip_rr0star *= ip_rr0star; - restart_flag = false; - last_restart = i; - } - - tag.iters(i+1); - tmp0 = viennacl::linalg::prod(matrix, p); - precond.apply(tmp0); - alpha = ip_rr0star / viennacl::linalg::inner_prod(tmp0, r0star); - - s = residual - alpha*tmp0; - - tmp1 = viennacl::linalg::prod(matrix, s); - precond.apply(tmp1); - CPU_NumericType norm_tmp1 = viennacl::linalg::norm_2(tmp1); - omega = viennacl::linalg::inner_prod(tmp1, s) / (norm_tmp1 * norm_tmp1); - - result += alpha * p + omega * s; - residual = s - omega * tmp1; - - residual_norm = viennacl::linalg::norm_2(residual); - if (monitor && monitor(result, std::fabs(residual_norm / norm_rhs_host), monitor_data)) - break; - if (residual_norm / norm_rhs_host < tag.tolerance() || residual_norm < tag.abs_tolerance()) - break; - - new_ip_rr0star = viennacl::linalg::inner_prod(residual, r0star); - - beta = new_ip_rr0star / ip_rr0star * alpha/omega; - ip_rr0star = new_ip_rr0star; - - if ( (ip_rr0star >= 0 && ip_rr0star <= 0) || (omega >=0 && omega <= 0) || i - last_restart > tag.max_iterations_before_restart()) //search direction degenerate. A restart might help - restart_flag = true; - - // Execution of - // p = residual + beta * (p - omega*tmp0); - // without introducing temporary vectors: - p -= omega * tmp0; - p = residual + beta * p; - - //std::cout << "Rel. Residual in current step: " << std::sqrt(std::fabs(viennacl::linalg::inner_prod(residual, residual) / norm_rhs_host)) << std::endl; - } - - //store last error estimate: - tag.error(residual_norm / norm_rhs_host); - - return result; - } - -} - - - -template<typename MatrixT, typename VectorT, typename PreconditionerT> -VectorT solve(MatrixT const & matrix, VectorT const & rhs, bicgstab_tag const & tag, PreconditionerT const & precond) -{ - return detail::solve_impl(matrix, rhs, tag, precond); -} - - -/** @brief Convenience overload for calling the preconditioned BiCGStab solver using types from the C++ STL. - * - * A std::vector<std::map<T, U> > matrix is convenient for e.g. finite element assembly. - * It is not the fastest option for setting up a system, but often it is fast enough - particularly for just trying things out. - */ -template<typename IndexT, typename NumericT, typename PreconditionerT> -std::vector<NumericT> solve(std::vector< std::map<IndexT, NumericT> > const & A, std::vector<NumericT> const & rhs, bicgstab_tag const & tag, PreconditionerT const & precond) -{ - viennacl::compressed_matrix<NumericT> vcl_A; - viennacl::copy(A, vcl_A); - - viennacl::vector<NumericT> vcl_rhs(rhs.size()); - viennacl::copy(rhs, vcl_rhs); - - viennacl::vector<NumericT> vcl_result = solve(vcl_A, vcl_rhs, tag, precond); - - std::vector<NumericT> result(vcl_result.size()); - viennacl::copy(vcl_result, result); - return result; -} - -/** @brief Entry point for the unpreconditioned BiCGStab method. - * - * @param matrix The system matrix - * @param rhs Right hand side vector (load vector) - * @param tag A BiCGStab tag providing relative tolerances, etc. - */ -template<typename MatrixT, typename VectorT> -VectorT solve(MatrixT const & matrix, VectorT const & rhs, bicgstab_tag const & tag) -{ - return solve(matrix, rhs, tag, viennacl::linalg::no_precond()); -} - - - -template<typename VectorT> -class bicgstab_solver -{ -public: - typedef typename viennacl::result_of::cpu_value_type<VectorT>::type numeric_type; - - bicgstab_solver(bicgstab_tag const & tag) : tag_(tag), monitor_callback_(NULL), user_data_(NULL) {} - - template<typename MatrixT, typename PreconditionerT> - VectorT operator()(MatrixT const & A, VectorT const & b, PreconditionerT const & precond) const - { - if (viennacl::traits::size(init_guess_) > 0) // take initial guess into account - { - VectorT mod_rhs = viennacl::linalg::prod(A, init_guess_); - mod_rhs = b - mod_rhs; - VectorT y = detail::solve_impl(A, mod_rhs, tag_, precond, monitor_callback_, user_data_); - return init_guess_ + y; - } - return detail::solve_impl(A, b, tag_, precond, monitor_callback_, user_data_); - } - - - template<typename MatrixT> - VectorT operator()(MatrixT const & A, VectorT const & b) const - { - return operator()(A, b, viennacl::linalg::no_precond()); - } - - /** @brief Specifies an initial guess for the iterative solver. - * - * An iterative solver for Ax = b with initial guess x_0 is equivalent to an iterative solver for Ay = b' := b - Ax_0, where x = x_0 + y. - */ - void set_initial_guess(VectorT const & x) { init_guess_ = x; } - - /** @brief Sets a monitor function pointer to be called in each iteration. Set to NULL to run without monitor. - * - * The monitor function is called with the current guess for the result as first argument and the current relative residual estimate as second argument. - * The third argument is a pointer to user-defined data, through which additional information can be passed. - * This pointer needs to be set with set_monitor_data. If not set, NULL is passed. - * If the montior function returns true, the solver terminates (either convergence or divergence). - */ - void set_monitor(bool (*monitor_fun)(VectorT const &, numeric_type, void *), void *user_data) - { - monitor_callback_ = monitor_fun; - user_data_ = user_data; - } - - /** @brief Returns the solver tag containing basic configuration such as tolerances, etc. */ - bicgstab_tag const & tag() const { return tag_; } - -private: - bicgstab_tag tag_; - VectorT init_guess_; - bool (*monitor_callback_)(VectorT const &, numeric_type, void *); - void *user_data_; -}; - - -} -} - -#endif http://git-wip-us.apache.org/repos/asf/mahout/blob/7ae549fa/native-viennaCL/src/main/cpp/viennacl/linalg/bisect.hpp ---------------------------------------------------------------------- diff --git a/native-viennaCL/src/main/cpp/viennacl/linalg/bisect.hpp b/native-viennaCL/src/main/cpp/viennacl/linalg/bisect.hpp deleted file mode 100644 index a2daf5e..0000000 --- a/native-viennaCL/src/main/cpp/viennacl/linalg/bisect.hpp +++ /dev/null @@ -1,179 +0,0 @@ -#ifndef VIENNACL_LINALG_BISECT_HPP_ -#define VIENNACL_LINALG_BISECT_HPP_ - -/* ========================================================================= - Copyright (c) 2010-2016, Institute for Microelectronics, - Institute for Analysis and Scientific Computing, - TU Wien. - Portions of this software are copyright by UChicago Argonne, LLC. - - ----------------- - ViennaCL - The Vienna Computing Library - ----------------- - - Project Head: Karl Rupp [email protected] - - (A list of authors and contributors can be found in the manual) - - License: MIT (X11), see file LICENSE in the base directory -============================================================================= */ - -/** @file viennacl/linalg/bisect.hpp -* @brief Implementation of the algorithm for finding eigenvalues of a tridiagonal matrix. -* -* Contributed by Guenther Mader and Astrid Rupp. -*/ - -#include <vector> -#include <cmath> -#include <limits> -#include <cstddef> -#include "viennacl/meta/result_of.hpp" - -namespace viennacl -{ -namespace linalg -{ - -namespace detail -{ - /** - * @brief overloaded function for copying vectors - */ - template<typename NumericT, typename OtherVectorT> - void copy_vec_to_vec(viennacl::vector<NumericT> const & src, OtherVectorT & dest) - { - viennacl::copy(src, dest); - } - - template<typename OtherVectorT, typename NumericT> - void copy_vec_to_vec(OtherVectorT const & src, viennacl::vector<NumericT> & dest) - { - viennacl::copy(src, dest); - } - - template<typename VectorT1, typename VectorT2> - void copy_vec_to_vec(VectorT1 const & src, VectorT2 & dest) - { - for (vcl_size_t i=0; i<src.size(); ++i) - dest[i] = src[i]; - } - -} //namespace detail - -/** -* @brief Implementation of the bisect-algorithm for the calculation of the eigenvalues of a tridiagonal matrix. Experimental - interface might change. -* -* Refer to "Calculation of the Eigenvalues of a Symmetric Tridiagonal Matrix by the Method of Bisection" in the Handbook Series Linear Algebra, contributed by Barth, Martin, and Wilkinson. -* http://www.maths.ed.ac.uk/~aar/papers/bamawi.pdf -* -* @param alphas Elements of the main diagonal -* @param betas Elements of the secondary diagonal -* @return Returns the eigenvalues of the tridiagonal matrix defined by alpha and beta -*/ -template<typename VectorT> -std::vector< - typename viennacl::result_of::cpu_value_type<typename VectorT::value_type>::type - > -bisect(VectorT const & alphas, VectorT const & betas) -{ - typedef typename viennacl::result_of::value_type<VectorT>::type NumericType; - typedef typename viennacl::result_of::cpu_value_type<NumericType>::type CPU_NumericType; - - vcl_size_t size = betas.size(); - std::vector<CPU_NumericType> x_temp(size); - - - std::vector<CPU_NumericType> beta_bisect; - std::vector<CPU_NumericType> wu; - - double rel_error = std::numeric_limits<CPU_NumericType>::epsilon(); - beta_bisect.push_back(0); - - for (vcl_size_t i = 1; i < size; i++) - beta_bisect.push_back(betas[i] * betas[i]); - - double xmin = alphas[size - 1] - std::fabs(betas[size - 1]); - double xmax = alphas[size - 1] + std::fabs(betas[size - 1]); - - for (vcl_size_t i = 0; i < size - 1; i++) - { - double h = std::fabs(betas[i]) + std::fabs(betas[i + 1]); - if (alphas[i] + h > xmax) - xmax = alphas[i] + h; - if (alphas[i] - h < xmin) - xmin = alphas[i] - h; - } - - - double eps1 = 1e-6; - /*double eps2 = (xmin + xmax > 0) ? (rel_error * xmax) : (-rel_error * xmin); - if (eps1 <= 0) - eps1 = eps2; - else - eps2 = 0.5 * eps1 + 7.0 * eps2; */ - - double x0 = xmax; - - for (vcl_size_t i = 0; i < size; i++) - { - x_temp[i] = xmax; - wu.push_back(xmin); - } - - for (long k = static_cast<long>(size) - 1; k >= 0; --k) - { - double xu = xmin; - for (long i = k; i >= 0; --i) - { - if (xu < wu[vcl_size_t(k-i)]) - { - xu = wu[vcl_size_t(i)]; - break; - } - } - - if (x0 > x_temp[vcl_size_t(k)]) - x0 = x_temp[vcl_size_t(k)]; - - double x1 = (xu + x0) / 2.0; - while (x0 - xu > 2.0 * rel_error * (std::fabs(xu) + std::fabs(x0)) + eps1) - { - vcl_size_t a = 0; - double q = 1; - for (vcl_size_t i = 0; i < size; i++) - { - if (q > 0 || q < 0) - q = alphas[i] - x1 - beta_bisect[i] / q; - else - q = alphas[i] - x1 - std::fabs(betas[i] / rel_error); - - if (q < 0) - a++; - } - - if (a <= static_cast<vcl_size_t>(k)) - { - xu = x1; - if (a < 1) - wu[0] = x1; - else - { - wu[a] = x1; - if (x_temp[a - 1] > x1) - x_temp[a - 1] = x1; - } - } - else - x0 = x1; - - x1 = (xu + x0) / 2.0; - } - x_temp[vcl_size_t(k)] = x1; - } - return x_temp; -} - -} // end namespace linalg -} // end namespace viennacl -#endif http://git-wip-us.apache.org/repos/asf/mahout/blob/7ae549fa/native-viennaCL/src/main/cpp/viennacl/linalg/bisect_gpu.hpp ---------------------------------------------------------------------- diff --git a/native-viennaCL/src/main/cpp/viennacl/linalg/bisect_gpu.hpp b/native-viennaCL/src/main/cpp/viennacl/linalg/bisect_gpu.hpp deleted file mode 100644 index 6918b14..0000000 --- a/native-viennaCL/src/main/cpp/viennacl/linalg/bisect_gpu.hpp +++ /dev/null @@ -1,173 +0,0 @@ -#ifndef VIENNACL_LINALG_BISECT_GPU -#define VIENNACL_LINALG_BISECT_GPU - -/* ========================================================================= - Copyright (c) 2010-2016, Institute for Microelectronics, - Institute for Analysis and Scientific Computing, - TU Wien. - Portions of this software are copyright by UChicago Argonne, LLC. - - ----------------- - ViennaCL - The Vienna Computing Library - ----------------- - - Project Head: Karl Rupp [email protected] - - (A list of authors and contributors can be found in the manual) - - License: MIT (X11), see file LICENSE in the base directory -============================================================================= */ - - -/** @file viennacl/linalg/bisect_gpu.hpp - @brief Implementation of an bisection algorithm for eigenvalues - - Implementation based on the sample provided with the CUDA 6.0 SDK, for which - the creation of derivative works is allowed by including the following statement: - "This software contains source code provided by NVIDIA Corporation." -*/ -// includes, system -#include <stdlib.h> -#include <stdio.h> -#include <string.h> - -#include "viennacl/scalar.hpp" -#include "viennacl/vector.hpp" -#include "viennacl/matrix.hpp" - -// includes, project -#include "viennacl/linalg/detail/bisect/structs.hpp" -#include "viennacl/linalg/detail/bisect/gerschgorin.hpp" -#include "viennacl/linalg/detail/bisect/bisect_large.hpp" -#include "viennacl/linalg/detail/bisect/bisect_small.hpp" - - -namespace viennacl -{ -namespace linalg -{ -/////////////////////////////////////////////////////////////////////////// -//! @brief bisect The bisection algorithm computes the eigevalues -//! of a symmetric tridiagonal matrix. -//! @param diagonal diagonal elements of the matrix -//! @param superdiagonal superdiagonal elements of the matrix -//! @param eigenvalues Vectors with the eigenvalues in ascending order -//! @return return false if any errors occured -/// -//! overloaded function template: std::vectors as parameters -template<typename NumericT> -bool -bisect(const std::vector<NumericT> & diagonal, const std::vector<NumericT> & superdiagonal, std::vector<NumericT> & eigenvalues) -{ - assert(diagonal.size() == superdiagonal.size() && - diagonal.size() == eigenvalues.size() && - bool("Input vectors do not have the same sizes!")); - bool bResult = false; - // flag if the matrix size is due to explicit user request - // desired precision of eigenvalues - NumericT precision = static_cast<NumericT>(0.00001); - const unsigned int mat_size = static_cast<unsigned int>(diagonal.size()); - - // set up input - viennacl::linalg::detail::InputData<NumericT> input(diagonal, superdiagonal, mat_size); - - NumericT lg = FLT_MAX; - NumericT ug = -FLT_MAX; - // compute Gerschgorin interval - viennacl::linalg::detail::computeGerschgorin(input.std_a, input.std_b, mat_size, lg, ug); - - // decide wheter the algorithm for small or for large matrices will be started - if (mat_size <= VIENNACL_BISECT_MAX_SMALL_MATRIX) - { - // initialize memory for result - viennacl::linalg::detail::ResultDataSmall<NumericT> result(mat_size); - - // run the kernel - viennacl::linalg::detail::computeEigenvaluesSmallMatrix(input, result, mat_size, lg, ug, precision); - - // get the result from the device and do some sanity checks, - viennacl::linalg::detail::processResultSmallMatrix(result, mat_size); - eigenvalues = result.std_eigenvalues; - bResult = true; - } - - else - { - // initialize memory for result - viennacl::linalg::detail::ResultDataLarge<NumericT> result(mat_size); - - // run the kernel - viennacl::linalg::detail::computeEigenvaluesLargeMatrix(input, result, mat_size, lg, ug, precision); - - // get the result from the device and do some sanity checks - bResult = viennacl::linalg::detail::processResultDataLargeMatrix(result, mat_size); - - eigenvalues = result.std_eigenvalues; - } - return bResult; -} - - -/////////////////////////////////////////////////////////////////////////// -//! @brief bisect The bisection algorithm computes the eigevalues -//! of a symmetric tridiagonal matrix. -//! @param diagonal diagonal elements of the matrix -//! @param superdiagonal superdiagonal elements of the matrix -//! @param eigenvalues Vectors with the eigenvalues in ascending order -//! @return return false if any errors occured -/// -//! overloaded function template: viennacl::vectors as parameters -template<typename NumericT> -bool -bisect(const viennacl::vector<NumericT> & diagonal, const viennacl::vector<NumericT> & superdiagonal, viennacl::vector<NumericT> & eigenvalues) -{ - assert(diagonal.size() == superdiagonal.size() && - diagonal.size() == eigenvalues.size() && - bool("Input vectors do not have the same sizes!")); - bool bResult = false; - // flag if the matrix size is due to explicit user request - // desired precision of eigenvalues - NumericT precision = static_cast<NumericT>(0.00001); - const unsigned int mat_size = static_cast<unsigned int>(diagonal.size()); - - // set up input - viennacl::linalg::detail::InputData<NumericT> input(diagonal, superdiagonal, mat_size); - - NumericT lg = FLT_MAX; - NumericT ug = -FLT_MAX; - // compute Gerschgorin interval - viennacl::linalg::detail::computeGerschgorin(input.std_a, input.std_b, mat_size, lg, ug); - - // decide wheter the algorithm for small or for large matrices will be started - if (mat_size <= VIENNACL_BISECT_MAX_SMALL_MATRIX) - { - // initialize memory for result - viennacl::linalg::detail::ResultDataSmall<NumericT> result(mat_size); - - // run the kernel - viennacl::linalg::detail::computeEigenvaluesSmallMatrix(input, result, mat_size, lg, ug, precision); - - // get the result from the device and do some sanity checks, - viennacl::linalg::detail::processResultSmallMatrix(result, mat_size); - copy(result.std_eigenvalues, eigenvalues); - bResult = true; - } - - else - { - // initialize memory for result - viennacl::linalg::detail::ResultDataLarge<NumericT> result(mat_size); - - // run the kernel - viennacl::linalg::detail::computeEigenvaluesLargeMatrix(input, result, mat_size, lg, ug, precision); - - // get the result from the device and do some sanity checks - bResult = viennacl::linalg::detail::processResultDataLargeMatrix(result, mat_size); - - copy(result.std_eigenvalues, eigenvalues); - } - return bResult; -} -} // namespace linalg -} // namespace viennacl -#endif http://git-wip-us.apache.org/repos/asf/mahout/blob/7ae549fa/native-viennaCL/src/main/cpp/viennacl/linalg/cg.hpp ---------------------------------------------------------------------- diff --git a/native-viennaCL/src/main/cpp/viennacl/linalg/cg.hpp b/native-viennaCL/src/main/cpp/viennacl/linalg/cg.hpp deleted file mode 100644 index 93aae81..0000000 --- a/native-viennaCL/src/main/cpp/viennacl/linalg/cg.hpp +++ /dev/null @@ -1,440 +0,0 @@ -#ifndef VIENNACL_LINALG_CG_HPP_ -#define VIENNACL_LINALG_CG_HPP_ - -/* ========================================================================= - Copyright (c) 2010-2016, Institute for Microelectronics, - Institute for Analysis and Scientific Computing, - TU Wien. - Portions of this software are copyright by UChicago Argonne, LLC. - - ----------------- - ViennaCL - The Vienna Computing Library - ----------------- - - Project Head: Karl Rupp [email protected] - - (A list of authors and contributors can be found in the manual) - - License: MIT (X11), see file LICENSE in the base directory -============================================================================= */ - -/** @file viennacl/linalg/cg.hpp - @brief The conjugate gradient method is implemented here -*/ - -#include <vector> -#include <map> -#include <cmath> -#include <numeric> - -#include "viennacl/forwards.h" -#include "viennacl/tools/tools.hpp" -#include "viennacl/linalg/ilu.hpp" -#include "viennacl/linalg/prod.hpp" -#include "viennacl/linalg/inner_prod.hpp" -#include "viennacl/linalg/norm_2.hpp" -#include "viennacl/traits/clear.hpp" -#include "viennacl/traits/size.hpp" -#include "viennacl/meta/result_of.hpp" -#include "viennacl/linalg/iterative_operations.hpp" - -namespace viennacl -{ -namespace linalg -{ - -/** @brief A tag for the conjugate gradient Used for supplying solver parameters and for dispatching the solve() function -*/ -class cg_tag -{ -public: - /** @brief The constructor - * - * @param tol Relative tolerance for the residual (solver quits if ||r|| < tol * ||r_initial||) - * @param max_iterations The maximum number of iterations - */ - cg_tag(double tol = 1e-8, unsigned int max_iterations = 300) : tol_(tol), abs_tol_(0), iterations_(max_iterations) {} - - /** @brief Returns the relative tolerance */ - double tolerance() const { return tol_; } - - /** @brief Returns the absolute tolerance */ - double abs_tolerance() const { return abs_tol_; } - /** @brief Sets the absolute tolerance */ - void abs_tolerance(double new_tol) { if (new_tol >= 0) abs_tol_ = new_tol; } - - /** @brief Returns the maximum number of iterations */ - unsigned int max_iterations() const { return iterations_; } - - /** @brief Return the number of solver iterations: */ - unsigned int iters() const { return iters_taken_; } - void iters(unsigned int i) const { iters_taken_ = i; } - - /** @brief Returns the estimated relative error at the end of the solver run */ - double error() const { return last_error_; } - /** @brief Sets the estimated relative error at the end of the solver run */ - void error(double e) const { last_error_ = e; } - - -private: - double tol_; - double abs_tol_; - unsigned int iterations_; - - //return values from solver - mutable unsigned int iters_taken_; - mutable double last_error_; -}; - -namespace detail -{ - - /** @brief handles the no_precond case at minimal overhead */ - template<typename VectorT, typename PreconditionerT> - class z_handler{ - public: - z_handler(VectorT & residual) : z_(residual){ } - VectorT & get() { return z_; } - private: - VectorT z_; - }; - - template<typename VectorT> - class z_handler<VectorT, viennacl::linalg::no_precond>{ - public: - z_handler(VectorT & residual) : presidual_(&residual){ } - VectorT & get() { return *presidual_; } - private: - VectorT * presidual_; - }; - -} - -namespace detail -{ - - /** @brief Implementation of a pipelined conjugate gradient algorithm (no preconditioner), specialized for ViennaCL types. - * - * Pipelined version from A. T. Chronopoulos and C. W. Gear, J. Comput. Appl. Math. 25(2), 153â168 (1989) - * - * @param A The system matrix - * @param rhs The load vector - * @param tag Solver configuration tag - * @param monitor A callback routine which is called at each GMRES restart - * @param monitor_data Data pointer to be passed to the callback routine to pass on user-specific data - * @return The result vector - */ - //template<typename MatrixType, typename ScalarType> - template<typename MatrixT, typename NumericT> - viennacl::vector<NumericT> pipelined_solve(MatrixT const & A, //MatrixType const & A, - viennacl::vector<NumericT> const & rhs, - cg_tag const & tag, - viennacl::linalg::no_precond, - bool (*monitor)(viennacl::vector<NumericT> const &, NumericT, void*) = NULL, - void *monitor_data = NULL) - { - typedef typename viennacl::vector<NumericT>::difference_type difference_type; - - viennacl::vector<NumericT> result(rhs); - viennacl::traits::clear(result); - - viennacl::vector<NumericT> residual(rhs); - viennacl::vector<NumericT> p(rhs); - viennacl::vector<NumericT> Ap = viennacl::linalg::prod(A, p); - viennacl::vector<NumericT> inner_prod_buffer = viennacl::zero_vector<NumericT>(3*256, viennacl::traits::context(rhs)); // temporary buffer - std::vector<NumericT> host_inner_prod_buffer(inner_prod_buffer.size()); - vcl_size_t buffer_size_per_vector = inner_prod_buffer.size() / 3; - difference_type buffer_offset_per_vector = static_cast<difference_type>(buffer_size_per_vector); - - NumericT norm_rhs_squared = viennacl::linalg::norm_2(residual); norm_rhs_squared *= norm_rhs_squared; - - if (norm_rhs_squared <= tag.abs_tolerance() * tag.abs_tolerance()) //check for early convergence of A*x = 0 - return result; - - NumericT inner_prod_rr = norm_rhs_squared; - NumericT alpha = inner_prod_rr / viennacl::linalg::inner_prod(p, Ap); - NumericT beta = viennacl::linalg::norm_2(Ap); beta = (alpha * alpha * beta * beta - inner_prod_rr) / inner_prod_rr; - NumericT inner_prod_ApAp = 0; - NumericT inner_prod_pAp = 0; - - for (unsigned int i = 0; i < tag.max_iterations(); ++i) - { - tag.iters(i+1); - - viennacl::linalg::pipelined_cg_vector_update(result, alpha, p, residual, Ap, beta, inner_prod_buffer); - viennacl::linalg::pipelined_cg_prod(A, p, Ap, inner_prod_buffer); - - // bring back the partial results to the host: - viennacl::fast_copy(inner_prod_buffer.begin(), inner_prod_buffer.end(), host_inner_prod_buffer.begin()); - - inner_prod_rr = std::accumulate(host_inner_prod_buffer.begin(), host_inner_prod_buffer.begin() + buffer_offset_per_vector, NumericT(0)); - inner_prod_ApAp = std::accumulate(host_inner_prod_buffer.begin() + buffer_offset_per_vector, host_inner_prod_buffer.begin() + 2 * buffer_offset_per_vector, NumericT(0)); - inner_prod_pAp = std::accumulate(host_inner_prod_buffer.begin() + 2 * buffer_offset_per_vector, host_inner_prod_buffer.begin() + 3 * buffer_offset_per_vector, NumericT(0)); - - if (monitor && monitor(result, std::sqrt(std::fabs(inner_prod_rr / norm_rhs_squared)), monitor_data)) - break; - if (std::fabs(inner_prod_rr / norm_rhs_squared) < tag.tolerance() * tag.tolerance() || std::fabs(inner_prod_rr) < tag.abs_tolerance() * tag.abs_tolerance()) //squared norms involved here - break; - - alpha = inner_prod_rr / inner_prod_pAp; - beta = (alpha*alpha*inner_prod_ApAp - inner_prod_rr) / inner_prod_rr; - } - - //store last error estimate: - tag.error(std::sqrt(std::fabs(inner_prod_rr) / norm_rhs_squared)); - - return result; - } - - - /** @brief Overload for the pipelined CG implementation for the ViennaCL sparse matrix types */ - template<typename NumericT> - viennacl::vector<NumericT> solve_impl(viennacl::compressed_matrix<NumericT> const & A, - viennacl::vector<NumericT> const & rhs, - cg_tag const & tag, - viennacl::linalg::no_precond, - bool (*monitor)(viennacl::vector<NumericT> const &, NumericT, void*) = NULL, - void *monitor_data = NULL) - { - return pipelined_solve(A, rhs, tag, viennacl::linalg::no_precond(), monitor, monitor_data); - } - - - /** @brief Overload for the pipelined CG implementation for the ViennaCL sparse matrix types */ - template<typename NumericT> - viennacl::vector<NumericT> solve_impl(viennacl::coordinate_matrix<NumericT> const & A, - viennacl::vector<NumericT> const & rhs, - cg_tag const & tag, - viennacl::linalg::no_precond, - bool (*monitor)(viennacl::vector<NumericT> const &, NumericT, void*) = NULL, - void *monitor_data = NULL) - { - return detail::pipelined_solve(A, rhs, tag, viennacl::linalg::no_precond(), monitor, monitor_data); - } - - - - /** @brief Overload for the pipelined CG implementation for the ViennaCL sparse matrix types */ - template<typename NumericT> - viennacl::vector<NumericT> solve_impl(viennacl::ell_matrix<NumericT> const & A, - viennacl::vector<NumericT> const & rhs, - cg_tag const & tag, - viennacl::linalg::no_precond, - bool (*monitor)(viennacl::vector<NumericT> const &, NumericT, void*) = NULL, - void *monitor_data = NULL) - { - return detail::pipelined_solve(A, rhs, tag, viennacl::linalg::no_precond(), monitor, monitor_data); - } - - - - /** @brief Overload for the pipelined CG implementation for the ViennaCL sparse matrix types */ - template<typename NumericT> - viennacl::vector<NumericT> solve_impl(viennacl::sliced_ell_matrix<NumericT> const & A, - viennacl::vector<NumericT> const & rhs, - cg_tag const & tag, - viennacl::linalg::no_precond, - bool (*monitor)(viennacl::vector<NumericT> const &, NumericT, void*) = NULL, - void *monitor_data = NULL) - { - return detail::pipelined_solve(A, rhs, tag, viennacl::linalg::no_precond(), monitor, monitor_data); - } - - - /** @brief Overload for the pipelined CG implementation for the ViennaCL sparse matrix types */ - template<typename NumericT> - viennacl::vector<NumericT> solve_impl(viennacl::hyb_matrix<NumericT> const & A, - viennacl::vector<NumericT> const & rhs, - cg_tag const & tag, - viennacl::linalg::no_precond, - bool (*monitor)(viennacl::vector<NumericT> const &, NumericT, void*) = NULL, - void *monitor_data = NULL) - { - return detail::pipelined_solve(A, rhs, tag, viennacl::linalg::no_precond(), monitor, monitor_data); - } - - - template<typename MatrixT, typename VectorT, typename PreconditionerT> - VectorT solve_impl(MatrixT const & matrix, - VectorT const & rhs, - cg_tag const & tag, - PreconditionerT const & precond, - bool (*monitor)(VectorT const &, typename viennacl::result_of::cpu_value_type<typename viennacl::result_of::value_type<VectorT>::type>::type, void*) = NULL, - void *monitor_data = NULL) - { - typedef typename viennacl::result_of::value_type<VectorT>::type NumericType; - typedef typename viennacl::result_of::cpu_value_type<NumericType>::type CPU_NumericType; - - VectorT result = rhs; - viennacl::traits::clear(result); - - VectorT residual = rhs; - VectorT tmp = rhs; - detail::z_handler<VectorT, PreconditionerT> zhandler(residual); - VectorT & z = zhandler.get(); - - precond.apply(z); - VectorT p = z; - - CPU_NumericType ip_rr = viennacl::linalg::inner_prod(residual, z); - CPU_NumericType alpha; - CPU_NumericType new_ip_rr = 0; - CPU_NumericType beta; - CPU_NumericType norm_rhs_squared = ip_rr; - CPU_NumericType new_ipp_rr_over_norm_rhs; - - if (norm_rhs_squared <= tag.abs_tolerance() * tag.abs_tolerance()) //solution is zero if RHS norm (squared) is zero - return result; - - for (unsigned int i = 0; i < tag.max_iterations(); ++i) - { - tag.iters(i+1); - tmp = viennacl::linalg::prod(matrix, p); - - alpha = ip_rr / viennacl::linalg::inner_prod(tmp, p); - - result += alpha * p; - residual -= alpha * tmp; - z = residual; - precond.apply(z); - - if (static_cast<VectorT*>(&residual)==static_cast<VectorT*>(&z)) - new_ip_rr = std::pow(viennacl::linalg::norm_2(residual),2); - else - new_ip_rr = viennacl::linalg::inner_prod(residual, z); - - new_ipp_rr_over_norm_rhs = new_ip_rr / norm_rhs_squared; - if (monitor && monitor(result, std::sqrt(std::fabs(new_ipp_rr_over_norm_rhs)), monitor_data)) - break; - if (std::fabs(new_ipp_rr_over_norm_rhs) < tag.tolerance() * tag.tolerance() || std::fabs(new_ip_rr) < tag.abs_tolerance() * tag.abs_tolerance()) //squared norms involved here - break; - - beta = new_ip_rr / ip_rr; - ip_rr = new_ip_rr; - - p = z + beta*p; - } - - //store last error estimate: - tag.error(std::sqrt(std::fabs(new_ip_rr / norm_rhs_squared))); - - return result; - } - -} - - - -/** @brief Implementation of the preconditioned conjugate gradient solver, generic implementation for non-ViennaCL types. -* -* Following Algorithm 9.1 in "Iterative Methods for Sparse Linear Systems" by Y. Saad -* -* @param matrix The system matrix -* @param rhs The load vector -* @param tag Solver configuration tag -* @param precond A preconditioner. Precondition operation is done via member function apply() -* @return The result vector -*/ -template<typename MatrixT, typename VectorT, typename PreconditionerT> -VectorT solve(MatrixT const & matrix, VectorT const & rhs, cg_tag const & tag, PreconditionerT const & precond) -{ - return detail::solve_impl(matrix, rhs, tag, precond); -} - -/** @brief Convenience overload for calling the CG solver using types from the C++ STL. - * - * A std::vector<std::map<T, U> > matrix is convenient for e.g. finite element assembly. - * It is not the fastest option for setting up a system, but often it is fast enough - particularly for just trying things out. - */ -template<typename IndexT, typename NumericT, typename PreconditionerT> -std::vector<NumericT> solve(std::vector< std::map<IndexT, NumericT> > const & A, std::vector<NumericT> const & rhs, cg_tag const & tag, PreconditionerT const & precond) -{ - viennacl::compressed_matrix<NumericT> vcl_A; - viennacl::copy(A, vcl_A); - - viennacl::vector<NumericT> vcl_rhs(rhs.size()); - viennacl::copy(rhs, vcl_rhs); - - viennacl::vector<NumericT> vcl_result = solve(vcl_A, vcl_rhs, tag, precond); - - std::vector<NumericT> result(vcl_result.size()); - viennacl::copy(vcl_result, result); - return result; -} - -/** @brief Entry point for the unpreconditioned CG method. - * - * @param matrix The system matrix - * @param rhs Right hand side vector (load vector) - * @param tag A BiCGStab tag providing relative tolerances, etc. - */ -template<typename MatrixT, typename VectorT> -VectorT solve(MatrixT const & matrix, VectorT const & rhs, cg_tag const & tag) -{ - return solve(matrix, rhs, tag, viennacl::linalg::no_precond()); -} - - - -template<typename VectorT> -class cg_solver -{ -public: - typedef typename viennacl::result_of::cpu_value_type<VectorT>::type numeric_type; - - cg_solver(cg_tag const & tag) : tag_(tag), monitor_callback_(NULL), user_data_(NULL) {} - - template<typename MatrixT, typename PreconditionerT> - VectorT operator()(MatrixT const & A, VectorT const & b, PreconditionerT const & precond) const - { - if (viennacl::traits::size(init_guess_) > 0) // take initial guess into account - { - VectorT mod_rhs = viennacl::linalg::prod(A, init_guess_); - mod_rhs = b - mod_rhs; - VectorT y = detail::solve_impl(A, mod_rhs, tag_, precond, monitor_callback_, user_data_); - return init_guess_ + y; - } - return detail::solve_impl(A, b, tag_, precond, monitor_callback_, user_data_); - } - - - template<typename MatrixT> - VectorT operator()(MatrixT const & A, VectorT const & b) const - { - return operator()(A, b, viennacl::linalg::no_precond()); - } - - /** @brief Specifies an initial guess for the iterative solver. - * - * An iterative solver for Ax = b with initial guess x_0 is equivalent to an iterative solver for Ay = b' := b - Ax_0, where x = x_0 + y. - */ - void set_initial_guess(VectorT const & x) { init_guess_ = x; } - - /** @brief Sets a monitor function pointer to be called in each iteration. Set to NULL to run without monitor. - * - * The monitor function is called with the current guess for the result as first argument and the current relative residual estimate as second argument. - * The third argument is a pointer to user-defined data, through which additional information can be passed. - * This pointer needs to be set with set_monitor_data. If not set, NULL is passed. - * If the montior function returns true, the solver terminates (either convergence or divergence). - */ - void set_monitor(bool (*monitor_fun)(VectorT const &, numeric_type, void *), void *user_data) - { - monitor_callback_ = monitor_fun; - user_data_ = user_data; - } - - /** @brief Returns the solver tag containing basic configuration such as tolerances, etc. */ - cg_tag const & tag() const { return tag_; } - -private: - cg_tag tag_; - VectorT init_guess_; - bool (*monitor_callback_)(VectorT const &, numeric_type, void *); - void *user_data_; -}; - - -} -} - -#endif http://git-wip-us.apache.org/repos/asf/mahout/blob/7ae549fa/native-viennaCL/src/main/cpp/viennacl/linalg/circulant_matrix_operations.hpp ---------------------------------------------------------------------- diff --git a/native-viennaCL/src/main/cpp/viennacl/linalg/circulant_matrix_operations.hpp b/native-viennaCL/src/main/cpp/viennacl/linalg/circulant_matrix_operations.hpp deleted file mode 100644 index 5325b7b..0000000 --- a/native-viennaCL/src/main/cpp/viennacl/linalg/circulant_matrix_operations.hpp +++ /dev/null @@ -1,75 +0,0 @@ -#ifndef VIENNACL_LINALG_CIRCULANT_MATRIX_OPERATIONS_HPP_ -#define VIENNACL_LINALG_CIRCULANT_MATRIX_OPERATIONS_HPP_ - -/* ========================================================================= - Copyright (c) 2010-2016, Institute for Microelectronics, - Institute for Analysis and Scientific Computing, - TU Wien. - Portions of this software are copyright by UChicago Argonne, LLC. - - ----------------- - ViennaCL - The Vienna Computing Library - ----------------- - - Project Head: Karl Rupp [email protected] - - (A list of authors and contributors can be found in the manual) - - License: MIT (X11), see file LICENSE in the base directory -============================================================================= */ - -/** @file viennacl/linalg/circulant_matrix_operations.hpp - @brief Implementations of operations using circulant_matrix. Experimental. -*/ - -#include "viennacl/forwards.h" -#include "viennacl/ocl/backend.hpp" -#include "viennacl/scalar.hpp" -#include "viennacl/vector.hpp" -#include "viennacl/tools/tools.hpp" -#include "viennacl/fft.hpp" -//#include "viennacl/linalg/kernels/coordinate_matrix_kernels.h" - -namespace viennacl -{ -namespace linalg -{ - -// A * x - -/** @brief Carries out matrix-vector multiplication with a circulant_matrix -* -* Implementation of the convenience expression result = prod(mat, vec); -* -* @param mat The matrix -* @param vec The vector -* @param result The result vector -*/ -template<typename NumericT, unsigned int AlignmentV> -void prod_impl(viennacl::circulant_matrix<NumericT, AlignmentV> const & mat, - viennacl::vector_base<NumericT> const & vec, - viennacl::vector_base<NumericT> & result) -{ - assert(mat.size1() == result.size() && bool("Dimension mismatch")); - assert(mat.size2() == vec.size() && bool("Dimension mismatch")); - //result.clear(); - - //std::cout << "prod(circulant_matrix" << ALIGNMENT << ", vector) called with internal_nnz=" << mat.internal_nnz() << std::endl; - - viennacl::vector<NumericT> circ(mat.elements().size() * 2); - viennacl::linalg::real_to_complex(mat.elements(), circ, mat.elements().size()); - - viennacl::vector<NumericT> tmp(vec.size() * 2); - viennacl::vector<NumericT> tmp2(vec.size() * 2); - - viennacl::linalg::real_to_complex(vec, tmp, vec.size()); - viennacl::linalg::convolve(circ, tmp, tmp2); - viennacl::linalg::complex_to_real(tmp2, result, vec.size()); - -} - -} //namespace linalg -} //namespace viennacl - - -#endif
