http://git-wip-us.apache.org/repos/asf/mahout/blob/f7c1f802/native-viennaCL/src/main/cpp/viennacl/linalg/amg.hpp ---------------------------------------------------------------------- diff --git a/native-viennaCL/src/main/cpp/viennacl/linalg/amg.hpp b/native-viennaCL/src/main/cpp/viennacl/linalg/amg.hpp new file mode 100644 index 0000000..0b81203 --- /dev/null +++ b/native-viennaCL/src/main/cpp/viennacl/linalg/amg.hpp @@ -0,0 +1,398 @@ +#ifndef VIENNACL_LINALG_AMG_HPP_ +#define VIENNACL_LINALG_AMG_HPP_ + +/* ========================================================================= + Copyright (c) 2010-2016, Institute for Microelectronics, + Institute for Analysis and Scientific Computing, + TU Wien. + Portions of this software are copyright by UChicago Argonne, LLC. + + ----------------- + ViennaCL - The Vienna Computing Library + ----------------- + + Project Head: Karl Rupp [email protected] + + (A list of authors and contributors can be found in the manual) + + License: MIT (X11), see file LICENSE in the base directory +============================================================================= */ + +/** @file viennacl/linalg/amg.hpp + @brief Main include file for algebraic multigrid (AMG) preconditioners. Experimental. + + Implementation contributed by Markus Wagner +*/ + +#include <vector> +#include <cmath> +#include "viennacl/forwards.h" +#include "viennacl/tools/tools.hpp" +#include "viennacl/linalg/prod.hpp" +#include "viennacl/linalg/direct_solve.hpp" +#include "viennacl/compressed_matrix.hpp" + +#include "viennacl/linalg/detail/amg/amg_base.hpp" +#include "viennacl/linalg/sparse_matrix_operations.hpp" +#include "viennacl/linalg/amg_operations.hpp" +#include "viennacl/tools/timer.hpp" +#include "viennacl/linalg/direct_solve.hpp" +#include "viennacl/linalg/lu.hpp" + +#include <map> + +#ifdef VIENNACL_WITH_OPENMP + #include <omp.h> +#endif + +#define VIENNACL_AMG_MAX_LEVELS 20 + +namespace viennacl +{ +namespace linalg +{ + +class amg_coarse_problem_too_large_exception : public std::runtime_error +{ +public: + amg_coarse_problem_too_large_exception(std::string const & msg, vcl_size_t num_points) : std::runtime_error(msg), c_points_(num_points) {} + + /** @brief Returns the number of coarse points for which no further coarsening could be applied */ + vcl_size_t coarse_points() const { return c_points_; } + +private: + vcl_size_t c_points_; +}; + + +namespace detail +{ + /** @brief Sparse Galerkin product: Calculates A_coarse = trans(P)*A_fine*P = R*A_fine*P + * + * @param A_fine Operator matrix on fine grid (quadratic) + * @param P Prolongation/Interpolation matrix + * @param R Restriction matrix + * @param A_coarse Result matrix on coarse grid (Galerkin operator) + */ + template<typename NumericT> + void amg_galerkin_prod(compressed_matrix<NumericT> & A_fine, + compressed_matrix<NumericT> & P, + compressed_matrix<NumericT> & R, //P^T + compressed_matrix<NumericT> & A_coarse) + { + + compressed_matrix<NumericT> A_fine_times_P(viennacl::traits::context(A_fine)); + + // transpose P in memory (no known way of efficiently multiplying P^T * B for CSR-matrices P and B): + viennacl::linalg::detail::amg::amg_transpose(P, R); + + // compute Galerkin product using a temporary for the result of A_fine * P + A_fine_times_P = viennacl::linalg::prod(A_fine, P); + A_coarse = viennacl::linalg::prod(R, A_fine_times_P); + + } + + + /** @brief Setup AMG preconditioner + * + * @param list_of_A Operator matrices on all levels + * @param list_of_P Prolongation/Interpolation operators on all levels + * @param list_of_R Restriction operators on all levels + * @param list_of_amg_level_context Auxiliary datastructures for managing the grid hierarchy (coarse nodes, etc.) + * @param tag AMG preconditioner tag + */ + template<typename NumericT, typename AMGContextListT> + vcl_size_t amg_setup(std::vector<compressed_matrix<NumericT> > & list_of_A, + std::vector<compressed_matrix<NumericT> > & list_of_P, + std::vector<compressed_matrix<NumericT> > & list_of_R, + AMGContextListT & list_of_amg_level_context, + amg_tag & tag) + { + // Set number of iterations. If automatic coarse grid construction is chosen (0), then set a maximum size and stop during the process. + vcl_size_t iterations = tag.get_coarse_levels(); + if (iterations == 0) + iterations = VIENNACL_AMG_MAX_LEVELS; + + for (vcl_size_t i=0; i<iterations; ++i) + { + list_of_amg_level_context[i].switch_context(tag.get_setup_context()); + list_of_amg_level_context[i].resize(list_of_A[i].size1(), list_of_A[i].nnz()); + + // Construct C and F points on coarse level (i is fine level, i+1 coarse level). + detail::amg::amg_coarse(list_of_A[i], list_of_amg_level_context[i], tag); + + // Calculate number of C and F points on level i. + unsigned int c_points = list_of_amg_level_context[i].num_coarse_; + unsigned int f_points = static_cast<unsigned int>(list_of_A[i].size1()) - c_points; + + if (f_points == 0 && c_points > tag.get_coarsening_cutoff()) + { + std::stringstream ss; + ss << "No further coarsening possible (" << c_points << " coarse points). Consider changing the strong connection threshold or increasing the coarsening cutoff." << std::endl; + throw amg_coarse_problem_too_large_exception(ss.str(), c_points); + } + + // Stop routine when the maximal coarse level is found (no C or F point). Coarsest level is level i. + if (c_points == 0 || f_points == 0) + break; + + // Construct interpolation matrix for level i. + detail::amg::amg_interpol(list_of_A[i], list_of_P[i], list_of_amg_level_context[i], tag); + + // Compute coarse grid operator (A[i+1] = R * A[i] * P) with R = trans(P). + amg_galerkin_prod(list_of_A[i], list_of_P[i], list_of_R[i], list_of_A[i+1]); + + // send matrices to target context: + list_of_A[i].switch_memory_context(tag.get_target_context()); + list_of_P[i].switch_memory_context(tag.get_target_context()); + list_of_R[i].switch_memory_context(tag.get_target_context()); + + // If Limit of coarse points is reached then stop. Coarsest level is level i+1. + if (tag.get_coarse_levels() == 0 && c_points <= tag.get_coarsening_cutoff()) + return i+1; + } + + return iterations; + } + + + /** @brief Initialize AMG preconditioner + * + * @param mat System matrix + * @param list_of_A Operator matrices on all levels + * @param list_of_P Prolongation/Interpolation operators on all levels + * @param list_of_R Restriction operators on all levels + * @param list_of_amg_level_context Auxiliary datastructures for managing the grid hierarchy (coarse nodes, etc.) + * @param tag AMG preconditioner tag + */ + template<typename MatrixT, typename InternalT1, typename InternalT2> + void amg_init(MatrixT const & mat, InternalT1 & list_of_A, InternalT1 & list_of_P, InternalT1 & list_of_R, InternalT2 & list_of_amg_level_context, amg_tag & tag) + { + typedef typename InternalT1::value_type SparseMatrixType; + + vcl_size_t num_levels = (tag.get_coarse_levels() > 0) ? tag.get_coarse_levels() : VIENNACL_AMG_MAX_LEVELS; + + list_of_A.resize(num_levels+1, SparseMatrixType(tag.get_setup_context())); + list_of_P.resize(num_levels, SparseMatrixType(tag.get_setup_context())); + list_of_R.resize(num_levels, SparseMatrixType(tag.get_setup_context())); + list_of_amg_level_context.resize(num_levels); + + // Insert operator matrix as operator for finest level. + //SparseMatrixType A0(mat); + //A.insert_element(0, A0); + list_of_A[0].switch_memory_context(viennacl::traits::context(mat)); + list_of_A[0] = mat; + list_of_A[0].switch_memory_context(tag.get_setup_context()); + } + + /** @brief Setup data structures for precondition phase for later use on the GPU + * + * @param result Result vector on all levels + * @param result_backup Copy of result vector on all levels + * @param rhs RHS vector on all levels + * @param residual Residual vector on all levels + * @param A Operators matrices on all levels from setup phase + * @param coarse_levels Number of coarse levels for which the datastructures should be set up. + * @param tag AMG preconditioner tag + */ + template<typename InternalVectorT, typename SparseMatrixT> + void amg_setup_apply(InternalVectorT & result, + InternalVectorT & result_backup, + InternalVectorT & rhs, + InternalVectorT & residual, + SparseMatrixT const & A, + vcl_size_t coarse_levels, + amg_tag const & tag) + { + typedef typename InternalVectorT::value_type VectorType; + + result.resize(coarse_levels + 1); + result_backup.resize(coarse_levels + 1); + rhs.resize(coarse_levels + 1); + residual.resize(coarse_levels); + + for (vcl_size_t level=0; level <= coarse_levels; ++level) + { + result[level] = VectorType(A[level].size1(), tag.get_target_context()); + result_backup[level] = VectorType(A[level].size1(), tag.get_target_context()); + rhs[level] = VectorType(A[level].size1(), tag.get_target_context()); + } + for (vcl_size_t level=0; level < coarse_levels; ++level) + { + residual[level] = VectorType(A[level].size1(), tag.get_target_context()); + } + } + + + /** @brief Pre-compute LU factorization for direct solve (ublas library). + * + * Speeds up precondition phase as this is computed only once overall instead of once per iteration. + * + * @param op Operator matrix for direct solve + * @param A Operator matrix on coarsest level + * @param tag AMG preconditioner tag + */ + template<typename NumericT, typename SparseMatrixT> + void amg_lu(viennacl::matrix<NumericT> & op, + SparseMatrixT const & A, + amg_tag const & tag) + { + op.switch_memory_context(tag.get_setup_context()); + op.resize(A.size1(), A.size2(), false); + viennacl::linalg::detail::amg::assign_to_dense(A, op); + + viennacl::linalg::lu_factorize(op); + op.switch_memory_context(tag.get_target_context()); + } + +} + +/** @brief AMG preconditioner class, can be supplied to solve()-routines +*/ +template<typename MatrixT> +class amg_precond; + + +/** @brief AMG preconditioner class, can be supplied to solve()-routines. +* +* Specialization for compressed_matrix +*/ +template<typename NumericT, unsigned int AlignmentV> +class amg_precond< compressed_matrix<NumericT, AlignmentV> > +{ + typedef viennacl::compressed_matrix<NumericT, AlignmentV> SparseMatrixType; + typedef viennacl::vector<NumericT> VectorType; + typedef detail::amg::amg_level_context AMGContextType; + +public: + + amg_precond() {} + + /** @brief The constructor. Builds data structures. + * + * @param mat System matrix + * @param tag The AMG tag + */ + amg_precond(compressed_matrix<NumericT, AlignmentV> const & mat, + amg_tag const & tag) + { + tag_ = tag; + + // Initialize data structures. + detail::amg_init(mat, A_list_, P_list_, R_list_, amg_context_list_, tag_); + } + + /** @brief Start setup phase for this class and copy data structures. + */ + void setup() + { + // Start setup phase. + vcl_size_t num_coarse_levels = detail::amg_setup(A_list_, P_list_, R_list_, amg_context_list_, tag_); + + // Setup precondition phase (Data structures). + detail::amg_setup_apply(result_list_, result_backup_list_, rhs_list_, residual_list_, A_list_, num_coarse_levels, tag_); + + // LU factorization for direct solve. + detail::amg_lu(coarsest_op_, A_list_[num_coarse_levels], tag_); + } + + + /** @brief Precondition Operation + * + * @param vec The vector to which preconditioning is applied to + */ + template<typename VectorT> + void apply(VectorT & vec) const + { + vcl_size_t level; + + // Precondition operation (Yang, p.3). + rhs_list_[0] = vec; + + // Part 1: Restrict down to coarsest level + for (level=0; level < residual_list_.size(); level++) + { + result_list_[level].clear(); + + // Apply Smoother presmooth_ times. + viennacl::linalg::detail::amg::smooth_jacobi(static_cast<unsigned int>(tag_.get_presmooth_steps()), + A_list_[level], + result_list_[level], + result_backup_list_[level], + rhs_list_[level], + static_cast<NumericT>(tag_.get_jacobi_weight())); + + // Compute residual. + //residual[level] = rhs_[level] - viennacl::linalg::prod(A_[level], result_[level]); + residual_list_[level] = viennacl::linalg::prod(A_list_[level], result_list_[level]); + residual_list_[level] = rhs_list_[level] - residual_list_[level]; + + // Restrict to coarse level. Result is RHS of coarse level equation. + //residual_coarse[level] = viennacl::linalg::prod(R[level],residual[level]); + rhs_list_[level+1] = viennacl::linalg::prod(R_list_[level], residual_list_[level]); + } + + // Part 2: On highest level use direct solve to solve equation (on the CPU) + result_list_[level] = rhs_list_[level]; + viennacl::linalg::lu_substitute(coarsest_op_, result_list_[level]); + + // Part 3: Prolongation to finest level + for (int level2 = static_cast<int>(residual_list_.size()-1); level2 >= 0; level2--) + { + level = static_cast<vcl_size_t>(level2); + + // Interpolate error to fine level and correct solution. + result_backup_list_[level] = viennacl::linalg::prod(P_list_[level], result_list_[level+1]); + result_list_[level] += result_backup_list_[level]; + + // Apply Smoother postsmooth_ times. + viennacl::linalg::detail::amg::smooth_jacobi(static_cast<unsigned int>(tag_.get_postsmooth_steps()), + A_list_[level], + result_list_[level], + result_backup_list_[level], + rhs_list_[level], + static_cast<NumericT>(tag_.get_jacobi_weight())); + } + vec = result_list_[0]; + } + + /** @brief Returns the total number of multigrid levels in the hierarchy including the finest level. */ + vcl_size_t levels() const { return residual_list_.size(); } + + + /** @brief Returns the problem/operator size at the respective multigrid level + * + * @param level Index of the multigrid level. 0 is the finest level, levels() - 1 is the coarsest level. + */ + vcl_size_t size(vcl_size_t level) const + { + assert(level < levels() && bool("Level index out of bounds!")); + return residual_list_[level].size(); + } + + /** @brief Returns the associated preconditioner tag containing the configuration for the multigrid preconditioner. */ + amg_tag const & tag() const { return tag_; } + +private: + std::vector<SparseMatrixType> A_list_; + std::vector<SparseMatrixType> P_list_; + std::vector<SparseMatrixType> R_list_; + std::vector<AMGContextType> amg_context_list_; + + viennacl::matrix<NumericT> coarsest_op_; + + mutable std::vector<VectorType> result_list_; + mutable std::vector<VectorType> result_backup_list_; + mutable std::vector<VectorType> rhs_list_; + mutable std::vector<VectorType> residual_list_; + + amg_tag tag_; +}; + +} +} + + + +#endif +
http://git-wip-us.apache.org/repos/asf/mahout/blob/f7c1f802/native-viennaCL/src/main/cpp/viennacl/linalg/amg_operations.hpp ---------------------------------------------------------------------- diff --git a/native-viennaCL/src/main/cpp/viennacl/linalg/amg_operations.hpp b/native-viennaCL/src/main/cpp/viennacl/linalg/amg_operations.hpp new file mode 100644 index 0000000..9c7f79f --- /dev/null +++ b/native-viennaCL/src/main/cpp/viennacl/linalg/amg_operations.hpp @@ -0,0 +1,238 @@ +#ifndef VIENNACL_LINALG_AMG_OPERATIONS_HPP_ +#define VIENNACL_LINALG_AMG_OPERATIONS_HPP_ + +/* ========================================================================= + Copyright (c) 2010-2016, Institute for Microelectronics, + Institute for Analysis and Scientific Computing, + TU Wien. + Portions of this software are copyright by UChicago Argonne, LLC. + + ----------------- + ViennaCL - The Vienna Computing Library + ----------------- + + Project Head: Karl Rupp [email protected] + + (A list of authors and contributors can be found in the PDF manual) + + License: MIT (X11), see file LICENSE in the base directory +============================================================================= */ + +/** @file viennacl/linalg/amg_operations.hpp + @brief Implementations of operations for algebraic multigrid +*/ + +#include "viennacl/forwards.h" +#include "viennacl/scalar.hpp" +#include "viennacl/vector.hpp" +#include "viennacl/matrix.hpp" +#include "viennacl/tools/tools.hpp" +#include "viennacl/linalg/detail/amg/amg_base.hpp" +#include "viennacl/linalg/host_based/amg_operations.hpp" + +#ifdef VIENNACL_WITH_OPENCL + #include "viennacl/linalg/opencl/amg_operations.hpp" +#endif + +#ifdef VIENNACL_WITH_CUDA + #include "viennacl/linalg/cuda/amg_operations.hpp" +#endif + +namespace viennacl +{ +namespace linalg +{ +namespace detail +{ +namespace amg +{ + +template<typename NumericT, typename AMGContextT> +void amg_influence(compressed_matrix<NumericT> const & A, AMGContextT & amg_context, amg_tag & tag) +{ + switch (viennacl::traits::handle(A).get_active_handle_id()) + { + case viennacl::MAIN_MEMORY: + viennacl::linalg::host_based::amg::amg_influence(A, amg_context, tag); + break; +#ifdef VIENNACL_WITH_OPENCL + case viennacl::OPENCL_MEMORY: + viennacl::linalg::opencl::amg::amg_influence(A, amg_context, tag); + break; +#endif +#ifdef VIENNACL_WITH_CUDA + case viennacl::CUDA_MEMORY: + viennacl::linalg::cuda::amg::amg_influence(A, amg_context, tag); + break; +#endif + case viennacl::MEMORY_NOT_INITIALIZED: + throw memory_exception("not initialised!"); + default: + throw memory_exception("not implemented"); + } +} + + +template<typename NumericT, typename AMGContextT> +void amg_coarse(compressed_matrix<NumericT> const & A, AMGContextT & amg_context, amg_tag & tag) +{ + switch (viennacl::traits::handle(A).get_active_handle_id()) + { + case viennacl::MAIN_MEMORY: + viennacl::linalg::host_based::amg::amg_coarse(A, amg_context, tag); + break; +#ifdef VIENNACL_WITH_OPENCL + case viennacl::OPENCL_MEMORY: + viennacl::linalg::opencl::amg::amg_coarse(A, amg_context, tag); + break; +#endif +#ifdef VIENNACL_WITH_CUDA + case viennacl::CUDA_MEMORY: + viennacl::linalg::cuda::amg::amg_coarse(A, amg_context, tag); + break; +#endif + case viennacl::MEMORY_NOT_INITIALIZED: + throw memory_exception("not initialised!"); + default: + throw memory_exception("not implemented"); + } +} + + +template<typename NumericT, typename AMGContextT> +void amg_interpol(compressed_matrix<NumericT> const & A, + compressed_matrix<NumericT> & P, + AMGContextT & amg_context, + amg_tag & tag) +{ + switch (viennacl::traits::handle(A).get_active_handle_id()) + { + case viennacl::MAIN_MEMORY: + viennacl::linalg::host_based::amg::amg_interpol(A, P, amg_context, tag); + break; +#ifdef VIENNACL_WITH_OPENCL + case viennacl::OPENCL_MEMORY: + viennacl::linalg::opencl::amg::amg_interpol(A, P, amg_context, tag); + break; +#endif +#ifdef VIENNACL_WITH_CUDA + case viennacl::CUDA_MEMORY: + viennacl::linalg::cuda::amg::amg_interpol(A, P, amg_context, tag); + break; +#endif + case viennacl::MEMORY_NOT_INITIALIZED: + throw memory_exception("not initialised!"); + default: + throw memory_exception("not implemented"); + } +} + + +template<typename NumericT> +void amg_transpose(compressed_matrix<NumericT> & A, + compressed_matrix<NumericT> & B) +{ + viennacl::context orig_ctx = viennacl::traits::context(A); + viennacl::context cpu_ctx(viennacl::MAIN_MEMORY); + (void)orig_ctx; + (void)cpu_ctx; + + switch (viennacl::traits::handle(A).get_active_handle_id()) + { + case viennacl::MAIN_MEMORY: + viennacl::linalg::host_based::amg::amg_transpose(A, B); + break; +#ifdef VIENNACL_WITH_OPENCL + case viennacl::OPENCL_MEMORY: + A.switch_memory_context(cpu_ctx); + B.switch_memory_context(cpu_ctx); + viennacl::linalg::host_based::amg::amg_transpose(A, B); + A.switch_memory_context(orig_ctx); + B.switch_memory_context(orig_ctx); + break; +#endif +#ifdef VIENNACL_WITH_CUDA + case viennacl::CUDA_MEMORY: + A.switch_memory_context(cpu_ctx); + B.switch_memory_context(cpu_ctx); + viennacl::linalg::host_based::amg::amg_transpose(A, B); + A.switch_memory_context(orig_ctx); + B.switch_memory_context(orig_ctx); + //viennacl::linalg::cuda::amg_transpose(A, B); + break; +#endif + case viennacl::MEMORY_NOT_INITIALIZED: + throw memory_exception("not initialised!"); + default: + throw memory_exception("not implemented"); + } +} + +/** Assign sparse matrix A to dense matrix B */ +template<typename SparseMatrixType, typename NumericT> +typename viennacl::enable_if< viennacl::is_any_sparse_matrix<SparseMatrixType>::value>::type +assign_to_dense(SparseMatrixType const & A, + viennacl::matrix_base<NumericT> & B) +{ + assert( (A.size1() == B.size1()) && bool("Size check failed for assignment to dense matrix: size1(A) != size1(B)")); + assert( (A.size2() == B.size1()) && bool("Size check failed for assignment to dense matrix: size2(A) != size2(B)")); + + switch (viennacl::traits::handle(A).get_active_handle_id()) + { + case viennacl::MAIN_MEMORY: + viennacl::linalg::host_based::amg::assign_to_dense(A, B); + break; +#ifdef VIENNACL_WITH_OPENCL + case viennacl::OPENCL_MEMORY: + viennacl::linalg::opencl::amg::assign_to_dense(A, B); + break; +#endif +#ifdef VIENNACL_WITH_CUDA + case viennacl::CUDA_MEMORY: + viennacl::linalg::cuda::amg::assign_to_dense(A, B); + break; +#endif + case viennacl::MEMORY_NOT_INITIALIZED: + throw memory_exception("not initialised!"); + default: + throw memory_exception("not implemented"); + } +} + +template<typename NumericT> +void smooth_jacobi(unsigned int iterations, + compressed_matrix<NumericT> const & A, + vector<NumericT> & x, + vector<NumericT> & x_backup, + vector<NumericT> const & rhs_smooth, + NumericT weight) +{ + switch (viennacl::traits::handle(A).get_active_handle_id()) + { + case viennacl::MAIN_MEMORY: + viennacl::linalg::host_based::amg::smooth_jacobi(iterations, A, x, x_backup, rhs_smooth, weight); + break; +#ifdef VIENNACL_WITH_OPENCL + case viennacl::OPENCL_MEMORY: + viennacl::linalg::opencl::amg::smooth_jacobi(iterations, A, x, x_backup, rhs_smooth, weight); + break; +#endif +#ifdef VIENNACL_WITH_CUDA + case viennacl::CUDA_MEMORY: + viennacl::linalg::cuda::amg::smooth_jacobi(iterations, A, x, x_backup, rhs_smooth, weight); + break; +#endif + case viennacl::MEMORY_NOT_INITIALIZED: + throw memory_exception("not initialised!"); + default: + throw memory_exception("not implemented"); + } +} + +} //namespace amg +} //namespace detail +} //namespace linalg +} //namespace viennacl + + +#endif http://git-wip-us.apache.org/repos/asf/mahout/blob/f7c1f802/native-viennaCL/src/main/cpp/viennacl/linalg/bicgstab.hpp ---------------------------------------------------------------------- diff --git a/native-viennaCL/src/main/cpp/viennacl/linalg/bicgstab.hpp b/native-viennaCL/src/main/cpp/viennacl/linalg/bicgstab.hpp new file mode 100644 index 0000000..57bc89a --- /dev/null +++ b/native-viennaCL/src/main/cpp/viennacl/linalg/bicgstab.hpp @@ -0,0 +1,598 @@ +#ifndef VIENNACL_LINALG_BICGSTAB_HPP_ +#define VIENNACL_LINALG_BICGSTAB_HPP_ + +/* ========================================================================= + Copyright (c) 2010-2016, Institute for Microelectronics, + Institute for Analysis and Scientific Computing, + TU Wien. + Portions of this software are copyright by UChicago Argonne, LLC. + + ----------------- + ViennaCL - The Vienna Computing Library + ----------------- + + Project Head: Karl Rupp [email protected] + + (A list of authors and contributors can be found in the manual) + + License: MIT (X11), see file LICENSE in the base directory +============================================================================= */ + +/** @file bicgstab.hpp + @brief The stabilized bi-conjugate gradient method is implemented here +*/ + +#include <vector> +#include <cmath> +#include <numeric> + +#include "viennacl/forwards.h" +#include "viennacl/tools/tools.hpp" +#include "viennacl/linalg/prod.hpp" +#include "viennacl/linalg/inner_prod.hpp" +#include "viennacl/linalg/norm_2.hpp" +#include "viennacl/traits/clear.hpp" +#include "viennacl/traits/size.hpp" +#include "viennacl/traits/context.hpp" +#include "viennacl/meta/result_of.hpp" +#include "viennacl/linalg/iterative_operations.hpp" + +namespace viennacl +{ +namespace linalg +{ + +/** @brief A tag for the stabilized Bi-conjugate gradient solver. Used for supplying solver parameters and for dispatching the solve() function +*/ +class bicgstab_tag +{ +public: + /** @brief The constructor + * + * @param tol Relative tolerance for the residual (solver quits if ||r|| < tol * ||r_initial||) + * @param max_iters The maximum number of iterations + * @param max_iters_before_restart The maximum number of iterations before BiCGStab is reinitialized (to avoid accumulation of round-off errors) + */ + bicgstab_tag(double tol = 1e-8, vcl_size_t max_iters = 400, vcl_size_t max_iters_before_restart = 200) + : tol_(tol), abs_tol_(0), iterations_(max_iters), iterations_before_restart_(max_iters_before_restart) {} + + /** @brief Returns the relative tolerance */ + double tolerance() const { return tol_; } + + /** @brief Returns the absolute tolerance */ + double abs_tolerance() const { return abs_tol_; } + /** @brief Sets the absolute tolerance */ + void abs_tolerance(double new_tol) { if (new_tol >= 0) abs_tol_ = new_tol; } + + /** @brief Returns the maximum number of iterations */ + vcl_size_t max_iterations() const { return iterations_; } + /** @brief Returns the maximum number of iterations before a restart*/ + vcl_size_t max_iterations_before_restart() const { return iterations_before_restart_; } + + /** @brief Return the number of solver iterations: */ + vcl_size_t iters() const { return iters_taken_; } + void iters(vcl_size_t i) const { iters_taken_ = i; } + + /** @brief Returns the estimated relative error at the end of the solver run */ + double error() const { return last_error_; } + /** @brief Sets the estimated relative error at the end of the solver run */ + void error(double e) const { last_error_ = e; } + +private: + double tol_; + double abs_tol_; + vcl_size_t iterations_; + vcl_size_t iterations_before_restart_; + + //return values from solver + mutable vcl_size_t iters_taken_; + mutable double last_error_; +}; + + + +namespace detail +{ + /** @brief Implementation of a pipelined stabilized Bi-conjugate gradient solver */ + template<typename MatrixT, typename NumericT> + viennacl::vector<NumericT> pipelined_solve(MatrixT const & A, //MatrixType const & A, + viennacl::vector_base<NumericT> const & rhs, + bicgstab_tag const & tag, + viennacl::linalg::no_precond, + bool (*monitor)(viennacl::vector<NumericT> const &, NumericT, void*) = NULL, + void *monitor_data = NULL) + { + viennacl::vector<NumericT> result = viennacl::zero_vector<NumericT>(rhs.size(), viennacl::traits::context(rhs)); + + viennacl::vector<NumericT> residual = rhs; + viennacl::vector<NumericT> p = rhs; + viennacl::vector<NumericT> r0star = rhs; + viennacl::vector<NumericT> Ap = rhs; + viennacl::vector<NumericT> s = rhs; + viennacl::vector<NumericT> As = rhs; + + // Layout of temporary buffer: + // chunk 0: <residual, r_0^*> + // chunk 1: <As, As> + // chunk 2: <As, s> + // chunk 3: <Ap, r_0^*> + // chunk 4: <As, r_0^*> + // chunk 5: <s, s> + vcl_size_t buffer_size_per_vector = 256; + vcl_size_t num_buffer_chunks = 6; + viennacl::vector<NumericT> inner_prod_buffer = viennacl::zero_vector<NumericT>(num_buffer_chunks*buffer_size_per_vector, viennacl::traits::context(rhs)); // temporary buffer + std::vector<NumericT> host_inner_prod_buffer(inner_prod_buffer.size()); + + NumericT norm_rhs_host = viennacl::linalg::norm_2(residual); + NumericT beta; + NumericT alpha; + NumericT omega; + NumericT residual_norm = norm_rhs_host; + inner_prod_buffer[0] = norm_rhs_host * norm_rhs_host; + + NumericT r_dot_r0 = 0; + NumericT As_dot_As = 0; + NumericT As_dot_s = 0; + NumericT Ap_dot_r0 = 0; + NumericT As_dot_r0 = 0; + NumericT s_dot_s = 0; + + if (norm_rhs_host <= tag.abs_tolerance()) //solution is zero if RHS norm is zero + return result; + + for (vcl_size_t i = 0; i < tag.max_iterations(); ++i) + { + tag.iters(i+1); + // Ap = A*p_j + // Ap_dot_r0 = <Ap, r_0^*> + viennacl::linalg::pipelined_bicgstab_prod(A, p, Ap, r0star, + inner_prod_buffer, buffer_size_per_vector, 3*buffer_size_per_vector); + + //////// first (weak) synchronization point //// + + ///// method 1: compute alpha on host: + // + //// we only need the second chunk of the buffer for computing Ap_dot_r0: + //viennacl::fast_copy(inner_prod_buffer.begin(), inner_prod_buffer.end(), host_inner_prod_buffer.begin()); + //Ap_dot_r0 = std::accumulate(host_inner_prod_buffer.begin() + buffer_size_per_vector, host_inner_prod_buffer.begin() + 2 * buffer_size_per_vector, ScalarType(0)); + + //alpha = residual_dot_r0 / Ap_dot_r0; + + //// s_j = r_j - alpha_j q_j + //s = residual - alpha * Ap; + + ///// method 2: compute alpha on device: + // s = r - alpha * Ap + // <s, s> first stage + // dump alpha at end of inner_prod_buffer + viennacl::linalg::pipelined_bicgstab_update_s(s, residual, Ap, + inner_prod_buffer, buffer_size_per_vector, 5*buffer_size_per_vector); + + // As = A*s_j + // As_dot_As = <As, As> + // As_dot_s = <As, s> + // As_dot_r0 = <As, r_0^*> + viennacl::linalg::pipelined_bicgstab_prod(A, s, As, r0star, + inner_prod_buffer, buffer_size_per_vector, 4*buffer_size_per_vector); + + //////// second (strong) synchronization point //// + + viennacl::fast_copy(inner_prod_buffer.begin(), inner_prod_buffer.end(), host_inner_prod_buffer.begin()); + + typedef typename std::vector<NumericT>::difference_type difference_type; + + r_dot_r0 = std::accumulate(host_inner_prod_buffer.begin(), host_inner_prod_buffer.begin() + difference_type( buffer_size_per_vector), NumericT(0)); + As_dot_As = std::accumulate(host_inner_prod_buffer.begin() + difference_type( buffer_size_per_vector), host_inner_prod_buffer.begin() + difference_type(2 * buffer_size_per_vector), NumericT(0)); + As_dot_s = std::accumulate(host_inner_prod_buffer.begin() + difference_type(2 * buffer_size_per_vector), host_inner_prod_buffer.begin() + difference_type(3 * buffer_size_per_vector), NumericT(0)); + Ap_dot_r0 = std::accumulate(host_inner_prod_buffer.begin() + difference_type(3 * buffer_size_per_vector), host_inner_prod_buffer.begin() + difference_type(4 * buffer_size_per_vector), NumericT(0)); + As_dot_r0 = std::accumulate(host_inner_prod_buffer.begin() + difference_type(4 * buffer_size_per_vector), host_inner_prod_buffer.begin() + difference_type(5 * buffer_size_per_vector), NumericT(0)); + s_dot_s = std::accumulate(host_inner_prod_buffer.begin() + difference_type(5 * buffer_size_per_vector), host_inner_prod_buffer.begin() + difference_type(6 * buffer_size_per_vector), NumericT(0)); + + alpha = r_dot_r0 / Ap_dot_r0; + beta = - As_dot_r0 / Ap_dot_r0; + omega = As_dot_s / As_dot_As; + + residual_norm = std::sqrt(s_dot_s - NumericT(2.0) * omega * As_dot_s + omega * omega * As_dot_As); + if (monitor && monitor(result, std::fabs(residual_norm / norm_rhs_host), monitor_data)) + break; + if (std::fabs(residual_norm / norm_rhs_host) < tag.tolerance() || residual_norm < tag.abs_tolerance()) + break; + + // x_{j+1} = x_j + alpha * p_j + omega * s_j + // r_{j+1} = s_j - omega * t_j + // p_{j+1} = r_{j+1} + beta * (p_j - omega * q_j) + // and compute first stage of r_dot_r0 = <r_{j+1}, r_o^*> for use in next iteration + viennacl::linalg::pipelined_bicgstab_vector_update(result, alpha, p, omega, s, + residual, As, + beta, Ap, + r0star, inner_prod_buffer, buffer_size_per_vector); + } + + //store last error estimate: + tag.error(residual_norm / norm_rhs_host); + + return result; + } + + /** @brief Overload for the pipelined CG implementation for the ViennaCL sparse matrix types */ + template<typename NumericT> + viennacl::vector<NumericT> solve_impl(viennacl::compressed_matrix<NumericT> const & A, + viennacl::vector<NumericT> const & rhs, + bicgstab_tag const & tag, + viennacl::linalg::no_precond, + bool (*monitor)(viennacl::vector<NumericT> const &, NumericT, void*) = NULL, + void *monitor_data = NULL) + { + return pipelined_solve(A, rhs, tag, viennacl::linalg::no_precond(), monitor, monitor_data); + } + + + /** @brief Overload for the pipelined CG implementation for the ViennaCL sparse matrix types */ + template<typename NumericT> + viennacl::vector<NumericT> solve_impl(viennacl::coordinate_matrix<NumericT> const & A, + viennacl::vector<NumericT> const & rhs, + bicgstab_tag const & tag, + viennacl::linalg::no_precond, + bool (*monitor)(viennacl::vector<NumericT> const &, NumericT, void*) = NULL, + void *monitor_data = NULL) + { + return detail::pipelined_solve(A, rhs, tag, viennacl::linalg::no_precond(), monitor, monitor_data); + } + + + + /** @brief Overload for the pipelined CG implementation for the ViennaCL sparse matrix types */ + template<typename NumericT> + viennacl::vector<NumericT> solve_impl(viennacl::ell_matrix<NumericT> const & A, + viennacl::vector<NumericT> const & rhs, + bicgstab_tag const & tag, + viennacl::linalg::no_precond, + bool (*monitor)(viennacl::vector<NumericT> const &, NumericT, void*) = NULL, + void *monitor_data = NULL) + { + return detail::pipelined_solve(A, rhs, tag, viennacl::linalg::no_precond(), monitor, monitor_data); + } + + + + /** @brief Overload for the pipelined CG implementation for the ViennaCL sparse matrix types */ + template<typename NumericT> + viennacl::vector<NumericT> solve_impl(viennacl::sliced_ell_matrix<NumericT> const & A, + viennacl::vector<NumericT> const & rhs, + bicgstab_tag const & tag, + viennacl::linalg::no_precond, + bool (*monitor)(viennacl::vector<NumericT> const &, NumericT, void*) = NULL, + void *monitor_data = NULL) + { + return detail::pipelined_solve(A, rhs, tag, viennacl::linalg::no_precond(), monitor, monitor_data); + } + + + /** @brief Overload for the pipelined CG implementation for the ViennaCL sparse matrix types */ + template<typename NumericT> + viennacl::vector<NumericT> solve_impl(viennacl::hyb_matrix<NumericT> const & A, + viennacl::vector<NumericT> const & rhs, + bicgstab_tag const & tag, + viennacl::linalg::no_precond, + bool (*monitor)(viennacl::vector<NumericT> const &, NumericT, void*) = NULL, + void *monitor_data = NULL) + { + return detail::pipelined_solve(A, rhs, tag, viennacl::linalg::no_precond(), monitor, monitor_data); + } + + + /** @brief Implementation of the unpreconditioned stabilized Bi-conjugate gradient solver + * + * Following the description in "Iterative Methods for Sparse Linear Systems" by Y. Saad + * + * @param matrix The system matrix + * @param rhs The load vector + * @param tag Solver configuration tag + * @param monitor A callback routine which is called at each GMRES restart + * @param monitor_data Data pointer to be passed to the callback routine to pass on user-specific data + * @return The result vector + */ + template<typename MatrixT, typename VectorT> + VectorT solve_impl(MatrixT const & matrix, + VectorT const & rhs, + bicgstab_tag const & tag, + viennacl::linalg::no_precond, + bool (*monitor)(VectorT const &, typename viennacl::result_of::cpu_value_type<typename viennacl::result_of::value_type<VectorT>::type>::type, void*) = NULL, + void *monitor_data = NULL) + { + typedef typename viennacl::result_of::value_type<VectorT>::type NumericType; + typedef typename viennacl::result_of::cpu_value_type<NumericType>::type CPU_NumericType; + VectorT result = rhs; + viennacl::traits::clear(result); + + VectorT residual = rhs; + VectorT p = rhs; + VectorT r0star = rhs; + VectorT tmp0 = rhs; + VectorT tmp1 = rhs; + VectorT s = rhs; + + CPU_NumericType norm_rhs_host = viennacl::linalg::norm_2(residual); + CPU_NumericType ip_rr0star = norm_rhs_host * norm_rhs_host; + CPU_NumericType beta; + CPU_NumericType alpha; + CPU_NumericType omega; + //ScalarType inner_prod_temp; //temporary variable for inner product computation + CPU_NumericType new_ip_rr0star = 0; + CPU_NumericType residual_norm = norm_rhs_host; + + if (norm_rhs_host <= tag.abs_tolerance()) //solution is zero if RHS norm is zero + return result; + + bool restart_flag = true; + vcl_size_t last_restart = 0; + for (vcl_size_t i = 0; i < tag.max_iterations(); ++i) + { + if (restart_flag) + { + residual = viennacl::linalg::prod(matrix, result); + residual = rhs - residual; + p = residual; + r0star = residual; + ip_rr0star = viennacl::linalg::norm_2(residual); + ip_rr0star *= ip_rr0star; + restart_flag = false; + last_restart = i; + } + + tag.iters(i+1); + tmp0 = viennacl::linalg::prod(matrix, p); + alpha = ip_rr0star / viennacl::linalg::inner_prod(tmp0, r0star); + + s = residual - alpha*tmp0; + + tmp1 = viennacl::linalg::prod(matrix, s); + CPU_NumericType norm_tmp1 = viennacl::linalg::norm_2(tmp1); + omega = viennacl::linalg::inner_prod(tmp1, s) / (norm_tmp1 * norm_tmp1); + + result += alpha * p + omega * s; + residual = s - omega * tmp1; + + new_ip_rr0star = viennacl::linalg::inner_prod(residual, r0star); + residual_norm = viennacl::linalg::norm_2(residual); + if (monitor && monitor(result, std::fabs(residual_norm / norm_rhs_host), monitor_data)) + break; + if (std::fabs(residual_norm / norm_rhs_host) < tag.tolerance() || residual_norm < tag.abs_tolerance()) + break; + + beta = new_ip_rr0star / ip_rr0star * alpha/omega; + ip_rr0star = new_ip_rr0star; + + if ( (ip_rr0star <= 0 && ip_rr0star >= 0) + || (omega <= 0 && omega >= 0) + || (i - last_restart > tag.max_iterations_before_restart()) + ) //search direction degenerate. A restart might help + restart_flag = true; + + // Execution of + // p = residual + beta * (p - omega*tmp0); + // without introducing temporary vectors: + p -= omega * tmp0; + p = residual + beta * p; + } + + //store last error estimate: + tag.error(residual_norm / norm_rhs_host); + + return result; + } + + + /** @brief Implementation of the preconditioned stabilized Bi-conjugate gradient solver + * + * Following the description of the unpreconditioned case in "Iterative Methods for Sparse Linear Systems" by Y. Saad + * + * @param matrix The system matrix + * @param rhs The load vector + * @param tag Solver configuration tag + * @param precond A preconditioner. Precondition operation is done via member function apply() + * @param monitor A callback routine which is called at each GMRES restart + * @param monitor_data Data pointer to be passed to the callback routine to pass on user-specific data + * @return The result vector + */ + template<typename MatrixT, typename VectorT, typename PreconditionerT> + VectorT solve_impl(MatrixT const & matrix, + VectorT const & rhs, + bicgstab_tag const & tag, + PreconditionerT const & precond, + bool (*monitor)(VectorT const &, typename viennacl::result_of::cpu_value_type<typename viennacl::result_of::value_type<VectorT>::type>::type, void*) = NULL, + void *monitor_data = NULL) + { + typedef typename viennacl::result_of::value_type<VectorT>::type NumericType; + typedef typename viennacl::result_of::cpu_value_type<NumericType>::type CPU_NumericType; + VectorT result = rhs; + viennacl::traits::clear(result); + + VectorT residual = rhs; + VectorT r0star = residual; //can be chosen arbitrarily in fact + VectorT tmp0 = rhs; + VectorT tmp1 = rhs; + VectorT s = rhs; + + VectorT p = residual; + + CPU_NumericType ip_rr0star = viennacl::linalg::norm_2(residual); + CPU_NumericType norm_rhs_host = viennacl::linalg::norm_2(residual); + CPU_NumericType beta; + CPU_NumericType alpha; + CPU_NumericType omega; + CPU_NumericType new_ip_rr0star = 0; + CPU_NumericType residual_norm = norm_rhs_host; + + if (norm_rhs_host <= tag.abs_tolerance()) //solution is zero if RHS norm is zero + return result; + + bool restart_flag = true; + vcl_size_t last_restart = 0; + for (unsigned int i = 0; i < tag.max_iterations(); ++i) + { + if (restart_flag) + { + residual = viennacl::linalg::prod(matrix, result); + residual = rhs - residual; + precond.apply(residual); + p = residual; + r0star = residual; + ip_rr0star = viennacl::linalg::norm_2(residual); + ip_rr0star *= ip_rr0star; + restart_flag = false; + last_restart = i; + } + + tag.iters(i+1); + tmp0 = viennacl::linalg::prod(matrix, p); + precond.apply(tmp0); + alpha = ip_rr0star / viennacl::linalg::inner_prod(tmp0, r0star); + + s = residual - alpha*tmp0; + + tmp1 = viennacl::linalg::prod(matrix, s); + precond.apply(tmp1); + CPU_NumericType norm_tmp1 = viennacl::linalg::norm_2(tmp1); + omega = viennacl::linalg::inner_prod(tmp1, s) / (norm_tmp1 * norm_tmp1); + + result += alpha * p + omega * s; + residual = s - omega * tmp1; + + residual_norm = viennacl::linalg::norm_2(residual); + if (monitor && monitor(result, std::fabs(residual_norm / norm_rhs_host), monitor_data)) + break; + if (residual_norm / norm_rhs_host < tag.tolerance() || residual_norm < tag.abs_tolerance()) + break; + + new_ip_rr0star = viennacl::linalg::inner_prod(residual, r0star); + + beta = new_ip_rr0star / ip_rr0star * alpha/omega; + ip_rr0star = new_ip_rr0star; + + if ( (ip_rr0star >= 0 && ip_rr0star <= 0) || (omega >=0 && omega <= 0) || i - last_restart > tag.max_iterations_before_restart()) //search direction degenerate. A restart might help + restart_flag = true; + + // Execution of + // p = residual + beta * (p - omega*tmp0); + // without introducing temporary vectors: + p -= omega * tmp0; + p = residual + beta * p; + + //std::cout << "Rel. Residual in current step: " << std::sqrt(std::fabs(viennacl::linalg::inner_prod(residual, residual) / norm_rhs_host)) << std::endl; + } + + //store last error estimate: + tag.error(residual_norm / norm_rhs_host); + + return result; + } + +} + + + +template<typename MatrixT, typename VectorT, typename PreconditionerT> +VectorT solve(MatrixT const & matrix, VectorT const & rhs, bicgstab_tag const & tag, PreconditionerT const & precond) +{ + return detail::solve_impl(matrix, rhs, tag, precond); +} + + +/** @brief Convenience overload for calling the preconditioned BiCGStab solver using types from the C++ STL. + * + * A std::vector<std::map<T, U> > matrix is convenient for e.g. finite element assembly. + * It is not the fastest option for setting up a system, but often it is fast enough - particularly for just trying things out. + */ +template<typename IndexT, typename NumericT, typename PreconditionerT> +std::vector<NumericT> solve(std::vector< std::map<IndexT, NumericT> > const & A, std::vector<NumericT> const & rhs, bicgstab_tag const & tag, PreconditionerT const & precond) +{ + viennacl::compressed_matrix<NumericT> vcl_A; + viennacl::copy(A, vcl_A); + + viennacl::vector<NumericT> vcl_rhs(rhs.size()); + viennacl::copy(rhs, vcl_rhs); + + viennacl::vector<NumericT> vcl_result = solve(vcl_A, vcl_rhs, tag, precond); + + std::vector<NumericT> result(vcl_result.size()); + viennacl::copy(vcl_result, result); + return result; +} + +/** @brief Entry point for the unpreconditioned BiCGStab method. + * + * @param matrix The system matrix + * @param rhs Right hand side vector (load vector) + * @param tag A BiCGStab tag providing relative tolerances, etc. + */ +template<typename MatrixT, typename VectorT> +VectorT solve(MatrixT const & matrix, VectorT const & rhs, bicgstab_tag const & tag) +{ + return solve(matrix, rhs, tag, viennacl::linalg::no_precond()); +} + + + +template<typename VectorT> +class bicgstab_solver +{ +public: + typedef typename viennacl::result_of::cpu_value_type<VectorT>::type numeric_type; + + bicgstab_solver(bicgstab_tag const & tag) : tag_(tag), monitor_callback_(NULL), user_data_(NULL) {} + + template<typename MatrixT, typename PreconditionerT> + VectorT operator()(MatrixT const & A, VectorT const & b, PreconditionerT const & precond) const + { + if (viennacl::traits::size(init_guess_) > 0) // take initial guess into account + { + VectorT mod_rhs = viennacl::linalg::prod(A, init_guess_); + mod_rhs = b - mod_rhs; + VectorT y = detail::solve_impl(A, mod_rhs, tag_, precond, monitor_callback_, user_data_); + return init_guess_ + y; + } + return detail::solve_impl(A, b, tag_, precond, monitor_callback_, user_data_); + } + + + template<typename MatrixT> + VectorT operator()(MatrixT const & A, VectorT const & b) const + { + return operator()(A, b, viennacl::linalg::no_precond()); + } + + /** @brief Specifies an initial guess for the iterative solver. + * + * An iterative solver for Ax = b with initial guess x_0 is equivalent to an iterative solver for Ay = b' := b - Ax_0, where x = x_0 + y. + */ + void set_initial_guess(VectorT const & x) { init_guess_ = x; } + + /** @brief Sets a monitor function pointer to be called in each iteration. Set to NULL to run without monitor. + * + * The monitor function is called with the current guess for the result as first argument and the current relative residual estimate as second argument. + * The third argument is a pointer to user-defined data, through which additional information can be passed. + * This pointer needs to be set with set_monitor_data. If not set, NULL is passed. + * If the montior function returns true, the solver terminates (either convergence or divergence). + */ + void set_monitor(bool (*monitor_fun)(VectorT const &, numeric_type, void *), void *user_data) + { + monitor_callback_ = monitor_fun; + user_data_ = user_data; + } + + /** @brief Returns the solver tag containing basic configuration such as tolerances, etc. */ + bicgstab_tag const & tag() const { return tag_; } + +private: + bicgstab_tag tag_; + VectorT init_guess_; + bool (*monitor_callback_)(VectorT const &, numeric_type, void *); + void *user_data_; +}; + + +} +} + +#endif http://git-wip-us.apache.org/repos/asf/mahout/blob/f7c1f802/native-viennaCL/src/main/cpp/viennacl/linalg/bisect.hpp ---------------------------------------------------------------------- diff --git a/native-viennaCL/src/main/cpp/viennacl/linalg/bisect.hpp b/native-viennaCL/src/main/cpp/viennacl/linalg/bisect.hpp new file mode 100644 index 0000000..a2daf5e --- /dev/null +++ b/native-viennaCL/src/main/cpp/viennacl/linalg/bisect.hpp @@ -0,0 +1,179 @@ +#ifndef VIENNACL_LINALG_BISECT_HPP_ +#define VIENNACL_LINALG_BISECT_HPP_ + +/* ========================================================================= + Copyright (c) 2010-2016, Institute for Microelectronics, + Institute for Analysis and Scientific Computing, + TU Wien. + Portions of this software are copyright by UChicago Argonne, LLC. + + ----------------- + ViennaCL - The Vienna Computing Library + ----------------- + + Project Head: Karl Rupp [email protected] + + (A list of authors and contributors can be found in the manual) + + License: MIT (X11), see file LICENSE in the base directory +============================================================================= */ + +/** @file viennacl/linalg/bisect.hpp +* @brief Implementation of the algorithm for finding eigenvalues of a tridiagonal matrix. +* +* Contributed by Guenther Mader and Astrid Rupp. +*/ + +#include <vector> +#include <cmath> +#include <limits> +#include <cstddef> +#include "viennacl/meta/result_of.hpp" + +namespace viennacl +{ +namespace linalg +{ + +namespace detail +{ + /** + * @brief overloaded function for copying vectors + */ + template<typename NumericT, typename OtherVectorT> + void copy_vec_to_vec(viennacl::vector<NumericT> const & src, OtherVectorT & dest) + { + viennacl::copy(src, dest); + } + + template<typename OtherVectorT, typename NumericT> + void copy_vec_to_vec(OtherVectorT const & src, viennacl::vector<NumericT> & dest) + { + viennacl::copy(src, dest); + } + + template<typename VectorT1, typename VectorT2> + void copy_vec_to_vec(VectorT1 const & src, VectorT2 & dest) + { + for (vcl_size_t i=0; i<src.size(); ++i) + dest[i] = src[i]; + } + +} //namespace detail + +/** +* @brief Implementation of the bisect-algorithm for the calculation of the eigenvalues of a tridiagonal matrix. Experimental - interface might change. +* +* Refer to "Calculation of the Eigenvalues of a Symmetric Tridiagonal Matrix by the Method of Bisection" in the Handbook Series Linear Algebra, contributed by Barth, Martin, and Wilkinson. +* http://www.maths.ed.ac.uk/~aar/papers/bamawi.pdf +* +* @param alphas Elements of the main diagonal +* @param betas Elements of the secondary diagonal +* @return Returns the eigenvalues of the tridiagonal matrix defined by alpha and beta +*/ +template<typename VectorT> +std::vector< + typename viennacl::result_of::cpu_value_type<typename VectorT::value_type>::type + > +bisect(VectorT const & alphas, VectorT const & betas) +{ + typedef typename viennacl::result_of::value_type<VectorT>::type NumericType; + typedef typename viennacl::result_of::cpu_value_type<NumericType>::type CPU_NumericType; + + vcl_size_t size = betas.size(); + std::vector<CPU_NumericType> x_temp(size); + + + std::vector<CPU_NumericType> beta_bisect; + std::vector<CPU_NumericType> wu; + + double rel_error = std::numeric_limits<CPU_NumericType>::epsilon(); + beta_bisect.push_back(0); + + for (vcl_size_t i = 1; i < size; i++) + beta_bisect.push_back(betas[i] * betas[i]); + + double xmin = alphas[size - 1] - std::fabs(betas[size - 1]); + double xmax = alphas[size - 1] + std::fabs(betas[size - 1]); + + for (vcl_size_t i = 0; i < size - 1; i++) + { + double h = std::fabs(betas[i]) + std::fabs(betas[i + 1]); + if (alphas[i] + h > xmax) + xmax = alphas[i] + h; + if (alphas[i] - h < xmin) + xmin = alphas[i] - h; + } + + + double eps1 = 1e-6; + /*double eps2 = (xmin + xmax > 0) ? (rel_error * xmax) : (-rel_error * xmin); + if (eps1 <= 0) + eps1 = eps2; + else + eps2 = 0.5 * eps1 + 7.0 * eps2; */ + + double x0 = xmax; + + for (vcl_size_t i = 0; i < size; i++) + { + x_temp[i] = xmax; + wu.push_back(xmin); + } + + for (long k = static_cast<long>(size) - 1; k >= 0; --k) + { + double xu = xmin; + for (long i = k; i >= 0; --i) + { + if (xu < wu[vcl_size_t(k-i)]) + { + xu = wu[vcl_size_t(i)]; + break; + } + } + + if (x0 > x_temp[vcl_size_t(k)]) + x0 = x_temp[vcl_size_t(k)]; + + double x1 = (xu + x0) / 2.0; + while (x0 - xu > 2.0 * rel_error * (std::fabs(xu) + std::fabs(x0)) + eps1) + { + vcl_size_t a = 0; + double q = 1; + for (vcl_size_t i = 0; i < size; i++) + { + if (q > 0 || q < 0) + q = alphas[i] - x1 - beta_bisect[i] / q; + else + q = alphas[i] - x1 - std::fabs(betas[i] / rel_error); + + if (q < 0) + a++; + } + + if (a <= static_cast<vcl_size_t>(k)) + { + xu = x1; + if (a < 1) + wu[0] = x1; + else + { + wu[a] = x1; + if (x_temp[a - 1] > x1) + x_temp[a - 1] = x1; + } + } + else + x0 = x1; + + x1 = (xu + x0) / 2.0; + } + x_temp[vcl_size_t(k)] = x1; + } + return x_temp; +} + +} // end namespace linalg +} // end namespace viennacl +#endif http://git-wip-us.apache.org/repos/asf/mahout/blob/f7c1f802/native-viennaCL/src/main/cpp/viennacl/linalg/bisect_gpu.hpp ---------------------------------------------------------------------- diff --git a/native-viennaCL/src/main/cpp/viennacl/linalg/bisect_gpu.hpp b/native-viennaCL/src/main/cpp/viennacl/linalg/bisect_gpu.hpp new file mode 100644 index 0000000..6918b14 --- /dev/null +++ b/native-viennaCL/src/main/cpp/viennacl/linalg/bisect_gpu.hpp @@ -0,0 +1,173 @@ +#ifndef VIENNACL_LINALG_BISECT_GPU +#define VIENNACL_LINALG_BISECT_GPU + +/* ========================================================================= + Copyright (c) 2010-2016, Institute for Microelectronics, + Institute for Analysis and Scientific Computing, + TU Wien. + Portions of this software are copyright by UChicago Argonne, LLC. + + ----------------- + ViennaCL - The Vienna Computing Library + ----------------- + + Project Head: Karl Rupp [email protected] + + (A list of authors and contributors can be found in the manual) + + License: MIT (X11), see file LICENSE in the base directory +============================================================================= */ + + +/** @file viennacl/linalg/bisect_gpu.hpp + @brief Implementation of an bisection algorithm for eigenvalues + + Implementation based on the sample provided with the CUDA 6.0 SDK, for which + the creation of derivative works is allowed by including the following statement: + "This software contains source code provided by NVIDIA Corporation." +*/ +// includes, system +#include <stdlib.h> +#include <stdio.h> +#include <string.h> + +#include "viennacl/scalar.hpp" +#include "viennacl/vector.hpp" +#include "viennacl/matrix.hpp" + +// includes, project +#include "viennacl/linalg/detail/bisect/structs.hpp" +#include "viennacl/linalg/detail/bisect/gerschgorin.hpp" +#include "viennacl/linalg/detail/bisect/bisect_large.hpp" +#include "viennacl/linalg/detail/bisect/bisect_small.hpp" + + +namespace viennacl +{ +namespace linalg +{ +/////////////////////////////////////////////////////////////////////////// +//! @brief bisect The bisection algorithm computes the eigevalues +//! of a symmetric tridiagonal matrix. +//! @param diagonal diagonal elements of the matrix +//! @param superdiagonal superdiagonal elements of the matrix +//! @param eigenvalues Vectors with the eigenvalues in ascending order +//! @return return false if any errors occured +/// +//! overloaded function template: std::vectors as parameters +template<typename NumericT> +bool +bisect(const std::vector<NumericT> & diagonal, const std::vector<NumericT> & superdiagonal, std::vector<NumericT> & eigenvalues) +{ + assert(diagonal.size() == superdiagonal.size() && + diagonal.size() == eigenvalues.size() && + bool("Input vectors do not have the same sizes!")); + bool bResult = false; + // flag if the matrix size is due to explicit user request + // desired precision of eigenvalues + NumericT precision = static_cast<NumericT>(0.00001); + const unsigned int mat_size = static_cast<unsigned int>(diagonal.size()); + + // set up input + viennacl::linalg::detail::InputData<NumericT> input(diagonal, superdiagonal, mat_size); + + NumericT lg = FLT_MAX; + NumericT ug = -FLT_MAX; + // compute Gerschgorin interval + viennacl::linalg::detail::computeGerschgorin(input.std_a, input.std_b, mat_size, lg, ug); + + // decide wheter the algorithm for small or for large matrices will be started + if (mat_size <= VIENNACL_BISECT_MAX_SMALL_MATRIX) + { + // initialize memory for result + viennacl::linalg::detail::ResultDataSmall<NumericT> result(mat_size); + + // run the kernel + viennacl::linalg::detail::computeEigenvaluesSmallMatrix(input, result, mat_size, lg, ug, precision); + + // get the result from the device and do some sanity checks, + viennacl::linalg::detail::processResultSmallMatrix(result, mat_size); + eigenvalues = result.std_eigenvalues; + bResult = true; + } + + else + { + // initialize memory for result + viennacl::linalg::detail::ResultDataLarge<NumericT> result(mat_size); + + // run the kernel + viennacl::linalg::detail::computeEigenvaluesLargeMatrix(input, result, mat_size, lg, ug, precision); + + // get the result from the device and do some sanity checks + bResult = viennacl::linalg::detail::processResultDataLargeMatrix(result, mat_size); + + eigenvalues = result.std_eigenvalues; + } + return bResult; +} + + +/////////////////////////////////////////////////////////////////////////// +//! @brief bisect The bisection algorithm computes the eigevalues +//! of a symmetric tridiagonal matrix. +//! @param diagonal diagonal elements of the matrix +//! @param superdiagonal superdiagonal elements of the matrix +//! @param eigenvalues Vectors with the eigenvalues in ascending order +//! @return return false if any errors occured +/// +//! overloaded function template: viennacl::vectors as parameters +template<typename NumericT> +bool +bisect(const viennacl::vector<NumericT> & diagonal, const viennacl::vector<NumericT> & superdiagonal, viennacl::vector<NumericT> & eigenvalues) +{ + assert(diagonal.size() == superdiagonal.size() && + diagonal.size() == eigenvalues.size() && + bool("Input vectors do not have the same sizes!")); + bool bResult = false; + // flag if the matrix size is due to explicit user request + // desired precision of eigenvalues + NumericT precision = static_cast<NumericT>(0.00001); + const unsigned int mat_size = static_cast<unsigned int>(diagonal.size()); + + // set up input + viennacl::linalg::detail::InputData<NumericT> input(diagonal, superdiagonal, mat_size); + + NumericT lg = FLT_MAX; + NumericT ug = -FLT_MAX; + // compute Gerschgorin interval + viennacl::linalg::detail::computeGerschgorin(input.std_a, input.std_b, mat_size, lg, ug); + + // decide wheter the algorithm for small or for large matrices will be started + if (mat_size <= VIENNACL_BISECT_MAX_SMALL_MATRIX) + { + // initialize memory for result + viennacl::linalg::detail::ResultDataSmall<NumericT> result(mat_size); + + // run the kernel + viennacl::linalg::detail::computeEigenvaluesSmallMatrix(input, result, mat_size, lg, ug, precision); + + // get the result from the device and do some sanity checks, + viennacl::linalg::detail::processResultSmallMatrix(result, mat_size); + copy(result.std_eigenvalues, eigenvalues); + bResult = true; + } + + else + { + // initialize memory for result + viennacl::linalg::detail::ResultDataLarge<NumericT> result(mat_size); + + // run the kernel + viennacl::linalg::detail::computeEigenvaluesLargeMatrix(input, result, mat_size, lg, ug, precision); + + // get the result from the device and do some sanity checks + bResult = viennacl::linalg::detail::processResultDataLargeMatrix(result, mat_size); + + copy(result.std_eigenvalues, eigenvalues); + } + return bResult; +} +} // namespace linalg +} // namespace viennacl +#endif http://git-wip-us.apache.org/repos/asf/mahout/blob/f7c1f802/native-viennaCL/src/main/cpp/viennacl/linalg/cg.hpp ---------------------------------------------------------------------- diff --git a/native-viennaCL/src/main/cpp/viennacl/linalg/cg.hpp b/native-viennaCL/src/main/cpp/viennacl/linalg/cg.hpp new file mode 100644 index 0000000..93aae81 --- /dev/null +++ b/native-viennaCL/src/main/cpp/viennacl/linalg/cg.hpp @@ -0,0 +1,440 @@ +#ifndef VIENNACL_LINALG_CG_HPP_ +#define VIENNACL_LINALG_CG_HPP_ + +/* ========================================================================= + Copyright (c) 2010-2016, Institute for Microelectronics, + Institute for Analysis and Scientific Computing, + TU Wien. + Portions of this software are copyright by UChicago Argonne, LLC. + + ----------------- + ViennaCL - The Vienna Computing Library + ----------------- + + Project Head: Karl Rupp [email protected] + + (A list of authors and contributors can be found in the manual) + + License: MIT (X11), see file LICENSE in the base directory +============================================================================= */ + +/** @file viennacl/linalg/cg.hpp + @brief The conjugate gradient method is implemented here +*/ + +#include <vector> +#include <map> +#include <cmath> +#include <numeric> + +#include "viennacl/forwards.h" +#include "viennacl/tools/tools.hpp" +#include "viennacl/linalg/ilu.hpp" +#include "viennacl/linalg/prod.hpp" +#include "viennacl/linalg/inner_prod.hpp" +#include "viennacl/linalg/norm_2.hpp" +#include "viennacl/traits/clear.hpp" +#include "viennacl/traits/size.hpp" +#include "viennacl/meta/result_of.hpp" +#include "viennacl/linalg/iterative_operations.hpp" + +namespace viennacl +{ +namespace linalg +{ + +/** @brief A tag for the conjugate gradient Used for supplying solver parameters and for dispatching the solve() function +*/ +class cg_tag +{ +public: + /** @brief The constructor + * + * @param tol Relative tolerance for the residual (solver quits if ||r|| < tol * ||r_initial||) + * @param max_iterations The maximum number of iterations + */ + cg_tag(double tol = 1e-8, unsigned int max_iterations = 300) : tol_(tol), abs_tol_(0), iterations_(max_iterations) {} + + /** @brief Returns the relative tolerance */ + double tolerance() const { return tol_; } + + /** @brief Returns the absolute tolerance */ + double abs_tolerance() const { return abs_tol_; } + /** @brief Sets the absolute tolerance */ + void abs_tolerance(double new_tol) { if (new_tol >= 0) abs_tol_ = new_tol; } + + /** @brief Returns the maximum number of iterations */ + unsigned int max_iterations() const { return iterations_; } + + /** @brief Return the number of solver iterations: */ + unsigned int iters() const { return iters_taken_; } + void iters(unsigned int i) const { iters_taken_ = i; } + + /** @brief Returns the estimated relative error at the end of the solver run */ + double error() const { return last_error_; } + /** @brief Sets the estimated relative error at the end of the solver run */ + void error(double e) const { last_error_ = e; } + + +private: + double tol_; + double abs_tol_; + unsigned int iterations_; + + //return values from solver + mutable unsigned int iters_taken_; + mutable double last_error_; +}; + +namespace detail +{ + + /** @brief handles the no_precond case at minimal overhead */ + template<typename VectorT, typename PreconditionerT> + class z_handler{ + public: + z_handler(VectorT & residual) : z_(residual){ } + VectorT & get() { return z_; } + private: + VectorT z_; + }; + + template<typename VectorT> + class z_handler<VectorT, viennacl::linalg::no_precond>{ + public: + z_handler(VectorT & residual) : presidual_(&residual){ } + VectorT & get() { return *presidual_; } + private: + VectorT * presidual_; + }; + +} + +namespace detail +{ + + /** @brief Implementation of a pipelined conjugate gradient algorithm (no preconditioner), specialized for ViennaCL types. + * + * Pipelined version from A. T. Chronopoulos and C. W. Gear, J. Comput. Appl. Math. 25(2), 153â168 (1989) + * + * @param A The system matrix + * @param rhs The load vector + * @param tag Solver configuration tag + * @param monitor A callback routine which is called at each GMRES restart + * @param monitor_data Data pointer to be passed to the callback routine to pass on user-specific data + * @return The result vector + */ + //template<typename MatrixType, typename ScalarType> + template<typename MatrixT, typename NumericT> + viennacl::vector<NumericT> pipelined_solve(MatrixT const & A, //MatrixType const & A, + viennacl::vector<NumericT> const & rhs, + cg_tag const & tag, + viennacl::linalg::no_precond, + bool (*monitor)(viennacl::vector<NumericT> const &, NumericT, void*) = NULL, + void *monitor_data = NULL) + { + typedef typename viennacl::vector<NumericT>::difference_type difference_type; + + viennacl::vector<NumericT> result(rhs); + viennacl::traits::clear(result); + + viennacl::vector<NumericT> residual(rhs); + viennacl::vector<NumericT> p(rhs); + viennacl::vector<NumericT> Ap = viennacl::linalg::prod(A, p); + viennacl::vector<NumericT> inner_prod_buffer = viennacl::zero_vector<NumericT>(3*256, viennacl::traits::context(rhs)); // temporary buffer + std::vector<NumericT> host_inner_prod_buffer(inner_prod_buffer.size()); + vcl_size_t buffer_size_per_vector = inner_prod_buffer.size() / 3; + difference_type buffer_offset_per_vector = static_cast<difference_type>(buffer_size_per_vector); + + NumericT norm_rhs_squared = viennacl::linalg::norm_2(residual); norm_rhs_squared *= norm_rhs_squared; + + if (norm_rhs_squared <= tag.abs_tolerance() * tag.abs_tolerance()) //check for early convergence of A*x = 0 + return result; + + NumericT inner_prod_rr = norm_rhs_squared; + NumericT alpha = inner_prod_rr / viennacl::linalg::inner_prod(p, Ap); + NumericT beta = viennacl::linalg::norm_2(Ap); beta = (alpha * alpha * beta * beta - inner_prod_rr) / inner_prod_rr; + NumericT inner_prod_ApAp = 0; + NumericT inner_prod_pAp = 0; + + for (unsigned int i = 0; i < tag.max_iterations(); ++i) + { + tag.iters(i+1); + + viennacl::linalg::pipelined_cg_vector_update(result, alpha, p, residual, Ap, beta, inner_prod_buffer); + viennacl::linalg::pipelined_cg_prod(A, p, Ap, inner_prod_buffer); + + // bring back the partial results to the host: + viennacl::fast_copy(inner_prod_buffer.begin(), inner_prod_buffer.end(), host_inner_prod_buffer.begin()); + + inner_prod_rr = std::accumulate(host_inner_prod_buffer.begin(), host_inner_prod_buffer.begin() + buffer_offset_per_vector, NumericT(0)); + inner_prod_ApAp = std::accumulate(host_inner_prod_buffer.begin() + buffer_offset_per_vector, host_inner_prod_buffer.begin() + 2 * buffer_offset_per_vector, NumericT(0)); + inner_prod_pAp = std::accumulate(host_inner_prod_buffer.begin() + 2 * buffer_offset_per_vector, host_inner_prod_buffer.begin() + 3 * buffer_offset_per_vector, NumericT(0)); + + if (monitor && monitor(result, std::sqrt(std::fabs(inner_prod_rr / norm_rhs_squared)), monitor_data)) + break; + if (std::fabs(inner_prod_rr / norm_rhs_squared) < tag.tolerance() * tag.tolerance() || std::fabs(inner_prod_rr) < tag.abs_tolerance() * tag.abs_tolerance()) //squared norms involved here + break; + + alpha = inner_prod_rr / inner_prod_pAp; + beta = (alpha*alpha*inner_prod_ApAp - inner_prod_rr) / inner_prod_rr; + } + + //store last error estimate: + tag.error(std::sqrt(std::fabs(inner_prod_rr) / norm_rhs_squared)); + + return result; + } + + + /** @brief Overload for the pipelined CG implementation for the ViennaCL sparse matrix types */ + template<typename NumericT> + viennacl::vector<NumericT> solve_impl(viennacl::compressed_matrix<NumericT> const & A, + viennacl::vector<NumericT> const & rhs, + cg_tag const & tag, + viennacl::linalg::no_precond, + bool (*monitor)(viennacl::vector<NumericT> const &, NumericT, void*) = NULL, + void *monitor_data = NULL) + { + return pipelined_solve(A, rhs, tag, viennacl::linalg::no_precond(), monitor, monitor_data); + } + + + /** @brief Overload for the pipelined CG implementation for the ViennaCL sparse matrix types */ + template<typename NumericT> + viennacl::vector<NumericT> solve_impl(viennacl::coordinate_matrix<NumericT> const & A, + viennacl::vector<NumericT> const & rhs, + cg_tag const & tag, + viennacl::linalg::no_precond, + bool (*monitor)(viennacl::vector<NumericT> const &, NumericT, void*) = NULL, + void *monitor_data = NULL) + { + return detail::pipelined_solve(A, rhs, tag, viennacl::linalg::no_precond(), monitor, monitor_data); + } + + + + /** @brief Overload for the pipelined CG implementation for the ViennaCL sparse matrix types */ + template<typename NumericT> + viennacl::vector<NumericT> solve_impl(viennacl::ell_matrix<NumericT> const & A, + viennacl::vector<NumericT> const & rhs, + cg_tag const & tag, + viennacl::linalg::no_precond, + bool (*monitor)(viennacl::vector<NumericT> const &, NumericT, void*) = NULL, + void *monitor_data = NULL) + { + return detail::pipelined_solve(A, rhs, tag, viennacl::linalg::no_precond(), monitor, monitor_data); + } + + + + /** @brief Overload for the pipelined CG implementation for the ViennaCL sparse matrix types */ + template<typename NumericT> + viennacl::vector<NumericT> solve_impl(viennacl::sliced_ell_matrix<NumericT> const & A, + viennacl::vector<NumericT> const & rhs, + cg_tag const & tag, + viennacl::linalg::no_precond, + bool (*monitor)(viennacl::vector<NumericT> const &, NumericT, void*) = NULL, + void *monitor_data = NULL) + { + return detail::pipelined_solve(A, rhs, tag, viennacl::linalg::no_precond(), monitor, monitor_data); + } + + + /** @brief Overload for the pipelined CG implementation for the ViennaCL sparse matrix types */ + template<typename NumericT> + viennacl::vector<NumericT> solve_impl(viennacl::hyb_matrix<NumericT> const & A, + viennacl::vector<NumericT> const & rhs, + cg_tag const & tag, + viennacl::linalg::no_precond, + bool (*monitor)(viennacl::vector<NumericT> const &, NumericT, void*) = NULL, + void *monitor_data = NULL) + { + return detail::pipelined_solve(A, rhs, tag, viennacl::linalg::no_precond(), monitor, monitor_data); + } + + + template<typename MatrixT, typename VectorT, typename PreconditionerT> + VectorT solve_impl(MatrixT const & matrix, + VectorT const & rhs, + cg_tag const & tag, + PreconditionerT const & precond, + bool (*monitor)(VectorT const &, typename viennacl::result_of::cpu_value_type<typename viennacl::result_of::value_type<VectorT>::type>::type, void*) = NULL, + void *monitor_data = NULL) + { + typedef typename viennacl::result_of::value_type<VectorT>::type NumericType; + typedef typename viennacl::result_of::cpu_value_type<NumericType>::type CPU_NumericType; + + VectorT result = rhs; + viennacl::traits::clear(result); + + VectorT residual = rhs; + VectorT tmp = rhs; + detail::z_handler<VectorT, PreconditionerT> zhandler(residual); + VectorT & z = zhandler.get(); + + precond.apply(z); + VectorT p = z; + + CPU_NumericType ip_rr = viennacl::linalg::inner_prod(residual, z); + CPU_NumericType alpha; + CPU_NumericType new_ip_rr = 0; + CPU_NumericType beta; + CPU_NumericType norm_rhs_squared = ip_rr; + CPU_NumericType new_ipp_rr_over_norm_rhs; + + if (norm_rhs_squared <= tag.abs_tolerance() * tag.abs_tolerance()) //solution is zero if RHS norm (squared) is zero + return result; + + for (unsigned int i = 0; i < tag.max_iterations(); ++i) + { + tag.iters(i+1); + tmp = viennacl::linalg::prod(matrix, p); + + alpha = ip_rr / viennacl::linalg::inner_prod(tmp, p); + + result += alpha * p; + residual -= alpha * tmp; + z = residual; + precond.apply(z); + + if (static_cast<VectorT*>(&residual)==static_cast<VectorT*>(&z)) + new_ip_rr = std::pow(viennacl::linalg::norm_2(residual),2); + else + new_ip_rr = viennacl::linalg::inner_prod(residual, z); + + new_ipp_rr_over_norm_rhs = new_ip_rr / norm_rhs_squared; + if (monitor && monitor(result, std::sqrt(std::fabs(new_ipp_rr_over_norm_rhs)), monitor_data)) + break; + if (std::fabs(new_ipp_rr_over_norm_rhs) < tag.tolerance() * tag.tolerance() || std::fabs(new_ip_rr) < tag.abs_tolerance() * tag.abs_tolerance()) //squared norms involved here + break; + + beta = new_ip_rr / ip_rr; + ip_rr = new_ip_rr; + + p = z + beta*p; + } + + //store last error estimate: + tag.error(std::sqrt(std::fabs(new_ip_rr / norm_rhs_squared))); + + return result; + } + +} + + + +/** @brief Implementation of the preconditioned conjugate gradient solver, generic implementation for non-ViennaCL types. +* +* Following Algorithm 9.1 in "Iterative Methods for Sparse Linear Systems" by Y. Saad +* +* @param matrix The system matrix +* @param rhs The load vector +* @param tag Solver configuration tag +* @param precond A preconditioner. Precondition operation is done via member function apply() +* @return The result vector +*/ +template<typename MatrixT, typename VectorT, typename PreconditionerT> +VectorT solve(MatrixT const & matrix, VectorT const & rhs, cg_tag const & tag, PreconditionerT const & precond) +{ + return detail::solve_impl(matrix, rhs, tag, precond); +} + +/** @brief Convenience overload for calling the CG solver using types from the C++ STL. + * + * A std::vector<std::map<T, U> > matrix is convenient for e.g. finite element assembly. + * It is not the fastest option for setting up a system, but often it is fast enough - particularly for just trying things out. + */ +template<typename IndexT, typename NumericT, typename PreconditionerT> +std::vector<NumericT> solve(std::vector< std::map<IndexT, NumericT> > const & A, std::vector<NumericT> const & rhs, cg_tag const & tag, PreconditionerT const & precond) +{ + viennacl::compressed_matrix<NumericT> vcl_A; + viennacl::copy(A, vcl_A); + + viennacl::vector<NumericT> vcl_rhs(rhs.size()); + viennacl::copy(rhs, vcl_rhs); + + viennacl::vector<NumericT> vcl_result = solve(vcl_A, vcl_rhs, tag, precond); + + std::vector<NumericT> result(vcl_result.size()); + viennacl::copy(vcl_result, result); + return result; +} + +/** @brief Entry point for the unpreconditioned CG method. + * + * @param matrix The system matrix + * @param rhs Right hand side vector (load vector) + * @param tag A BiCGStab tag providing relative tolerances, etc. + */ +template<typename MatrixT, typename VectorT> +VectorT solve(MatrixT const & matrix, VectorT const & rhs, cg_tag const & tag) +{ + return solve(matrix, rhs, tag, viennacl::linalg::no_precond()); +} + + + +template<typename VectorT> +class cg_solver +{ +public: + typedef typename viennacl::result_of::cpu_value_type<VectorT>::type numeric_type; + + cg_solver(cg_tag const & tag) : tag_(tag), monitor_callback_(NULL), user_data_(NULL) {} + + template<typename MatrixT, typename PreconditionerT> + VectorT operator()(MatrixT const & A, VectorT const & b, PreconditionerT const & precond) const + { + if (viennacl::traits::size(init_guess_) > 0) // take initial guess into account + { + VectorT mod_rhs = viennacl::linalg::prod(A, init_guess_); + mod_rhs = b - mod_rhs; + VectorT y = detail::solve_impl(A, mod_rhs, tag_, precond, monitor_callback_, user_data_); + return init_guess_ + y; + } + return detail::solve_impl(A, b, tag_, precond, monitor_callback_, user_data_); + } + + + template<typename MatrixT> + VectorT operator()(MatrixT const & A, VectorT const & b) const + { + return operator()(A, b, viennacl::linalg::no_precond()); + } + + /** @brief Specifies an initial guess for the iterative solver. + * + * An iterative solver for Ax = b with initial guess x_0 is equivalent to an iterative solver for Ay = b' := b - Ax_0, where x = x_0 + y. + */ + void set_initial_guess(VectorT const & x) { init_guess_ = x; } + + /** @brief Sets a monitor function pointer to be called in each iteration. Set to NULL to run without monitor. + * + * The monitor function is called with the current guess for the result as first argument and the current relative residual estimate as second argument. + * The third argument is a pointer to user-defined data, through which additional information can be passed. + * This pointer needs to be set with set_monitor_data. If not set, NULL is passed. + * If the montior function returns true, the solver terminates (either convergence or divergence). + */ + void set_monitor(bool (*monitor_fun)(VectorT const &, numeric_type, void *), void *user_data) + { + monitor_callback_ = monitor_fun; + user_data_ = user_data; + } + + /** @brief Returns the solver tag containing basic configuration such as tolerances, etc. */ + cg_tag const & tag() const { return tag_; } + +private: + cg_tag tag_; + VectorT init_guess_; + bool (*monitor_callback_)(VectorT const &, numeric_type, void *); + void *user_data_; +}; + + +} +} + +#endif http://git-wip-us.apache.org/repos/asf/mahout/blob/f7c1f802/native-viennaCL/src/main/cpp/viennacl/linalg/circulant_matrix_operations.hpp ---------------------------------------------------------------------- diff --git a/native-viennaCL/src/main/cpp/viennacl/linalg/circulant_matrix_operations.hpp b/native-viennaCL/src/main/cpp/viennacl/linalg/circulant_matrix_operations.hpp new file mode 100644 index 0000000..5325b7b --- /dev/null +++ b/native-viennaCL/src/main/cpp/viennacl/linalg/circulant_matrix_operations.hpp @@ -0,0 +1,75 @@ +#ifndef VIENNACL_LINALG_CIRCULANT_MATRIX_OPERATIONS_HPP_ +#define VIENNACL_LINALG_CIRCULANT_MATRIX_OPERATIONS_HPP_ + +/* ========================================================================= + Copyright (c) 2010-2016, Institute for Microelectronics, + Institute for Analysis and Scientific Computing, + TU Wien. + Portions of this software are copyright by UChicago Argonne, LLC. + + ----------------- + ViennaCL - The Vienna Computing Library + ----------------- + + Project Head: Karl Rupp [email protected] + + (A list of authors and contributors can be found in the manual) + + License: MIT (X11), see file LICENSE in the base directory +============================================================================= */ + +/** @file viennacl/linalg/circulant_matrix_operations.hpp + @brief Implementations of operations using circulant_matrix. Experimental. +*/ + +#include "viennacl/forwards.h" +#include "viennacl/ocl/backend.hpp" +#include "viennacl/scalar.hpp" +#include "viennacl/vector.hpp" +#include "viennacl/tools/tools.hpp" +#include "viennacl/fft.hpp" +//#include "viennacl/linalg/kernels/coordinate_matrix_kernels.h" + +namespace viennacl +{ +namespace linalg +{ + +// A * x + +/** @brief Carries out matrix-vector multiplication with a circulant_matrix +* +* Implementation of the convenience expression result = prod(mat, vec); +* +* @param mat The matrix +* @param vec The vector +* @param result The result vector +*/ +template<typename NumericT, unsigned int AlignmentV> +void prod_impl(viennacl::circulant_matrix<NumericT, AlignmentV> const & mat, + viennacl::vector_base<NumericT> const & vec, + viennacl::vector_base<NumericT> & result) +{ + assert(mat.size1() == result.size() && bool("Dimension mismatch")); + assert(mat.size2() == vec.size() && bool("Dimension mismatch")); + //result.clear(); + + //std::cout << "prod(circulant_matrix" << ALIGNMENT << ", vector) called with internal_nnz=" << mat.internal_nnz() << std::endl; + + viennacl::vector<NumericT> circ(mat.elements().size() * 2); + viennacl::linalg::real_to_complex(mat.elements(), circ, mat.elements().size()); + + viennacl::vector<NumericT> tmp(vec.size() * 2); + viennacl::vector<NumericT> tmp2(vec.size() * 2); + + viennacl::linalg::real_to_complex(vec, tmp, vec.size()); + viennacl::linalg::convolve(circ, tmp, tmp2); + viennacl::linalg::complex_to_real(tmp2, result, vec.size()); + +} + +} //namespace linalg +} //namespace viennacl + + +#endif
