http://git-wip-us.apache.org/repos/asf/mahout/blob/7ae549fa/native-viennaCL/src/main/cpp/viennacl/linalg/detail/amg/amg_base.hpp ---------------------------------------------------------------------- diff --git a/native-viennaCL/src/main/cpp/viennacl/linalg/detail/amg/amg_base.hpp b/native-viennaCL/src/main/cpp/viennacl/linalg/detail/amg/amg_base.hpp deleted file mode 100644 index 8361308..0000000 --- a/native-viennaCL/src/main/cpp/viennacl/linalg/detail/amg/amg_base.hpp +++ /dev/null @@ -1,208 +0,0 @@ -#ifndef VIENNACL_LINALG_DETAIL_AMG_AMG_BASE_HPP_ -#define VIENNACL_LINALG_DETAIL_AMG_AMG_BASE_HPP_ - -/* ========================================================================= - Copyright (c) 2010-2016, Institute for Microelectronics, - Institute for Analysis and Scientific Computing, - TU Wien. - Portions of this software are copyright by UChicago Argonne, LLC. - - ----------------- - ViennaCL - The Vienna Computing Library - ----------------- - - Project Head: Karl Rupp [email protected] - - (A list of authors and contributors can be found in the manual) - - License: MIT (X11), see file LICENSE in the base directory -============================================================================= */ - -/** @file amg_base.hpp - @brief Helper classes and functions for the AMG preconditioner. Experimental. - - AMG code contributed by Markus Wagner -*/ - -#include <cmath> -#include <set> -#include <list> -#include <stdexcept> -#include <algorithm> - -#include <map> -#ifdef VIENNACL_WITH_OPENMP -#include <omp.h> -#endif - -#include "viennacl/context.hpp" - -namespace viennacl -{ -namespace linalg -{ - -/** @brief Enumeration of coarsening methods for algebraic multigrid. */ -enum amg_coarsening_method -{ - AMG_COARSENING_METHOD_ONEPASS = 1, - AMG_COARSENING_METHOD_AGGREGATION, - AMG_COARSENING_METHOD_MIS2_AGGREGATION -}; - -/** @brief Enumeration of interpolation methods for algebraic multigrid. */ -enum amg_interpolation_method -{ - AMG_INTERPOLATION_METHOD_DIRECT = 1, - AMG_INTERPOLATION_METHOD_AGGREGATION, - AMG_INTERPOLATION_METHOD_SMOOTHED_AGGREGATION -}; - - -/** @brief A tag for algebraic multigrid (AMG). Used to transport information from the user to the implementation. -*/ -class amg_tag -{ -public: - /** @brief The constructor, setting default values for the various parameters. - * - * Default coarsening routine: Aggreggation based on maximum independent sets of distance (MIS-2) - * Default interpolation routine: Smoothed aggregation - * Default threshold for strong connections: 0.1 (customizations are recommeded!) - * Default weight for Jacobi smoother: 1.0 - * Default number of pre-smooth operations: 2 - * Default number of post-smooth operations: 2 - * Default number of coarse levels: 0 (this indicates that as many coarse levels as needed are constructed until the cutoff is reached) - * Default coarse grid size for direct solver (coarsening cutoff): 50 - */ - amg_tag() - : coarsening_method_(AMG_COARSENING_METHOD_MIS2_AGGREGATION), interpolation_method_(AMG_INTERPOLATION_METHOD_AGGREGATION), - strong_connection_threshold_(0.1), jacobi_weight_(1.0), - presmooth_steps_(2), postsmooth_steps_(2), - coarse_levels_(0), coarse_cutoff_(50) {} - - // Getter-/Setter-Functions - /** @brief Sets the strategy used for constructing coarse grids */ - void set_coarsening_method(amg_coarsening_method s) { coarsening_method_ = s; } - /** @brief Returns the current coarsening strategy */ - amg_coarsening_method get_coarsening_method() const { return coarsening_method_; } - - /** @brief Sets the interpolation method to the provided method */ - void set_interpolation_method(amg_interpolation_method interpol) { interpolation_method_ = interpol; } - /** @brief Returns the current interpolation method */ - amg_interpolation_method get_interpolation_method() const { return interpolation_method_; } - - /** @brief Sets the strong connection threshold. Customizations by the user essential for best results! - * - * With classical interpolation, a connection is considered strong if |a_ij| >= threshold * max_k(|a_ik|) - * Strength of connection currently ignored for aggregation-based coarsening (to be added in the future). - */ - void set_strong_connection_threshold(double threshold) { if (threshold > 0) strong_connection_threshold_ = threshold; } - /** @brief Returns the strong connection threshold parameter. - * - * @see set_strong_connection_threshold() for an explanation of the threshold parameter - */ - double get_strong_connection_threshold() const { return strong_connection_threshold_; } - - /** @brief Sets the weight (damping) for the Jacobi smoother. - * - * The optimal value depends on the problem at hand. Values of 0.67 or 1.0 are usually a good starting point for further experiments. - */ - void set_jacobi_weight(double w) { if (w > 0) jacobi_weight_ = w; } - /** @brief Returns the Jacobi smoother weight (damping). */ - double get_jacobi_weight() const { return jacobi_weight_; } - - /** @brief Sets the number of smoother applications on the fine level before restriction to the coarser level. */ - void set_presmooth_steps(vcl_size_t steps) { presmooth_steps_ = steps; } - /** @brief Returns the number of smoother applications on the fine level before restriction to the coarser level. */ - vcl_size_t get_presmooth_steps() const { return presmooth_steps_; } - - /** @brief Sets the number of smoother applications on the coarse level before interpolation to the finer level. */ - void set_postsmooth_steps(vcl_size_t steps) { postsmooth_steps_ = steps; } - /** @brief Returns the number of smoother applications on the coarse level before interpolation to the finer level. */ - vcl_size_t get_postsmooth_steps() const { return postsmooth_steps_; } - - /** @brief Sets the number of coarse levels. If set to zero, then coarse levels are constructed until the cutoff size is reached. */ - void set_coarse_levels(vcl_size_t levels) { coarse_levels_ = levels; } - /** @brief Returns the number of coarse levels. If zero, then coarse levels are constructed until the cutoff size is reached. */ - vcl_size_t get_coarse_levels() const { return coarse_levels_; } - - /** @brief Sets the coarse grid size for which the recursive multigrid scheme is stopped and a direct solver is used. */ - void set_coarsening_cutoff(vcl_size_t size) { coarse_cutoff_ = size; } - /** @brief Returns the coarse grid size for which the recursive multigrid scheme is stopped and a direct solver is used. */ - vcl_size_t get_coarsening_cutoff() const { return coarse_cutoff_; } - - /** @brief Sets the ViennaCL context for the setup stage. Set this to a host context if you want to run the setup on the host. - * - * Set the ViennaCL context for the solver application via set_target_context(). - * Target and setup context can be different. - */ - void set_setup_context(viennacl::context ctx) { setup_ctx_ = ctx; } - /** @brief Returns the ViennaCL context for the preconditioenr setup. */ - viennacl::context const & get_setup_context() const { return setup_ctx_; } - - /** @brief Sets the ViennaCL context for the solver cycle stage (i.e. preconditioner applications). - * - * Since the cycle stage easily benefits from accelerators, you usually want to set this to a CUDA or OpenCL-enabled context. - */ - void set_target_context(viennacl::context ctx) { target_ctx_ = ctx; } - /** @brief Returns the ViennaCL context for the solver cycle stage (i.e. preconditioner applications). */ - viennacl::context const & get_target_context() const { return target_ctx_; } - -private: - amg_coarsening_method coarsening_method_; - amg_interpolation_method interpolation_method_; - double strong_connection_threshold_, jacobi_weight_; - vcl_size_t presmooth_steps_, postsmooth_steps_, coarse_levels_, coarse_cutoff_; - viennacl::context setup_ctx_, target_ctx_; -}; - - -namespace detail -{ -namespace amg -{ - - - struct amg_level_context - { - void resize(vcl_size_t num_points, vcl_size_t max_nnz) - { - influence_jumper_.resize(num_points + 1, false); - influence_ids_.resize(max_nnz, false); - influence_values_.resize(num_points, false); - point_types_.resize(num_points, false); - coarse_id_.resize(num_points, false); - } - - void switch_context(viennacl::context ctx) - { - influence_jumper_.switch_memory_context(ctx); - influence_ids_.switch_memory_context(ctx); - influence_values_.switch_memory_context(ctx); - point_types_.switch_memory_context(ctx); - coarse_id_.switch_memory_context(ctx); - } - - enum - { - POINT_TYPE_UNDECIDED = 0, - POINT_TYPE_COARSE, - POINT_TYPE_FINE - } amg_point_types; - - viennacl::vector<unsigned int> influence_jumper_; // similar to row_buffer for CSR matrices - viennacl::vector<unsigned int> influence_ids_; // IDs of influencing points - viennacl::vector<unsigned int> influence_values_; // Influence measure for each point - viennacl::vector<unsigned int> point_types_; // 0: undecided, 1: coarse point, 2: fine point. Using char here because type for enum might be a larger type - viennacl::vector<unsigned int> coarse_id_; // coarse ID used on the next level. Only valid for coarse points. Fine points may (ab)use their entry for something else. - unsigned int num_coarse_; - }; - - -} //namespace amg -} -} -} - -#endif
http://git-wip-us.apache.org/repos/asf/mahout/blob/7ae549fa/native-viennaCL/src/main/cpp/viennacl/linalg/detail/bisect/bisect_kernel_calls.hpp ---------------------------------------------------------------------- diff --git a/native-viennaCL/src/main/cpp/viennacl/linalg/detail/bisect/bisect_kernel_calls.hpp b/native-viennaCL/src/main/cpp/viennacl/linalg/detail/bisect/bisect_kernel_calls.hpp deleted file mode 100644 index 8308f77..0000000 --- a/native-viennaCL/src/main/cpp/viennacl/linalg/detail/bisect/bisect_kernel_calls.hpp +++ /dev/null @@ -1,191 +0,0 @@ -#ifndef VIENNACL_LINALG_DETAIL_BISECT_KERNEL_CALLS_HPP_ -#define VIENNACL_LINALG_DETAIL_BISECT_KERNEL_CALLS_HPP_ - -/* ========================================================================= - Copyright (c) 2010-2016, Institute for Microelectronics, - Institute for Analysis and Scientific Computing, - TU Wien. - Portions of this software are copyright by UChicago Argonne, LLC. - - ----------------- - ViennaCL - The Vienna Computing Library - ----------------- - - Project Head: Karl Rupp [email protected] - - (A list of authors and contributors can be found in the manual) - - License: MIT (X11), see file LICENSE in the base directory -============================================================================= */ - -/** @file viennacl/linalg/detail/bisect/bisect_kernel_calls.hpp - @brief Kernel calls for the bisection algorithm - - Implementation based on the sample provided with the CUDA 6.0 SDK, for which - the creation of derivative works is allowed by including the following statement: - "This software contains source code provided by NVIDIA Corporation." -*/ - - -#include "viennacl/forwards.h" -#include "viennacl/scalar.hpp" -#include "viennacl/vector.hpp" -#include "viennacl/vector_proxy.hpp" -#include "viennacl/tools/tools.hpp" -#include "viennacl/meta/enable_if.hpp" -#include "viennacl/meta/predicate.hpp" -#include "viennacl/meta/result_of.hpp" -#include "viennacl/traits/size.hpp" -#include "viennacl/traits/start.hpp" -#include "viennacl/traits/handle.hpp" -#include "viennacl/traits/stride.hpp" - -#include "viennacl/linalg/detail/bisect/structs.hpp" -#ifdef VIENNACL_WITH_OPENCL - #include "viennacl/linalg/opencl/bisect_kernel_calls.hpp" -#endif - -#ifdef VIENNACL_WITH_CUDA - #include "viennacl/linalg/cuda/bisect_kernel_calls.hpp" -#endif - -namespace viennacl -{ -namespace linalg -{ -namespace detail -{ - template<typename NumericT> - void bisectSmall(const InputData<NumericT> &input, ResultDataSmall<NumericT> &result, - const unsigned int mat_size, - const NumericT lg, const NumericT ug, - const NumericT precision) - { - switch (viennacl::traits::handle(input.g_a).get_active_handle_id()) - { -#ifdef VIENNACL_WITH_OPENCL - case viennacl::OPENCL_MEMORY: - viennacl::linalg::opencl::bisectSmall(input, result, - mat_size, - lg,ug, - precision); - break; -#endif -#ifdef VIENNACL_WITH_CUDA - case viennacl::CUDA_MEMORY: - viennacl::linalg::cuda::bisectSmall(input, result, - mat_size, - lg,ug, - precision); - break; -#endif - case viennacl::MEMORY_NOT_INITIALIZED: - throw memory_exception("not initialised!"); - default: - throw memory_exception("not implemented"); - } - } - - - - - template<typename NumericT> - void bisectLarge(const InputData<NumericT> &input, ResultDataLarge<NumericT> &result, - const unsigned int mat_size, - const NumericT lg, const NumericT ug, - const NumericT precision) - { - switch (viennacl::traits::handle(input.g_a).get_active_handle_id()) - { -#ifdef VIENNACL_WITH_OPENCL - case viennacl::OPENCL_MEMORY: - viennacl::linalg::opencl::bisectLarge(input, result, - mat_size, - lg,ug, - precision); - break; -#endif -#ifdef VIENNACL_WITH_CUDA - case viennacl::CUDA_MEMORY: - viennacl::linalg::cuda::bisectLarge(input, result, - mat_size, - lg,ug, - precision); - break; -#endif - case viennacl::MEMORY_NOT_INITIALIZED: - throw memory_exception("not initialised!"); - default: - throw memory_exception("not implemented"); - } - } - - - - - - template<typename NumericT> - void bisectLarge_OneIntervals(const InputData<NumericT> &input, ResultDataLarge<NumericT> &result, - const unsigned int mat_size, - const NumericT precision) - { - switch (viennacl::traits::handle(input.g_a).get_active_handle_id()) - { -#ifdef VIENNACL_WITH_OPENCL - case viennacl::OPENCL_MEMORY: - viennacl::linalg::opencl::bisectLargeOneIntervals(input, result, - mat_size, - precision); - break; -#endif -#ifdef VIENNACL_WITH_CUDA - case viennacl::CUDA_MEMORY: - viennacl::linalg::cuda::bisectLarge_OneIntervals(input, result, - mat_size, - precision); - - break; -#endif - case viennacl::MEMORY_NOT_INITIALIZED: - throw memory_exception("not initialised!"); - default: - throw memory_exception("not implemented"); - } - } - - - - - template<typename NumericT> - void bisectLarge_MultIntervals(const InputData<NumericT> &input, ResultDataLarge<NumericT> &result, - const unsigned int mat_size, - const NumericT precision) - { - switch (viennacl::traits::handle(input.g_a).get_active_handle_id()) - { -#ifdef VIENNACL_WITH_OPENCL - case viennacl::OPENCL_MEMORY: - viennacl::linalg::opencl::bisectLargeMultIntervals(input, result, - mat_size, - precision); - break; -#endif -#ifdef VIENNACL_WITH_CUDA - case viennacl::CUDA_MEMORY: - viennacl::linalg::cuda::bisectLarge_MultIntervals(input, result, - mat_size, - precision); - break; -#endif - case viennacl::MEMORY_NOT_INITIALIZED: - throw memory_exception("not initialised!"); - default: - throw memory_exception("not implemented"); - } - } -} // namespace detail -} // namespace linalg -} //namespace viennacl - - -#endif http://git-wip-us.apache.org/repos/asf/mahout/blob/7ae549fa/native-viennaCL/src/main/cpp/viennacl/linalg/detail/bisect/bisect_large.hpp ---------------------------------------------------------------------- diff --git a/native-viennaCL/src/main/cpp/viennacl/linalg/detail/bisect/bisect_large.hpp b/native-viennaCL/src/main/cpp/viennacl/linalg/detail/bisect/bisect_large.hpp deleted file mode 100755 index 337858f..0000000 --- a/native-viennaCL/src/main/cpp/viennacl/linalg/detail/bisect/bisect_large.hpp +++ /dev/null @@ -1,142 +0,0 @@ -#ifndef VIENNACL_LINALG_DETAIL_BISECT_BISECT_LARGE_HPP_ -#define VIENNACL_LINALG_DETAIL_BISECT_BISECT_LARGE_HPP_ - -/* ========================================================================= - Copyright (c) 2010-2016, Institute for Microelectronics, - Institute for Analysis and Scientific Computing, - TU Wien. - Portions of this software are copyright by UChicago Argonne, LLC. - - ----------------- - ViennaCL - The Vienna Computing Library - ----------------- - - Project Head: Karl Rupp [email protected] - - (A list of authors and contributors can be found in the manual) - - License: MIT (X11), see file LICENSE in the base directory -============================================================================= */ - - -/** @file viennacl/linalg/detail//bisect/bisect_large.hpp - @brief Computation of eigenvalues of a large symmetric, tridiagonal matrix - - Implementation based on the sample provided with the CUDA 6.0 SDK, for which - the creation of derivative works is allowed by including the following statement: - "This software contains source code provided by NVIDIA Corporation." -*/ - - -// includes, system -#include <iostream> -#include <iomanip> -#include <stdlib.h> -#include <stdio.h> - -// includes, project -#include "viennacl/linalg/detail/bisect/config.hpp" -#include "viennacl/linalg/detail/bisect/structs.hpp" -#include "viennacl/linalg/detail/bisect/bisect_kernel_calls.hpp" - -namespace viennacl -{ -namespace linalg -{ -namespace detail -{ - -//////////////////////////////////////////////////////////////////////////////// -//! Run the kernels to compute the eigenvalues for large matrices -//! @param input handles to input data -//! @param result handles to result data -//! @param mat_size matrix size -//! @param precision desired precision of eigenvalues -//! @param lg lower limit of Gerschgorin interval -//! @param ug upper limit of Gerschgorin interval -//////////////////////////////////////////////////////////////////////////////// -template<typename NumericT> -void -computeEigenvaluesLargeMatrix(InputData<NumericT> &input, ResultDataLarge<NumericT> &result, - const unsigned int mat_size, - const NumericT lg, const NumericT ug, const NumericT precision) -{ - - - // First kernel call: decide on which intervals bisect_Large_OneIntervals/ - // bisect_Large_MultIntervals is executed - viennacl::linalg::detail::bisectLarge(input, result, mat_size, lg, ug, precision); - - // compute eigenvalues for intervals that contained only one eigenvalue - // after the first processing step - viennacl::linalg::detail::bisectLarge_OneIntervals(input, result, mat_size, precision); - - // process intervals that contained more than one eigenvalue after - // the first processing step - viennacl::linalg::detail::bisectLarge_MultIntervals(input, result, mat_size, precision); - -} - -//////////////////////////////////////////////////////////////////////////////// -//! Process the result, that is obtain result from device and do simple sanity -//! checking -//! @param result handles to result data -//! @param mat_size matrix size -//////////////////////////////////////////////////////////////////////////////// -template<typename NumericT> -bool -processResultDataLargeMatrix(ResultDataLarge<NumericT> &result, - const unsigned int mat_size) -{ - bool bCompareResult = true; - // copy data from intervals that contained more than one eigenvalue after - // the first processing step - std::vector<NumericT> lambda_mult(mat_size); - viennacl::copy(result.g_lambda_mult, lambda_mult); - - std::vector<unsigned int> pos_mult(mat_size); - viennacl::copy(result.g_pos_mult, pos_mult); - - std::vector<unsigned int> blocks_mult_sum(mat_size); - viennacl::copy(result.g_blocks_mult_sum, blocks_mult_sum); - - unsigned int num_one_intervals = result.g_num_one; - unsigned int sum_blocks_mult = mat_size - num_one_intervals; - - - // copy data for intervals that contained one eigenvalue after the first - // processing step - std::vector<NumericT> left_one(mat_size); - std::vector<NumericT> right_one(mat_size); - std::vector<unsigned int> pos_one(mat_size); - - viennacl::copy(result.g_left_one, left_one); - viennacl::copy(result.g_right_one, right_one); - viennacl::copy(result.g_pos_one, pos_one); - - - // singleton intervals generated in the second step - for (unsigned int i = 0; i < sum_blocks_mult; ++i) - { - if (pos_mult[i] != 0) - result.std_eigenvalues[pos_mult[i] - 1] = lambda_mult[i]; - - else - { - throw memory_exception("Invalid array index! Are there more than 256 equal eigenvalues?"); - } - } - - // singleton intervals generated in the first step - unsigned int index = 0; - - for (unsigned int i = 0; i < num_one_intervals; ++i, ++index) - { - result.std_eigenvalues[pos_one[i] - 1] = left_one[i]; - } - return bCompareResult; -} -} // namespace detail -} // namespace linalg -} // namespace viennacl -#endif //VIENNACL_LINALG_DETAIL_BISECT_LARGE_HPP_ http://git-wip-us.apache.org/repos/asf/mahout/blob/7ae549fa/native-viennaCL/src/main/cpp/viennacl/linalg/detail/bisect/bisect_small.hpp ---------------------------------------------------------------------- diff --git a/native-viennaCL/src/main/cpp/viennacl/linalg/detail/bisect/bisect_small.hpp b/native-viennaCL/src/main/cpp/viennacl/linalg/detail/bisect/bisect_small.hpp deleted file mode 100755 index 144640b..0000000 --- a/native-viennaCL/src/main/cpp/viennacl/linalg/detail/bisect/bisect_small.hpp +++ /dev/null @@ -1,96 +0,0 @@ -#ifndef VIENNACL_LINALG_DETAIL_BISECT_SMALL_HPP_ -#define VIENNACL_LINALG_DETAIL_BISECT_SMALL_HPP_ - -/* ========================================================================= - Copyright (c) 2010-2016, Institute for Microelectronics, - Institute for Analysis and Scientific Computing, - TU Wien. - Portions of this software are copyright by UChicago Argonne, LLC. - - ----------------- - ViennaCL - The Vienna Computing Library - ----------------- - - Project Head: Karl Rupp [email protected] - - (A list of authors and contributors can be found in the manual) - - License: MIT (X11), see file LICENSE in the base directory -============================================================================= */ - - -/** @file viennacl/linalg/detail//bisect/bisect_small.hpp - @brief Computation of eigenvalues of a small symmetric, tridiagonal matrix - - Implementation based on the sample provided with the CUDA 6.0 SDK, for which - the creation of derivative works is allowed by including the following statement: - "This software contains source code provided by NVIDIA Corporation." -*/ - -// includes, system -#include <stdlib.h> -#include <stdio.h> -#include <string.h> -#include <math.h> -#include <float.h> - -// includes, project - -#include "viennacl/linalg/detail/bisect/structs.hpp" - -// includes, kernels -#include "viennacl/linalg/detail/bisect/bisect_kernel_calls.hpp" - -namespace viennacl -{ -namespace linalg -{ -namespace detail -{ -//////////////////////////////////////////////////////////////////////////////// -//! Determine eigenvalues for matrices smaller than MAX_SMALL_MATRIX -//! @param input handles to input data of kernel -//! @param result handles to result of kernel -//! @param mat_size matrix size -//! @param lg lower limit of Gerschgorin interval -//! @param ug upper limit of Gerschgorin interval -//! @param precision desired precision of eigenvalues -//////////////////////////////////////////////////////////////////////////////// -template<typename NumericT> -void -computeEigenvaluesSmallMatrix(const InputData<NumericT> &input, ResultDataSmall<NumericT> &result, - const unsigned int mat_size, - const NumericT lg, const NumericT ug, - const NumericT precision) -{ - viennacl::linalg::detail::bisectSmall( input, result, mat_size, lg, ug, precision); -} - - -//////////////////////////////////////////////////////////////////////////////// -//! Process the result obtained on the device, that is transfer to host and -//! perform basic sanity checking -//! @param result handles to result data -//! @param mat_size matrix size -//////////////////////////////////////////////////////////////////////////////// -template<typename NumericT> -void -processResultSmallMatrix(ResultDataSmall<NumericT> &result, - const unsigned int mat_size) -{ - // copy data back to host - std::vector<NumericT> left(mat_size); - std::vector<unsigned int> left_count(mat_size); - - viennacl::copy(result.vcl_g_left, left); - viennacl::copy(result.vcl_g_left_count, left_count); - - for (unsigned int i = 0; i < mat_size; ++i) - { - result.std_eigenvalues[left_count[i]] = left[i]; - } -} -} // namespace detail -} // namespace linalg -} // namespace viennacl -#endif http://git-wip-us.apache.org/repos/asf/mahout/blob/7ae549fa/native-viennaCL/src/main/cpp/viennacl/linalg/detail/bisect/config.hpp ---------------------------------------------------------------------- diff --git a/native-viennaCL/src/main/cpp/viennacl/linalg/detail/bisect/config.hpp b/native-viennaCL/src/main/cpp/viennacl/linalg/detail/bisect/config.hpp deleted file mode 100755 index 3afa509..0000000 --- a/native-viennaCL/src/main/cpp/viennacl/linalg/detail/bisect/config.hpp +++ /dev/null @@ -1,44 +0,0 @@ -#ifndef VIENNACL_LINALG_DETAIL_BISECT_CONFIG_HPP_ -#define VIENNACL_LINALG_DETAIL_BISECT_CONFIG_HPP_ - -/* ========================================================================= - Copyright (c) 2010-2016, Institute for Microelectronics, - Institute for Analysis and Scientific Computing, - TU Wien. - Portions of this software are copyright by UChicago Argonne, LLC. - - ----------------- - ViennaCL - The Vienna Computing Library - ----------------- - - Project Head: Karl Rupp [email protected] - - (A list of authors and contributors can be found in the manual) - - License: MIT (X11), see file LICENSE in the base directory -============================================================================= */ - - - -/** @file viennacl/linalg/detail//bisect/config.hpp - * @brief Global configuration parameters - * - * Implementation based on the sample provided with the CUDA 6.0 SDK, for which - * the creation of derivative works is allowed by including the following statement: - * "This software contains source code provided by NVIDIA Corporation." - * */ - -// should be power of two -#define VIENNACL_BISECT_MAX_THREADS_BLOCK 256 - -#ifdef VIENNACL_WITH_OPENCL -# define VIENNACL_BISECT_MAX_SMALL_MATRIX 256 -# define VIENNACL_BISECT_MAX_THREADS_BLOCK_SMALL_MATRIX 256 -#else // if CUDA is used -# define VIENNACL_BISECT_MAX_THREADS_BLOCK_SMALL_MATRIX 512 // change to 256 if errors occur -# define VIENNACL_BISECT_MAX_SMALL_MATRIX 512 // change to 256 if errors occur -#endif - - #define VIENNACL_BISECT_MIN_ABS_INTERVAL 5.0e-37 - -#endif http://git-wip-us.apache.org/repos/asf/mahout/blob/7ae549fa/native-viennaCL/src/main/cpp/viennacl/linalg/detail/bisect/gerschgorin.hpp ---------------------------------------------------------------------- diff --git a/native-viennaCL/src/main/cpp/viennacl/linalg/detail/bisect/gerschgorin.hpp b/native-viennaCL/src/main/cpp/viennacl/linalg/detail/bisect/gerschgorin.hpp deleted file mode 100755 index 53cd863..0000000 --- a/native-viennaCL/src/main/cpp/viennacl/linalg/detail/bisect/gerschgorin.hpp +++ /dev/null @@ -1,94 +0,0 @@ -#ifndef _VIENNACL_LINALG_DETAIL_BISECT_GERSCHORIN_HPP_ -#define _VIENNACL_LINALG_DETAIL_BISECT_GERSCHORIN_HPP_ - - -/* ========================================================================= - Copyright (c) 2010-2016, Institute for Microelectronics, - Institute for Analysis and Scientific Computing, - TU Wien. - Portions of this software are copyright by UChicago Argonne, LLC. - - ----------------- - ViennaCL - The Vienna Computing Library - ----------------- - - Project Head: Karl Rupp [email protected] - - (A list of authors and contributors can be found in the manual) - - License: MIT (X11), see file LICENSE in the base directory -============================================================================= */ - - -/** @file viennacl/linalg/detail//bisect/gerschgorin.hpp - @brief Computation of Gerschgorin interval for symmetric, tridiagonal matrix - - Implementation based on the sample provided with the CUDA 6.0 SDK, for which - the creation of derivative works is allowed by including the following statement: - "This software contains source code provided by NVIDIA Corporation." -*/ - -#include <cstdio> -#include <cstdlib> -#include <cmath> -#include <cfloat> - -#include "viennacl/linalg/detail/bisect/util.hpp" -#include "viennacl/vector.hpp" - -namespace viennacl -{ -namespace linalg -{ -namespace detail -{ - //////////////////////////////////////////////////////////////////////////////// - //! Compute Gerschgorin interval for symmetric, tridiagonal matrix - //! @param d diagonal elements - //! @param s superdiagonal elements - //! @param n size of matrix - //! @param lg lower limit of Gerschgorin interval - //! @param ug upper limit of Gerschgorin interval - //////////////////////////////////////////////////////////////////////////////// - template<typename NumericT> - void - computeGerschgorin(std::vector<NumericT> & d, std::vector<NumericT> & s, unsigned int n, NumericT &lg, NumericT &ug) - { - // compute bounds - for (unsigned int i = 1; i < (n - 1); ++i) - { - - // sum over the absolute values of all elements of row i - NumericT sum_abs_ni = fabsf(s[i]) + fabsf(s[i + 1]); - - lg = min(lg, d[i] - sum_abs_ni); - ug = max(ug, d[i] + sum_abs_ni); - } - - // first and last row, only one superdiagonal element - - // first row - lg = min(lg, d[0] - fabsf(s[1])); - ug = max(ug, d[0] + fabsf(s[1])); - - // last row - lg = min(lg, d[n-1] - fabsf(s[n-1])); - ug = max(ug, d[n-1] + fabsf(s[n-1])); - - // increase interval to avoid side effects of fp arithmetic - NumericT bnorm = max(fabsf(ug), fabsf(lg)); - - // these values depend on the implmentation of floating count that is - // employed in the following - NumericT psi_0 = 11 * FLT_EPSILON * bnorm; - NumericT psi_n = 11 * FLT_EPSILON * bnorm; - - lg = lg - bnorm * 2 * static_cast<NumericT>(n) * FLT_EPSILON - psi_0; - ug = ug + bnorm * 2 * static_cast<NumericT>(n) * FLT_EPSILON + psi_n; - - ug = max(lg, ug); - } -} // namespace detail -} // namespace linalg -} // namespace viennacl -#endif // _VIENNACL_LINALG_DETAIL_GERSCHORIN_H_ http://git-wip-us.apache.org/repos/asf/mahout/blob/7ae549fa/native-viennaCL/src/main/cpp/viennacl/linalg/detail/bisect/structs.hpp ---------------------------------------------------------------------- diff --git a/native-viennaCL/src/main/cpp/viennacl/linalg/detail/bisect/structs.hpp b/native-viennaCL/src/main/cpp/viennacl/linalg/detail/bisect/structs.hpp deleted file mode 100755 index 1943da3..0000000 --- a/native-viennaCL/src/main/cpp/viennacl/linalg/detail/bisect/structs.hpp +++ /dev/null @@ -1,182 +0,0 @@ -#ifndef VIENNACL_LINALG_DETAIL_BISECT_STRUCTS_HPP_ -#define VIENNACL_LINALG_DETAIL_BISECT_STRUCTS_HPP_ - -/* ========================================================================= - Copyright (c) 2010-2016, Institute for Microelectronics, - Institute for Analysis and Scientific Computing, - TU Wien. - Portions of this software are copyright by UChicago Argonne, LLC. - - ----------------- - ViennaCL - The Vienna Computing Library - ----------------- - - Project Head: Karl Rupp [email protected] - - (A list of authors and contributors can be found in the manual) - - License: MIT (X11), see file LICENSE in the base directory -============================================================================= */ - - -/** @file viennacl/linalg/detail//bisect/structs.hpp - @brief Helper structures to simplify variable handling - - Implementation based on the sample provided with the CUDA 6.0 SDK, for which - the creation of derivative works is allowed by including the following statement: - "This software contains source code provided by NVIDIA Corporation." -*/ - - - -#include <stdlib.h> -#include <stdio.h> -#include <string.h> -#include <math.h> -#include <float.h> -#include <assert.h> - -#include "viennacl/vector.hpp" -#include "viennacl/matrix.hpp" - -namespace viennacl -{ -namespace linalg -{ -namespace detail -{ - -///////////////////////////////////////////////////////////////////////////////// -//! In this class the input matrix is stored -///////////////////////////////////////////////////////////////////////////////// -template<typename NumericT> -struct InputData -{ - //! host side representation of diagonal - std::vector<NumericT> std_a; - //! host side representation superdiagonal - std::vector<NumericT> std_b; - //! device side representation of diagonal - viennacl::vector<NumericT> g_a; - //!device side representation of superdiagonal - viennacl::vector<NumericT> g_b; - - /** @brief Initialize the input data to the algorithm - * - * @param diagonal vector with the diagonal elements - * @param superdiagonal vector with the superdiagonal elements - * @param sz size of the matrix - */ - InputData(std::vector<NumericT> diagonal, std::vector<NumericT> superdiagonal, const unsigned int sz) : - std_a(sz), std_b(sz), g_a(sz), g_b(sz) - { - std_a = diagonal; - std_b = superdiagonal; - - viennacl::copy(std_b, g_b); - viennacl::copy(std_a, g_a); - } - - InputData(viennacl::vector<NumericT> diagonal, viennacl::vector<NumericT> superdiagonal, const unsigned int sz) : - std_a(sz), std_b(sz), g_a(sz), g_b(sz) - { - g_a = diagonal; - g_b = superdiagonal; - - viennacl::copy(g_a, std_a); - viennacl::copy(g_b, std_b); - } -}; - - -///////////////////////////////////////////////////////////////////////////////// -//! In this class the data of the result for small matrices is stored -///////////////////////////////////////////////////////////////////////////////// -template<typename NumericT> -struct ResultDataSmall -{ - //! eigenvalues (host side) - std::vector<NumericT> std_eigenvalues; - //! left interval limits at the end of the computation - viennacl::vector<NumericT> vcl_g_left; - //! right interval limits at the end of the computation - viennacl::vector<NumericT> vcl_g_right; - //! number of eigenvalues smaller than the left interval limit - viennacl::vector<unsigned int> vcl_g_left_count; - //! number of eigenvalues bigger than the right interval limit - viennacl::vector<unsigned int> vcl_g_right_count; - - - //////////////////////////////////////////////////////////////////////////////// - //! Initialize variables and memory for the result for small matrices - //////////////////////////////////////////////////////////////////////////////// - ResultDataSmall(const unsigned int mat_size) : - std_eigenvalues(mat_size), vcl_g_left(mat_size), vcl_g_right(mat_size), vcl_g_left_count(mat_size), vcl_g_right_count(mat_size) {} -}; - - - - - -///////////////////////////////////////////////////////////////////////////////// -//! In this class the data of the result for large matrices is stored -///////////////////////////////////////////////////////////////////////////////// -template<typename NumericT> -struct ResultDataLarge -{ -//! eigenvalues - std::vector<NumericT> std_eigenvalues; - - //! number of intervals containing one eigenvalue after the first step - viennacl::scalar<unsigned int> g_num_one; - - //! number of (thread) blocks of intervals containing multiple eigenvalues after the first steo - viennacl::scalar<unsigned int> g_num_blocks_mult; - - //! left interval limits of intervals containing one eigenvalue after the first iteration step - viennacl::vector<NumericT> g_left_one; - - //! right interval limits of intervals containing one eigenvalue after the first iteration step - viennacl::vector<NumericT> g_right_one; - - //! interval indices (position in sorted listed of eigenvalues) of intervals containing one eigenvalue after the first iteration step - viennacl::vector<unsigned int> g_pos_one; - - //! left interval limits of intervals containing multiple eigenvalues after the first iteration step - viennacl::vector<NumericT> g_left_mult; - //! right interval limits of intervals containing multiple eigenvalues after the first iteration step - viennacl::vector<NumericT> g_right_mult; - - //! number of eigenvalues less than the left limit of the eigenvalue intervals containing multiple eigenvalues - viennacl::vector<unsigned int> g_left_count_mult; - - //! number of eigenvalues less than the right limit of the eigenvalue intervals containing multiple eigenvalues - viennacl::vector<unsigned int> g_right_count_mult; - //! start addresses in g_left_mult etc. of blocks of intervals containing more than one eigenvalue after the first step - viennacl::vector<unsigned int> g_blocks_mult; - - //! accumulated number of intervals in g_left_mult etc. of blocks of intervals containing more than one eigenvalue after the first step - viennacl::vector<unsigned int> g_blocks_mult_sum; - - //! eigenvalues that have been generated in the second step from intervals that still contained multiple eigenvalues after the first step - viennacl::vector<NumericT> g_lambda_mult; - - //! eigenvalue index of intervals that have been generated in the second processing step - viennacl::vector<unsigned int> g_pos_mult; - - /** @brief Initialize variables and memory for result - * - * @param mat_size size of the matrix - */ - ResultDataLarge(unsigned int mat_size) : - std_eigenvalues(mat_size), g_num_one(0), g_num_blocks_mult(0), - g_left_one(mat_size), g_right_one(mat_size), g_pos_one(mat_size), - g_left_mult(mat_size), g_right_mult(mat_size), g_left_count_mult(mat_size), g_right_count_mult(mat_size), - g_blocks_mult(mat_size), g_blocks_mult_sum(mat_size), g_lambda_mult(mat_size), g_pos_mult(mat_size) {} - -}; -} // namespace detail -} // namespace linalg -} // namespace viennacl -#endif // #ifndef VIENNACL_LINALG_DETAIL_STRUCTS_HPP_ - http://git-wip-us.apache.org/repos/asf/mahout/blob/7ae549fa/native-viennaCL/src/main/cpp/viennacl/linalg/detail/bisect/util.hpp ---------------------------------------------------------------------- diff --git a/native-viennaCL/src/main/cpp/viennacl/linalg/detail/bisect/util.hpp b/native-viennaCL/src/main/cpp/viennacl/linalg/detail/bisect/util.hpp deleted file mode 100755 index 883d202..0000000 --- a/native-viennaCL/src/main/cpp/viennacl/linalg/detail/bisect/util.hpp +++ /dev/null @@ -1,106 +0,0 @@ -#ifndef VIENNACL_LINALG_DETAIL_BISECT_UTIL_HPP_ -#define VIENNACL_LINALG_DETAIL_BISECT_UTIL_HPP_ - -/* ========================================================================= - Copyright (c) 2010-2016, Institute for Microelectronics, - Institute for Analysis and Scientific Computing, - TU Wien. - Portions of this software are copyright by UChicago Argonne, LLC. - - ----------------- - ViennaCL - The Vienna Computing Library - ----------------- - - Project Head: Karl Rupp [email protected] - - (A list of authors and contributors can be found in the manual) - - License: MIT (X11), see file LICENSE in the base directory -============================================================================= */ - - -/** @file viennacl/linalg/detail//bisect/util.hpp - @brief Utility functions - - Implementation based on the sample provided with the CUDA 6.0 SDK, for which - the creation of derivative works is allowed by including the following statement: - "This software contains source code provided by NVIDIA Corporation." -*/ - -namespace viennacl -{ -namespace linalg -{ -namespace detail -{ - -//////////////////////////////////////////////////////////////////////////////// -//! Minimum -//////////////////////////////////////////////////////////////////////////////// -template<class T> -#ifdef __CUDACC__ -__host__ __device__ -#endif -T -min(const T &lhs, const T &rhs) -{ - - return (lhs < rhs) ? lhs : rhs; -} - -//////////////////////////////////////////////////////////////////////////////// -//! Maximum -//////////////////////////////////////////////////////////////////////////////// -template<class T> -#ifdef __CUDACC__ -__host__ __device__ -#endif -T -max(const T &lhs, const T &rhs) -{ - - return (lhs < rhs) ? rhs : lhs; -} - -//////////////////////////////////////////////////////////////////////////////// -//! Sign of number (float) -//////////////////////////////////////////////////////////////////////////////// -#ifdef __CUDACC__ -__host__ __device__ -#endif -inline float -sign_f(const float &val) -{ - return (val < 0.0f) ? -1.0f : 1.0f; -} - -//////////////////////////////////////////////////////////////////////////////// -//! Sign of number (double) -//////////////////////////////////////////////////////////////////////////////// -#ifdef __CUDACC__ -__host__ __device__ -#endif -inline double -sign_d(const double &val) -{ - return (val < 0.0) ? -1.0 : 1.0; -} - -/////////////////////////////////////////////////////////////////////////////// -//! Get the number of blocks that are required to process \a num_threads with -//! \a num_threads_blocks threads per block -/////////////////////////////////////////////////////////////////////////////// -extern "C" -inline -unsigned int -getNumBlocksLinear(const unsigned int num_threads, - const unsigned int num_threads_block) -{ - const unsigned int block_rem = - ((num_threads % num_threads_block) != 0) ? 1 : 0; - return (num_threads / num_threads_block) + block_rem; -} -} // namespace detail -} // namespace linalg -} // namespace viennacl -#endif // #ifndef VIENNACL_LINALG_DETAIL_UTIL_HPP_ http://git-wip-us.apache.org/repos/asf/mahout/blob/7ae549fa/native-viennaCL/src/main/cpp/viennacl/linalg/detail/ilu/block_ilu.hpp ---------------------------------------------------------------------- diff --git a/native-viennaCL/src/main/cpp/viennacl/linalg/detail/ilu/block_ilu.hpp b/native-viennaCL/src/main/cpp/viennacl/linalg/detail/ilu/block_ilu.hpp deleted file mode 100644 index 1540e2d..0000000 --- a/native-viennaCL/src/main/cpp/viennacl/linalg/detail/ilu/block_ilu.hpp +++ /dev/null @@ -1,617 +0,0 @@ -#ifndef VIENNACL_LINALG_DETAIL_BLOCK_ILU_HPP_ -#define VIENNACL_LINALG_DETAIL_BLOCK_ILU_HPP_ - -/* ========================================================================= - Copyright (c) 2010-2016, Institute for Microelectronics, - Institute for Analysis and Scientific Computing, - TU Wien. - Portions of this software are copyright by UChicago Argonne, LLC. - - ----------------- - ViennaCL - The Vienna Computing Library - ----------------- - - Project Head: Karl Rupp [email protected] - - (A list of authors and contributors can be found in the manual) - - License: MIT (X11), see file LICENSE in the base directory -============================================================================= */ - -/** @file viennacl/linalg/detail/ilu/block_ilu.hpp - @brief Implementations of incomplete block factorization preconditioners -*/ - -#include <vector> -#include <cmath> -#include "viennacl/forwards.h" -#include "viennacl/tools/tools.hpp" -#include "viennacl/linalg/detail/ilu/common.hpp" -#include "viennacl/linalg/detail/ilu/ilu0.hpp" -#include "viennacl/linalg/detail/ilu/ilut.hpp" - -#include <map> - -namespace viennacl -{ -namespace linalg -{ -namespace detail -{ - /** @brief Helper range class for representing a subvector of a larger buffer. */ - template<typename VectorT, typename NumericT, typename SizeT = vcl_size_t> - class ilu_vector_range - { - public: - ilu_vector_range(VectorT & v, - SizeT start_index, - SizeT vec_size - ) : vec_(v), start_(start_index), size_(vec_size) {} - - NumericT & operator()(SizeT index) - { - assert(index < size_ && bool("Index out of bounds!")); - return vec_[start_ + index]; - } - - NumericT & operator[](SizeT index) - { - assert(index < size_ && bool("Index out of bounds!")); - return vec_[start_ + index]; - } - - SizeT size() const { return size_; } - - private: - VectorT & vec_; - SizeT start_; - SizeT size_; - }; - - /** @brief Extracts a diagonal block from a larger system matrix - * - * @param A The full matrix - * @param diagonal_block_A The output matrix, to which the extracted block is written to - * @param start_index First row- and column-index of the block - * @param stop_index First row- and column-index beyond the block - */ - template<typename NumericT> - void extract_block_matrix(viennacl::compressed_matrix<NumericT> const & A, - viennacl::compressed_matrix<NumericT> & diagonal_block_A, - vcl_size_t start_index, - vcl_size_t stop_index - ) - { - assert( (A.handle1().get_active_handle_id() == viennacl::MAIN_MEMORY) && bool("System matrix must reside in main memory for ILU0") ); - assert( (A.handle2().get_active_handle_id() == viennacl::MAIN_MEMORY) && bool("System matrix must reside in main memory for ILU0") ); - assert( (A.handle().get_active_handle_id() == viennacl::MAIN_MEMORY) && bool("System matrix must reside in main memory for ILU0") ); - - NumericT const * A_elements = viennacl::linalg::host_based::detail::extract_raw_pointer<NumericT>(A.handle()); - unsigned int const * A_row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(A.handle1()); - unsigned int const * A_col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(A.handle2()); - - NumericT * output_elements = viennacl::linalg::host_based::detail::extract_raw_pointer<NumericT>(diagonal_block_A.handle()); - unsigned int * output_row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(diagonal_block_A.handle1()); - unsigned int * output_col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(diagonal_block_A.handle2()); - - vcl_size_t output_counter = 0; - for (vcl_size_t row = start_index; row < stop_index; ++row) - { - unsigned int buffer_col_start = A_row_buffer[row]; - unsigned int buffer_col_end = A_row_buffer[row+1]; - - output_row_buffer[row - start_index] = static_cast<unsigned int>(output_counter); - - for (unsigned int buf_index = buffer_col_start; buf_index < buffer_col_end; ++buf_index) - { - unsigned int col = A_col_buffer[buf_index]; - if (col < start_index) - continue; - - if (col >= static_cast<unsigned int>(stop_index)) - continue; - - output_col_buffer[output_counter] = static_cast<unsigned int>(col - start_index); - output_elements[output_counter] = A_elements[buf_index]; - ++output_counter; - } - output_row_buffer[row - start_index + 1] = static_cast<unsigned int>(output_counter); - } - } - -} // namespace detail - - - -/** @brief A block ILU preconditioner class, can be supplied to solve()-routines - * - * @tparam MatrixType Type of the system matrix - * @tparam ILUTag Type of the tag identifiying the ILU preconditioner to be used on each block. -*/ -template<typename MatrixT, typename ILUTag> -class block_ilu_precond -{ -typedef typename MatrixT::value_type ScalarType; - -public: - typedef std::vector<std::pair<vcl_size_t, vcl_size_t> > index_vector_type; //the pair refers to index range [a, b) of each block - - - block_ilu_precond(MatrixT const & mat, - ILUTag const & tag, - vcl_size_t num_blocks = 8 - ) : tag_(tag), L_blocks(num_blocks), U_blocks(num_blocks) - { - // Set up vector of block indices: - block_indices_.resize(num_blocks); - for (vcl_size_t i=0; i<num_blocks; ++i) - { - vcl_size_t start_index = ( i * mat.size1()) / num_blocks; - vcl_size_t stop_index = ((i+1) * mat.size1()) / num_blocks; - - block_indices_[i] = std::pair<vcl_size_t, vcl_size_t>(start_index, stop_index); - } - - //initialize preconditioner: - //std::cout << "Start CPU precond" << std::endl; - init(mat); - //std::cout << "End CPU precond" << std::endl; - } - - block_ilu_precond(MatrixT const & mat, - ILUTag const & tag, - index_vector_type const & block_boundaries - ) : tag_(tag), block_indices_(block_boundaries), L_blocks(block_boundaries.size()), U_blocks(block_boundaries.size()) - { - //initialize preconditioner: - //std::cout << "Start CPU precond" << std::endl; - init(mat); - //std::cout << "End CPU precond" << std::endl; - } - - - template<typename VectorT> - void apply(VectorT & vec) const - { - for (vcl_size_t i=0; i<block_indices_.size(); ++i) - apply_dispatch(vec, i, ILUTag()); - } - -private: - void init(MatrixT const & A) - { - viennacl::context host_context(viennacl::MAIN_MEMORY); - viennacl::compressed_matrix<ScalarType> mat(host_context); - - viennacl::copy(A, mat); - - unsigned int const * row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(mat.handle1()); - -#ifdef VIENNACL_WITH_OPENMP - #pragma omp parallel for -#endif - for (long i2=0; i2<static_cast<long>(block_indices_.size()); ++i2) - { - vcl_size_t i = static_cast<vcl_size_t>(i2); - // Step 1: Extract blocks - vcl_size_t block_size = block_indices_[i].second - block_indices_[i].first; - vcl_size_t block_nnz = row_buffer[block_indices_[i].second] - row_buffer[block_indices_[i].first]; - viennacl::compressed_matrix<ScalarType> mat_block(block_size, block_size, block_nnz, host_context); - - detail::extract_block_matrix(mat, mat_block, block_indices_[i].first, block_indices_[i].second); - - // Step 2: Precondition blocks: - viennacl::switch_memory_context(L_blocks[i], host_context); - viennacl::switch_memory_context(U_blocks[i], host_context); - init_dispatch(mat_block, L_blocks[i], U_blocks[i], tag_); - } - - } - - void init_dispatch(viennacl::compressed_matrix<ScalarType> const & mat_block, - viennacl::compressed_matrix<ScalarType> & L, - viennacl::compressed_matrix<ScalarType> & U, - viennacl::linalg::ilu0_tag) - { - (void)U; - L = mat_block; - viennacl::linalg::precondition(L, tag_); - } - - void init_dispatch(viennacl::compressed_matrix<ScalarType> const & mat_block, - viennacl::compressed_matrix<ScalarType> & L, - viennacl::compressed_matrix<ScalarType> & U, - viennacl::linalg::ilut_tag) - { - L.resize(mat_block.size1(), mat_block.size2()); - U.resize(mat_block.size1(), mat_block.size2()); - viennacl::linalg::precondition(mat_block, L, U, tag_); - } - - template<typename VectorT> - void apply_dispatch(VectorT & vec, vcl_size_t i, viennacl::linalg::ilu0_tag) const - { - detail::ilu_vector_range<VectorT, ScalarType> vec_range(vec, block_indices_[i].first, L_blocks[i].size2()); - - unsigned int const * row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(L_blocks[i].handle1()); - unsigned int const * col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(L_blocks[i].handle2()); - ScalarType const * elements = viennacl::linalg::host_based::detail::extract_raw_pointer<ScalarType>(L_blocks[i].handle()); - - viennacl::linalg::host_based::detail::csr_inplace_solve<ScalarType>(row_buffer, col_buffer, elements, vec_range, L_blocks[i].size2(), unit_lower_tag()); - viennacl::linalg::host_based::detail::csr_inplace_solve<ScalarType>(row_buffer, col_buffer, elements, vec_range, L_blocks[i].size2(), upper_tag()); - } - - template<typename VectorT> - void apply_dispatch(VectorT & vec, vcl_size_t i, viennacl::linalg::ilut_tag) const - { - detail::ilu_vector_range<VectorT, ScalarType> vec_range(vec, block_indices_[i].first, L_blocks[i].size2()); - - { - unsigned int const * row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(L_blocks[i].handle1()); - unsigned int const * col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(L_blocks[i].handle2()); - ScalarType const * elements = viennacl::linalg::host_based::detail::extract_raw_pointer<ScalarType>(L_blocks[i].handle()); - - viennacl::linalg::host_based::detail::csr_inplace_solve<ScalarType>(row_buffer, col_buffer, elements, vec_range, L_blocks[i].size2(), unit_lower_tag()); - } - - { - unsigned int const * row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(U_blocks[i].handle1()); - unsigned int const * col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(U_blocks[i].handle2()); - ScalarType const * elements = viennacl::linalg::host_based::detail::extract_raw_pointer<ScalarType>(U_blocks[i].handle()); - - viennacl::linalg::host_based::detail::csr_inplace_solve<ScalarType>(row_buffer, col_buffer, elements, vec_range, U_blocks[i].size2(), upper_tag()); - } - } - - ILUTag tag_; - index_vector_type block_indices_; - std::vector< viennacl::compressed_matrix<ScalarType> > L_blocks; - std::vector< viennacl::compressed_matrix<ScalarType> > U_blocks; -}; - - - - - -/** @brief ILUT preconditioner class, can be supplied to solve()-routines. -* -* Specialization for compressed_matrix -*/ -template<typename NumericT, unsigned int AlignmentV, typename ILUTagT> -class block_ilu_precond< compressed_matrix<NumericT, AlignmentV>, ILUTagT> -{ - typedef compressed_matrix<NumericT, AlignmentV> MatrixType; - -public: - typedef std::vector<std::pair<vcl_size_t, vcl_size_t> > index_vector_type; //the pair refers to index range [a, b) of each block - - - block_ilu_precond(MatrixType const & mat, - ILUTagT const & tag, - vcl_size_t num_blocks = 8 - ) : tag_(tag), - block_indices_(num_blocks), - gpu_block_indices_(), - gpu_L_trans_(0, 0, viennacl::context(viennacl::MAIN_MEMORY)), - gpu_U_trans_(0, 0, viennacl::context(viennacl::MAIN_MEMORY)), - gpu_D_(mat.size1(), viennacl::context(viennacl::MAIN_MEMORY)), - L_blocks_(num_blocks), - U_blocks_(num_blocks) - { - // Set up vector of block indices: - block_indices_.resize(num_blocks); - for (vcl_size_t i=0; i<num_blocks; ++i) - { - vcl_size_t start_index = ( i * mat.size1()) / num_blocks; - vcl_size_t stop_index = ((i+1) * mat.size1()) / num_blocks; - - block_indices_[i] = std::pair<vcl_size_t, vcl_size_t>(start_index, stop_index); - } - - //initialize preconditioner: - //std::cout << "Start CPU precond" << std::endl; - init(mat); - //std::cout << "End CPU precond" << std::endl; - } - - block_ilu_precond(MatrixType const & mat, - ILUTagT const & tag, - index_vector_type const & block_boundaries - ) : tag_(tag), - block_indices_(block_boundaries), - gpu_block_indices_(), - gpu_L_trans_(0, 0, viennacl::context(viennacl::MAIN_MEMORY)), - gpu_U_trans_(0, 0, viennacl::context(viennacl::MAIN_MEMORY)), - gpu_D_(mat.size1(), viennacl::context(viennacl::MAIN_MEMORY)), - L_blocks_(block_boundaries.size()), - U_blocks_(block_boundaries.size()) - { - //initialize preconditioner: - //std::cout << "Start CPU precond" << std::endl; - init(mat); - //std::cout << "End CPU precond" << std::endl; - } - - - void apply(vector<NumericT> & vec) const - { - viennacl::linalg::detail::block_inplace_solve(trans(gpu_L_trans_), gpu_block_indices_, block_indices_.size(), gpu_D_, - vec, - viennacl::linalg::unit_lower_tag()); - - viennacl::linalg::detail::block_inplace_solve(trans(gpu_U_trans_), gpu_block_indices_, block_indices_.size(), gpu_D_, - vec, - viennacl::linalg::upper_tag()); - - //apply_cpu(vec); - } - - -private: - - void init(MatrixType const & A) - { - viennacl::context host_context(viennacl::MAIN_MEMORY); - viennacl::compressed_matrix<NumericT> mat(host_context); - - mat = A; - - unsigned int const * row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(mat.handle1()); - -#ifdef VIENNACL_WITH_OPENMP - #pragma omp parallel for -#endif - for (long i=0; i<static_cast<long>(block_indices_.size()); ++i) - { - // Step 1: Extract blocks - vcl_size_t block_size = block_indices_[static_cast<vcl_size_t>(i)].second - block_indices_[static_cast<vcl_size_t>(i)].first; - vcl_size_t block_nnz = row_buffer[block_indices_[static_cast<vcl_size_t>(i)].second] - row_buffer[block_indices_[static_cast<vcl_size_t>(i)].first]; - viennacl::compressed_matrix<NumericT> mat_block(block_size, block_size, block_nnz, host_context); - - detail::extract_block_matrix(mat, mat_block, block_indices_[static_cast<vcl_size_t>(i)].first, block_indices_[static_cast<vcl_size_t>(i)].second); - - // Step 2: Precondition blocks: - viennacl::switch_memory_context(L_blocks_[static_cast<vcl_size_t>(i)], host_context); - viennacl::switch_memory_context(U_blocks_[static_cast<vcl_size_t>(i)], host_context); - init_dispatch(mat_block, L_blocks_[static_cast<vcl_size_t>(i)], U_blocks_[static_cast<vcl_size_t>(i)], tag_); - } - - /* - * copy resulting preconditioner back to GPU: - */ - viennacl::backend::typesafe_host_array<unsigned int> block_indices_uint(gpu_block_indices_, 2 * block_indices_.size()); - for (vcl_size_t i=0; i<block_indices_.size(); ++i) - { - block_indices_uint.set(2*i, block_indices_[i].first); - block_indices_uint.set(2*i + 1, block_indices_[i].second); - } - - viennacl::backend::memory_create(gpu_block_indices_, block_indices_uint.raw_size(), viennacl::traits::context(A), block_indices_uint.get()); - - blocks_to_device(A); - - } - - // Copy computed preconditioned blocks to OpenCL device - void blocks_to_device(MatrixType const & A) - { - gpu_L_trans_.resize(A.size1(), A.size2()); - gpu_U_trans_.resize(A.size1(), A.size2()); - gpu_D_.resize(A.size1()); - - unsigned int * L_trans_row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(gpu_L_trans_.handle1()); - unsigned int * U_trans_row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(gpu_U_trans_.handle1()); - - // - // Count elements per row - // -#ifdef VIENNACL_WITH_OPENMP - #pragma omp parallel for -#endif - for (long block_index2 = 0; block_index2 < static_cast<long>(L_blocks_.size()); ++block_index2) - { - vcl_size_t block_index = vcl_size_t(block_index2); - - unsigned int block_start = static_cast<unsigned int>(block_indices_[block_index].first); - unsigned int block_stop = static_cast<unsigned int>(block_indices_[block_index].second); - - unsigned int const * L_row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(L_blocks_[block_index].handle1()); - unsigned int const * L_col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(L_blocks_[block_index].handle2()); - - // zero row array of L: - std::fill(L_trans_row_buffer + block_start, - L_trans_row_buffer + block_stop, - static_cast<unsigned int>(0)); - - // count number of elements per row: - for (vcl_size_t row = 0; row < L_blocks_[block_index].size1(); ++row) - { - unsigned int col_start = L_row_buffer[row]; - unsigned int col_end = L_row_buffer[row+1]; - - for (unsigned int j = col_start; j < col_end; ++j) - { - unsigned int col = L_col_buffer[j]; - if (col < static_cast<unsigned int>(row)) - L_trans_row_buffer[col + block_start] += 1; - } - } - - ////// same for U - - unsigned int const * U_row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(U_blocks_[block_index].handle1()); - unsigned int const * U_col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(U_blocks_[block_index].handle2()); - - // zero row array of U: - std::fill(U_trans_row_buffer + block_start, - U_trans_row_buffer + block_stop, - static_cast<unsigned int>(0)); - - // count number of elements per row: - for (vcl_size_t row = 0; row < U_blocks_[block_index].size1(); ++row) - { - unsigned int col_start = U_row_buffer[row]; - unsigned int col_end = U_row_buffer[row+1]; - - for (unsigned int j = col_start; j < col_end; ++j) - { - unsigned int col = U_col_buffer[j]; - if (col > row) - U_trans_row_buffer[col + block_start] += 1; - } - } - } - - - // - // Exclusive scan on row buffer (feel free to add parallelization here) - // - unsigned int current_value = 0; - for (vcl_size_t i=0; i<gpu_L_trans_.size1(); ++i) - { - unsigned int tmp = L_trans_row_buffer[i]; - L_trans_row_buffer[i] = current_value; - current_value += tmp; - } - gpu_L_trans_.reserve(current_value); - - current_value = 0; - for (vcl_size_t i=0; i<gpu_U_trans_.size1(); ++i) - { - unsigned int tmp = U_trans_row_buffer[i]; - U_trans_row_buffer[i] = current_value; - current_value += tmp; - } - gpu_U_trans_.reserve(current_value); - - - // - // Fill with data - // - unsigned int * L_trans_col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(gpu_L_trans_.handle2()); - NumericT * L_trans_elements = viennacl::linalg::host_based::detail::extract_raw_pointer<NumericT>(gpu_L_trans_.handle()); - - unsigned int * U_trans_col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(gpu_U_trans_.handle2()); - NumericT * U_trans_elements = viennacl::linalg::host_based::detail::extract_raw_pointer<NumericT>(gpu_U_trans_.handle()); - - NumericT * D_elements = viennacl::linalg::host_based::detail::extract_raw_pointer<NumericT>(gpu_D_.handle()); - - std::vector<unsigned int> offset_L(gpu_L_trans_.size1()); - std::vector<unsigned int> offset_U(gpu_U_trans_.size1()); - -#ifdef VIENNACL_WITH_OPENMP - #pragma omp parallel for -#endif - for (long block_index2 = 0; block_index2 < static_cast<long>(L_blocks_.size()); ++block_index2) - { - vcl_size_t block_index = vcl_size_t(block_index2); - unsigned int block_start = static_cast<unsigned int>(block_indices_[block_index].first); - - unsigned int const * L_row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(L_blocks_[block_index].handle1()); - unsigned int const * L_col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(L_blocks_[block_index].handle2()); - NumericT const * L_elements = viennacl::linalg::host_based::detail::extract_raw_pointer<NumericT >(L_blocks_[block_index].handle()); - - - // write L_trans: - for (vcl_size_t row = 0; row < L_blocks_[block_index].size1(); ++row) - { - unsigned int col_start = L_row_buffer[row]; - unsigned int col_end = L_row_buffer[row+1]; - - for (unsigned int j = col_start; j < col_end; ++j) - { - unsigned int col = L_col_buffer[j]; - if (col < row) - { - unsigned int row_trans = col + block_start; - unsigned int k = L_trans_row_buffer[row_trans] + offset_L[row_trans]; - offset_L[row_trans] += 1; - - L_trans_col_buffer[k] = static_cast<unsigned int>(row) + block_start; - L_trans_elements[k] = L_elements[j]; - } - } - } - - unsigned int const * U_row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(U_blocks_[block_index].handle1()); - unsigned int const * U_col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(U_blocks_[block_index].handle2()); - NumericT const * U_elements = viennacl::linalg::host_based::detail::extract_raw_pointer<NumericT >(U_blocks_[block_index].handle()); - - // write U_trans and D: - for (vcl_size_t row = 0; row < U_blocks_[block_index].size1(); ++row) - { - unsigned int col_start = U_row_buffer[row]; - unsigned int col_end = U_row_buffer[row+1]; - - for (unsigned int j = col_start; j < col_end; ++j) - { - unsigned int row_trans = U_col_buffer[j] + block_start; - unsigned int k = U_trans_row_buffer[row_trans] + offset_U[row_trans]; - - if (row_trans == row + block_start) // entry for D - { - D_elements[row_trans] = U_elements[j]; - } - else if (row_trans > row + block_start) //entry for U - { - offset_U[row_trans] += 1; - - U_trans_col_buffer[k] = static_cast<unsigned int>(row) + block_start; - U_trans_elements[k] = U_elements[j]; - } - } - } - - } - - // - // Send to destination device: - // - viennacl::switch_memory_context(gpu_L_trans_, viennacl::traits::context(A)); - viennacl::switch_memory_context(gpu_U_trans_, viennacl::traits::context(A)); - viennacl::switch_memory_context(gpu_D_, viennacl::traits::context(A)); - } - - void init_dispatch(viennacl::compressed_matrix<NumericT> const & mat_block, - viennacl::compressed_matrix<NumericT> & L, - viennacl::compressed_matrix<NumericT> & U, - viennacl::linalg::ilu0_tag) - { - L = mat_block; - viennacl::linalg::precondition(L, tag_); - U = L; // fairly poor workaround... - } - - void init_dispatch(viennacl::compressed_matrix<NumericT> const & mat_block, - viennacl::compressed_matrix<NumericT> & L, - viennacl::compressed_matrix<NumericT> & U, - viennacl::linalg::ilut_tag) - { - L.resize(mat_block.size1(), mat_block.size2()); - U.resize(mat_block.size1(), mat_block.size2()); - viennacl::linalg::precondition(mat_block, L, U, tag_); - } - - - ILUTagT tag_; - index_vector_type block_indices_; - viennacl::backend::mem_handle gpu_block_indices_; - viennacl::compressed_matrix<NumericT> gpu_L_trans_; - viennacl::compressed_matrix<NumericT> gpu_U_trans_; - viennacl::vector<NumericT> gpu_D_; - - std::vector<MatrixType> L_blocks_; - std::vector<MatrixType> U_blocks_; -}; - - -} -} - - - - -#endif - - - http://git-wip-us.apache.org/repos/asf/mahout/blob/7ae549fa/native-viennaCL/src/main/cpp/viennacl/linalg/detail/ilu/chow_patel_ilu.hpp ---------------------------------------------------------------------- diff --git a/native-viennaCL/src/main/cpp/viennacl/linalg/detail/ilu/chow_patel_ilu.hpp b/native-viennaCL/src/main/cpp/viennacl/linalg/detail/ilu/chow_patel_ilu.hpp deleted file mode 100644 index 7628cdb..0000000 --- a/native-viennaCL/src/main/cpp/viennacl/linalg/detail/ilu/chow_patel_ilu.hpp +++ /dev/null @@ -1,316 +0,0 @@ -#ifndef VIENNACL_LINALG_DETAIL_CHOW_PATEL_ILU_HPP_ -#define VIENNACL_LINALG_DETAIL_CHOW_PATEL_ILU_HPP_ - -/* ========================================================================= - Copyright (c) 2010-2016, Institute for Microelectronics, - Institute for Analysis and Scientific Computing, - TU Wien. - Portions of this software are copyright by UChicago Argonne, LLC. - - ----------------- - ViennaCL - The Vienna Computing Library - ----------------- - - Project Head: Karl Rupp [email protected] - - (A list of authors and contributors can be found in the PDF manual) - - License: MIT (X11), see file LICENSE in the base directory -============================================================================= */ - -/** @file viennacl/linalg/detail/ilu/chow_patel_ilu.hpp - @brief Implementations of incomplete factorization preconditioners with fine-grained parallelism. - - Based on "Fine-Grained Parallel Incomplete LU Factorization" by Chow and Patel, SIAM J. Sci. Comput., vol. 37, no. 2, pp. C169-C193 -*/ - -#include <vector> -#include <cmath> -#include <iostream> -#include "viennacl/forwards.h" -#include "viennacl/tools/tools.hpp" -#include "viennacl/linalg/detail/ilu/common.hpp" -#include "viennacl/linalg/ilu_operations.hpp" -#include "viennacl/linalg/prod.hpp" -#include "viennacl/backend/memory.hpp" - -namespace viennacl -{ -namespace linalg -{ - -/** @brief A tag for incomplete LU and incomplete Cholesky factorization with static pattern (Parallel-ILU0, Parallel ICC0) -*/ -class chow_patel_tag -{ -public: - /** @brief Constructor allowing to set the number of sweeps and Jacobi iterations. - * - * @param num_sweeps Number of sweeps in setup phase - * @param num_jacobi_iters Number of Jacobi iterations for each triangular 'solve' when applying the preconditioner to a vector - */ - chow_patel_tag(vcl_size_t num_sweeps = 3, vcl_size_t num_jacobi_iters = 2) : sweeps_(num_sweeps), jacobi_iters_(num_jacobi_iters) {} - - /** @brief Returns the number of sweeps (i.e. number of nonlinear iterations) in the solver setup stage */ - vcl_size_t sweeps() const { return sweeps_; } - /** @brief Sets the number of sweeps (i.e. number of nonlinear iterations) in the solver setup stage */ - void sweeps(vcl_size_t num) { sweeps_ = num; } - - /** @brief Returns the number of Jacobi iterations (i.e. applications of x_{k+1} = (I - D^{-1}R)x_k + D^{-1} b) for each of the solves y = U^{-1} x and z = L^{-1} y) for each preconditioner application. */ - vcl_size_t jacobi_iters() const { return jacobi_iters_; } - /** @brief Sets the number of Jacobi iterations for each triangular 'solve' when applying the preconditioner to a vector. */ - void jacobi_iters(vcl_size_t num) { jacobi_iters_ = num; } - -private: - vcl_size_t sweeps_; - vcl_size_t jacobi_iters_; -}; - -namespace detail -{ - /** @brief Implementation of the parallel ICC0 factorization, Algorithm 3 in Chow-Patel paper. - * - * Rather than dealing with a column-major upper triangular matrix U, we use the lower-triangular matrix L such that A is approximately given by LL^T. - * The advantage is that L is readily available in row-major format. - */ - template<typename NumericT> - void precondition(viennacl::compressed_matrix<NumericT> const & A, - viennacl::compressed_matrix<NumericT> & L, - viennacl::vector<NumericT> & diag_L, - viennacl::compressed_matrix<NumericT> & L_trans, - chow_patel_tag const & tag) - { - // make sure L and U have correct dimensions: - L.resize(A.size1(), A.size2(), false); - - // initialize L and U from values in A: - viennacl::linalg::extract_L(A, L); - - // diagonally scale values from A in L: - viennacl::linalg::icc_scale(A, L); - - viennacl::vector<NumericT> aij_L(L.nnz(), viennacl::traits::context(A)); - viennacl::backend::memory_copy(L.handle(), aij_L.handle(), 0, 0, sizeof(NumericT) * L.nnz()); - - // run sweeps: - for (vcl_size_t i=0; i<tag.sweeps(); ++i) - viennacl::linalg::icc_chow_patel_sweep(L, aij_L); - - // transpose L to obtain L_trans: - viennacl::linalg::ilu_transpose(L, L_trans); - - // form (I - D_L^{-1}L) and (I - D_U^{-1} U), with U := L_trans - viennacl::linalg::ilu_form_neumann_matrix(L, diag_L); - viennacl::linalg::ilu_form_neumann_matrix(L_trans, diag_L); - } - - - /** @brief Implementation of the parallel ILU0 factorization, Algorithm 2 in Chow-Patel paper. */ - template<typename NumericT> - void precondition(viennacl::compressed_matrix<NumericT> const & A, - viennacl::compressed_matrix<NumericT> & L, - viennacl::vector<NumericT> & diag_L, - viennacl::compressed_matrix<NumericT> & U, - viennacl::vector<NumericT> & diag_U, - chow_patel_tag const & tag) - { - // make sure L and U have correct dimensions: - L.resize(A.size1(), A.size2(), false); - U.resize(A.size1(), A.size2(), false); - - // initialize L and U from values in A: - viennacl::linalg::extract_LU(A, L, U); - - // diagonally scale values from A in L and U: - viennacl::linalg::ilu_scale(A, L, U); - - // transpose storage layout of U from CSR to CSC via transposition - viennacl::compressed_matrix<NumericT> U_trans; - viennacl::linalg::ilu_transpose(U, U_trans); - - // keep entries of a_ij for the sweeps - viennacl::vector<NumericT> aij_L (L.nnz(), viennacl::traits::context(A)); - viennacl::vector<NumericT> aij_U_trans(U_trans.nnz(), viennacl::traits::context(A)); - - viennacl::backend::memory_copy( L.handle(), aij_L.handle(), 0, 0, sizeof(NumericT) * L.nnz()); - viennacl::backend::memory_copy(U_trans.handle(), aij_U_trans.handle(), 0, 0, sizeof(NumericT) * U_trans.nnz()); - - // run sweeps: - for (vcl_size_t i=0; i<tag.sweeps(); ++i) - viennacl::linalg::ilu_chow_patel_sweep(L, aij_L, U_trans, aij_U_trans); - - // transpose U_trans back: - viennacl::linalg::ilu_transpose(U_trans, U); - - // form (I - D_L^{-1}L) and (I - D_U^{-1} U) - viennacl::linalg::ilu_form_neumann_matrix(L, diag_L); - viennacl::linalg::ilu_form_neumann_matrix(U, diag_U); - } - -} - - - - -/** @brief Parallel Chow-Patel ILU preconditioner class, can be supplied to solve()-routines -*/ -template<typename MatrixT> -class chow_patel_icc_precond -{ - // only works with compressed_matrix! - typedef typename MatrixT::CHOW_PATEL_ICC_ONLY_WORKS_WITH_COMPRESSED_MATRIX error_type; -}; - - -/** @brief Parallel Chow-Patel ILU preconditioner class, can be supplied to solve()-routines. -* -* Specialization for compressed_matrix -*/ -template<typename NumericT, unsigned int AlignmentV> -class chow_patel_icc_precond< viennacl::compressed_matrix<NumericT, AlignmentV> > -{ - -public: - chow_patel_icc_precond(viennacl::compressed_matrix<NumericT, AlignmentV> const & A, chow_patel_tag const & tag) - : tag_(tag), - L_(0, 0, 0, viennacl::traits::context(A)), - diag_L_(A.size1(), viennacl::traits::context(A)), - L_trans_(0, 0, 0, viennacl::traits::context(A)), - x_k_(A.size1(), viennacl::traits::context(A)), - b_(A.size1(), viennacl::traits::context(A)) - { - viennacl::linalg::detail::precondition(A, L_, diag_L_, L_trans_, tag_); - } - - /** @brief Preconditioner application: LL^Tx = b, computed via Ly = b, L^Tx = y using Jacobi iterations. - * - * L contains (I - D_L^{-1}L), L_trans contains (I - D_L^{-1}L^T) where D denotes the respective diagonal matrix - */ - template<typename VectorT> - void apply(VectorT & vec) const - { - // - // y = L^{-1} b through Jacobi iteration y_{k+1} = (I - D^{-1}L)y_k + D^{-1}x - // - b_ = viennacl::linalg::element_div(vec, diag_L_); - x_k_ = b_; - for (unsigned int i=0; i<tag_.jacobi_iters(); ++i) - { - vec = viennacl::linalg::prod(L_, x_k_); - x_k_ = vec + b_; - } - - // - // x = U^{-1} y through Jacobi iteration x_{k+1} = (I - D^{-1}L^T)x_k + D^{-1}b - // - b_ = viennacl::linalg::element_div(x_k_, diag_L_); - x_k_ = b_; // x_1 if x_0 \equiv 0 - for (unsigned int i=0; i<tag_.jacobi_iters(); ++i) - { - vec = viennacl::linalg::prod(L_trans_, x_k_); - x_k_ = vec + b_; - } - - // return result: - vec = x_k_; - } - -private: - chow_patel_tag tag_; - viennacl::compressed_matrix<NumericT> L_; - viennacl::vector<NumericT> diag_L_; - viennacl::compressed_matrix<NumericT> L_trans_; - - mutable viennacl::vector<NumericT> x_k_; - mutable viennacl::vector<NumericT> b_; -}; - - - - - - -/** @brief Parallel Chow-Patel ILU preconditioner class, can be supplied to solve()-routines -*/ -template<typename MatrixT> -class chow_patel_ilu_precond -{ - // only works with compressed_matrix! - typedef typename MatrixT::CHOW_PATEL_ILU_ONLY_WORKS_WITH_COMPRESSED_MATRIX error_type; -}; - - -/** @brief Parallel Chow-Patel ILU preconditioner class, can be supplied to solve()-routines. -* -* Specialization for compressed_matrix -*/ -template<typename NumericT, unsigned int AlignmentV> -class chow_patel_ilu_precond< viennacl::compressed_matrix<NumericT, AlignmentV> > -{ - -public: - chow_patel_ilu_precond(viennacl::compressed_matrix<NumericT, AlignmentV> const & A, chow_patel_tag const & tag) - : tag_(tag), - L_(0, 0, 0, viennacl::traits::context(A)), - diag_L_(A.size1(), viennacl::traits::context(A)), - U_(0, 0, 0, viennacl::traits::context(A)), - diag_U_(A.size1(), viennacl::traits::context(A)), - x_k_(A.size1(), viennacl::traits::context(A)), - b_(A.size1(), viennacl::traits::context(A)) - { - viennacl::linalg::detail::precondition(A, L_, diag_L_, U_, diag_U_, tag_); - } - - /** @brief Preconditioner application: LUx = b, computed via Ly = b, Ux = y using Jacobi iterations. - * - * L_ contains (I - D_L^{-1}L), U_ contains (I - D_U^{-1}U) where D denotes the respective diagonal matrix - */ - template<typename VectorT> - void apply(VectorT & vec) const - { - // - // y = L^{-1} b through Jacobi iteration y_{k+1} = (I - D^{-1}L)y_k + D^{-1}x - // - b_ = viennacl::linalg::element_div(vec, diag_L_); - x_k_ = b_; - for (unsigned int i=0; i<tag_.jacobi_iters(); ++i) - { - vec = viennacl::linalg::prod(L_, x_k_); - x_k_ = vec + b_; - } - - // - // x = U^{-1} y through Jacobi iteration x_{k+1} = (I - D^{-1}U)x_k + D^{-1}b - // - b_ = viennacl::linalg::element_div(x_k_, diag_U_); - x_k_ = b_; // x_1 if x_0 \equiv 0 - for (unsigned int i=0; i<tag_.jacobi_iters(); ++i) - { - vec = viennacl::linalg::prod(U_, x_k_); - x_k_ = vec + b_; - } - - // return result: - vec = x_k_; - } - -private: - chow_patel_tag tag_; - viennacl::compressed_matrix<NumericT> L_; - viennacl::vector<NumericT> diag_L_; - viennacl::compressed_matrix<NumericT> U_; - viennacl::vector<NumericT> diag_U_; - - mutable viennacl::vector<NumericT> x_k_; - mutable viennacl::vector<NumericT> b_; -}; - - -} // namespace linalg -} // namespace viennacl - - -#endif - - -
