#ifndef VIENNACL_LINALG_DETAIL_AMG_AMG_BASE_HPP_
#define VIENNACL_LINALG_DETAIL_AMG_AMG_BASE_HPP_

/* =========================================================================
   Copyright (c) 2010-2016, Institute for Microelectronics,
                            Institute for Analysis and Scientific Computing,
                            TU Wien.
   Portions of this software are copyright by UChicago Argonne, LLC.

                            -----------------
                  ViennaCL - The Vienna Computing Library
                            -----------------

   Project Head:    Karl Rupp                   [email protected]

   (A list of authors and contributors can be found in the manual)

   License:         MIT (X11), see file LICENSE in the base directory
============================================================================= */

/** @file amg_base.hpp
    @brief Helper classes and functions for the AMG preconditioner. Experimental.

    AMG code contributed by Markus Wagner
*/

#include <cmath>
#include <set>
#include <list>
#include <stdexcept>
#include <algorithm>

#include <map>
#ifdef VIENNACL_WITH_OPENMP
#include <omp.h>
#endif

#include "viennacl/context.hpp"

namespace viennacl
{
namespace linalg
{

/** @brief Enumeration of coarsening methods for algebraic multigrid. */
enum amg_coarsening_method
{
  AMG_COARSENING_METHOD_ONEPASS = 1,
  AMG_COARSENING_METHOD_AGGREGATION,
  AMG_COARSENING_METHOD_MIS2_AGGREGATION
};

/** @brief Enumeration of interpolation methods for algebraic multigrid. */
enum amg_interpolation_method
{
  AMG_INTERPOLATION_METHOD_DIRECT = 1,
  AMG_INTERPOLATION_METHOD_AGGREGATION,
  AMG_INTERPOLATION_METHOD_SMOOTHED_AGGREGATION
};


/** @brief A tag for algebraic multigrid (AMG). Used to transport information from the user to the implementation.
*/
class amg_tag
{
public:
  /** @brief The constructor, setting default values for the various parameters.
   *
   * Default coarsening routine: Aggregation based on maximum independent sets of distance 2 (MIS-2)
   * Default interpolation routine: (plain) aggregation. NOTE(review): the initializer below selects
   *                                AMG_INTERPOLATION_METHOD_AGGREGATION, i.e. unsmoothed aggregation;
   *                                call set_interpolation_method() to select smoothed aggregation.
   * Default threshold for strong connections: 0.1 (customizations are recommended!)
   * Default weight for Jacobi smoother: 1.0
   * Default number of pre-smooth operations: 2
   * Default number of post-smooth operations: 2
   * Default number of coarse levels: 0 (this indicates that as many coarse levels as needed are constructed until the cutoff is reached)
   * Default coarse grid size for direct solver (coarsening cutoff): 50
   */
  amg_tag()
  : coarsening_method_(AMG_COARSENING_METHOD_MIS2_AGGREGATION), interpolation_method_(AMG_INTERPOLATION_METHOD_AGGREGATION),
    strong_connection_threshold_(0.1), jacobi_weight_(1.0),
    presmooth_steps_(2), postsmooth_steps_(2),
    coarse_levels_(0), coarse_cutoff_(50) {}

  // Getter-/Setter-Functions
  /** @brief Sets the strategy used for constructing coarse grids */
  void set_coarsening_method(amg_coarsening_method s) { coarsening_method_ = s; }
  /** @brief Returns the current coarsening strategy */
  amg_coarsening_method get_coarsening_method() const { return coarsening_method_; }

  /** @brief Sets the interpolation method to the provided method */
  void set_interpolation_method(amg_interpolation_method interpol) { interpolation_method_ = interpol; }
  /** @brief Returns the current interpolation method */
  amg_interpolation_method get_interpolation_method() const { return interpolation_method_; }

  /** @brief Sets the strong connection threshold. Customizations by the user essential for best results!
   *
   * With classical interpolation, a connection is considered strong if |a_ij| >= threshold * max_k(|a_ik|)
   * Strength of connection currently ignored for aggregation-based coarsening (to be added in the future).
   */
  void set_strong_connection_threshold(double threshold) { if (threshold > 0) strong_connection_threshold_ = threshold; }
  /** @brief Returns the strong connection threshold parameter.
   *
   * @see set_strong_connection_threshold() for an explanation of the threshold parameter
   */
  double get_strong_connection_threshold() const { return strong_connection_threshold_; }

  /** @brief Sets the weight (damping) for the Jacobi smoother.
   *
   * The optimal value depends on the problem at hand. Values of 0.67 or 1.0 are usually a good starting point for further experiments.
   */
  void set_jacobi_weight(double w) { if (w > 0) jacobi_weight_ = w; }
  /** @brief Returns the Jacobi smoother weight (damping). */
  double get_jacobi_weight() const { return jacobi_weight_; }

  /** @brief Sets the number of smoother applications on the fine level before restriction to the coarser level. */
  void set_presmooth_steps(vcl_size_t steps) { presmooth_steps_ = steps; }
  /** @brief Returns the number of smoother applications on the fine level before restriction to the coarser level. */
  vcl_size_t get_presmooth_steps() const { return presmooth_steps_; }

  /** @brief Sets the number of smoother applications on the coarse level before interpolation to the finer level. */
  void set_postsmooth_steps(vcl_size_t steps) { postsmooth_steps_ = steps; }
  /** @brief Returns the number of smoother applications on the coarse level before interpolation to the finer level. */
  vcl_size_t get_postsmooth_steps() const { return postsmooth_steps_; }

  /** @brief Sets the number of coarse levels. If set to zero, then coarse levels are constructed until the cutoff size is reached. */
  void set_coarse_levels(vcl_size_t levels) { coarse_levels_ = levels; }
  /** @brief Returns the number of coarse levels. If zero, then coarse levels are constructed until the cutoff size is reached. */
  vcl_size_t get_coarse_levels() const { return coarse_levels_; }

  /** @brief Sets the coarse grid size for which the recursive multigrid scheme is stopped and a direct solver is used. */
  void set_coarsening_cutoff(vcl_size_t size) { coarse_cutoff_ = size; }
  /** @brief Returns the coarse grid size for which the recursive multigrid scheme is stopped and a direct solver is used. */
  vcl_size_t get_coarsening_cutoff() const { return coarse_cutoff_; }

  /** @brief Sets the ViennaCL context for the setup stage. Set this to a host context if you want to run the setup on the host.
   *
   * Set the ViennaCL context for the solver application via set_target_context().
   * Target and setup context can be different.
   */
  void set_setup_context(viennacl::context ctx) { setup_ctx_ = ctx; }
  /** @brief Returns the ViennaCL context for the preconditioner setup. */
  viennacl::context const & get_setup_context() const { return setup_ctx_; }

  /** @brief Sets the ViennaCL context for the solver cycle stage (i.e. preconditioner applications).
   *
   * Since the cycle stage easily benefits from accelerators, you usually want to set this to a CUDA or OpenCL-enabled context.
   */
  void set_target_context(viennacl::context ctx) { target_ctx_ = ctx; }
  /** @brief Returns the ViennaCL context for the solver cycle stage (i.e. preconditioner applications). */
  viennacl::context const & get_target_context() const { return target_ctx_; }

private:
  amg_coarsening_method    coarsening_method_;
  amg_interpolation_method interpolation_method_;
  double strong_connection_threshold_, jacobi_weight_;
  vcl_size_t presmooth_steps_, postsmooth_steps_, coarse_levels_, coarse_cutoff_;
  viennacl::context setup_ctx_, target_ctx_;
};


namespace detail
{
namespace amg
{


  /** @brief Per-level working data for the AMG setup stage (influence graph in CSR-like layout plus point classification). */
  struct amg_level_context
  {
    // (Re)allocate all per-point arrays; 'false' suppresses padding to an internal alignment.
    void resize(vcl_size_t num_points, vcl_size_t max_nnz)
    {
      influence_jumper_.resize(num_points + 1, false);
      influence_ids_.resize(max_nnz, false);
      influence_values_.resize(num_points, false);
      point_types_.resize(num_points, false);
      coarse_id_.resize(num_points, false);
    }

    // Migrate all per-point arrays to the memory domain of the provided context.
    void switch_context(viennacl::context ctx)
    {
      influence_jumper_.switch_memory_context(ctx);
      influence_ids_.switch_memory_context(ctx);
      influence_values_.switch_memory_context(ctx);
      point_types_.switch_memory_context(ctx);
      coarse_id_.switch_memory_context(ctx);
    }

    // NOTE(review): this declares a data member named 'amg_point_types' of an unnamed enum type
    // (not an enum *type* declaration); the enumerators below are the values stored in point_types_.
    enum
    {
      POINT_TYPE_UNDECIDED = 0,
      POINT_TYPE_COARSE,
      POINT_TYPE_FINE
    } amg_point_types;

    viennacl::vector<unsigned int> influence_jumper_; // similar to row_buffer for CSR matrices
    viennacl::vector<unsigned int> influence_ids_;    // IDs of influencing points
    viennacl::vector<unsigned int> influence_values_; // Influence measure for each point
    viennacl::vector<unsigned int> point_types_;      // 0: undecided, 1: coarse point, 2: fine point (see POINT_TYPE_* above). Stored as unsigned int.
    viennacl::vector<unsigned int> coarse_id_;        // coarse ID used on the next level. Only valid for coarse points. Fine points may (ab)use their entry for something else.
    unsigned int num_coarse_;                         // number of coarse points on this level (i.e. size of the next-coarser level)
  };


} //namespace amg
} //namespace detail
} //namespace linalg
} //namespace viennacl

#endif
#ifndef VIENNACL_LINALG_DETAIL_BISECT_KERNEL_CALLS_HPP_
#define VIENNACL_LINALG_DETAIL_BISECT_KERNEL_CALLS_HPP_

/* =========================================================================
   Copyright (c) 2010-2016, Institute for Microelectronics,
                            Institute for Analysis and Scientific Computing,
                            TU Wien.
   Portions of this software are copyright by UChicago Argonne, LLC.

                            -----------------
                  ViennaCL - The Vienna Computing Library
                            -----------------

   Project Head:    Karl Rupp                   [email protected]

   (A list of authors and contributors can be found in the manual)

   License:         MIT (X11), see file LICENSE in the base directory
============================================================================= */

/** @file viennacl/linalg/detail/bisect/bisect_kernel_calls.hpp
    @brief Kernel calls for the bisection algorithm

    Implementation based on the sample provided with the CUDA 6.0 SDK, for which
    the creation of derivative works is allowed by including the following statement:
    "This software contains source code provided by NVIDIA Corporation."
*/


#include "viennacl/forwards.h"
#include "viennacl/scalar.hpp"
#include "viennacl/vector.hpp"
#include "viennacl/vector_proxy.hpp"
#include "viennacl/tools/tools.hpp"
#include "viennacl/meta/enable_if.hpp"
#include "viennacl/meta/predicate.hpp"
#include "viennacl/meta/result_of.hpp"
#include "viennacl/traits/size.hpp"
#include "viennacl/traits/start.hpp"
#include "viennacl/traits/handle.hpp"
#include "viennacl/traits/stride.hpp"

#include "viennacl/linalg/detail/bisect/structs.hpp"
#ifdef VIENNACL_WITH_OPENCL
  #include "viennacl/linalg/opencl/bisect_kernel_calls.hpp"
#endif

#ifdef VIENNACL_WITH_CUDA
  #include "viennacl/linalg/cuda/bisect_kernel_calls.hpp"
#endif

namespace viennacl
{
namespace linalg
{
namespace detail
{
  /** @brief Dispatches the "small matrix" bisection kernel to the backend (OpenCL or CUDA)
   *         that owns the input data, selected via the active memory handle of input.g_a.
   *
   * @param input     Handles to the input data (diagonal/superdiagonal of the tridiagonal matrix)
   * @param result    Handles to the result data
   * @param mat_size  Matrix size
   * @param lg        Lower limit of the Gerschgorin interval
   * @param ug        Upper limit of the Gerschgorin interval
   * @param precision Desired precision of the eigenvalues
   *
   * Throws memory_exception if the data lives in no initialised memory domain or in a
   * domain for which no backend was compiled in.
   */
  template<typename NumericT>
  void bisectSmall(const InputData<NumericT> &input, ResultDataSmall<NumericT> &result,
                   const unsigned int mat_size,
                   const NumericT lg, const NumericT ug,
                   const NumericT precision)
  {
    switch (viennacl::traits::handle(input.g_a).get_active_handle_id())
    {
#ifdef VIENNACL_WITH_OPENCL
      case viennacl::OPENCL_MEMORY:
        viennacl::linalg::opencl::bisectSmall(input, result,
                                              mat_size,
                                              lg,ug,
                                              precision);
        break;
#endif
#ifdef VIENNACL_WITH_CUDA
      case viennacl::CUDA_MEMORY:
        viennacl::linalg::cuda::bisectSmall(input, result,
                                            mat_size,
                                            lg,ug,
                                            precision);
        break;
#endif
      case viennacl::MEMORY_NOT_INITIALIZED:
        throw memory_exception("not initialised!");
      default:
        throw memory_exception("not implemented");
    }
  }




  /** @brief Dispatches the first "large matrix" bisection step, which decides for each interval
   *         whether the one-eigenvalue or the multi-eigenvalue kernel must process it.
   *
   * Backend selection and error behavior are identical to bisectSmall().
   */
  template<typename NumericT>
  void bisectLarge(const InputData<NumericT> &input, ResultDataLarge<NumericT> &result,
                   const unsigned int mat_size,
                   const NumericT lg, const NumericT ug,
                   const NumericT precision)
  {
    switch (viennacl::traits::handle(input.g_a).get_active_handle_id())
    {
#ifdef VIENNACL_WITH_OPENCL
      case viennacl::OPENCL_MEMORY:
        viennacl::linalg::opencl::bisectLarge(input, result,
                                              mat_size,
                                              lg,ug,
                                              precision);
        break;
#endif
#ifdef VIENNACL_WITH_CUDA
      case viennacl::CUDA_MEMORY:
        viennacl::linalg::cuda::bisectLarge(input, result,
                                            mat_size,
                                            lg,ug,
                                            precision);
        break;
#endif
      case viennacl::MEMORY_NOT_INITIALIZED:
        throw memory_exception("not initialised!");
      default:
        throw memory_exception("not implemented");
    }
  }





  /** @brief Dispatches the kernel processing intervals that contain exactly one eigenvalue
   *         after the first bisectLarge() step.
   *
   * NOTE(review): the OpenCL backend entry point is named bisectLargeOneIntervals (no
   * underscore) while the CUDA one is bisectLarge_OneIntervals — mirrors the backend headers.
   */
  template<typename NumericT>
  void bisectLarge_OneIntervals(const InputData<NumericT> &input, ResultDataLarge<NumericT> &result,
                                const unsigned int mat_size,
                                const NumericT precision)
  {
    switch (viennacl::traits::handle(input.g_a).get_active_handle_id())
    {
#ifdef VIENNACL_WITH_OPENCL
      case viennacl::OPENCL_MEMORY:
        viennacl::linalg::opencl::bisectLargeOneIntervals(input, result,
                                                          mat_size,
                                                          precision);
        break;
#endif
#ifdef VIENNACL_WITH_CUDA
      case viennacl::CUDA_MEMORY:
        viennacl::linalg::cuda::bisectLarge_OneIntervals(input, result,
                                                         mat_size,
                                                         precision);

        break;
#endif
      case viennacl::MEMORY_NOT_INITIALIZED:
        throw memory_exception("not initialised!");
      default:
        throw memory_exception("not implemented");
    }
  }




  /** @brief Dispatches the kernel processing intervals that contain more than one eigenvalue
   *         after the first bisectLarge() step.
   *
   * Backend selection and error behavior are identical to bisectLarge_OneIntervals().
   */
  template<typename NumericT>
  void bisectLarge_MultIntervals(const InputData<NumericT> &input, ResultDataLarge<NumericT> &result,
                                 const unsigned int mat_size,
                                 const NumericT precision)
  {
    switch (viennacl::traits::handle(input.g_a).get_active_handle_id())
    {
#ifdef VIENNACL_WITH_OPENCL
      case viennacl::OPENCL_MEMORY:
        viennacl::linalg::opencl::bisectLargeMultIntervals(input, result,
                                                           mat_size,
                                                           precision);
        break;
#endif
#ifdef VIENNACL_WITH_CUDA
      case viennacl::CUDA_MEMORY:
        viennacl::linalg::cuda::bisectLarge_MultIntervals(input, result,
                                                          mat_size,
                                                          precision);
        break;
#endif
      case viennacl::MEMORY_NOT_INITIALIZED:
        throw memory_exception("not initialised!");
      default:
        throw memory_exception("not implemented");
    }
  }
} // namespace detail
} // namespace linalg
} //namespace viennacl


#endif
http://git-wip-us.apache.org/repos/asf/mahout/blob/f7c1f802/native-viennaCL/src/main/cpp/viennacl/linalg/detail/bisect/bisect_large.hpp ---------------------------------------------------------------------- diff --git a/native-viennaCL/src/main/cpp/viennacl/linalg/detail/bisect/bisect_large.hpp b/native-viennaCL/src/main/cpp/viennacl/linalg/detail/bisect/bisect_large.hpp new file mode 100755 index 0000000..337858f --- /dev/null +++ b/native-viennaCL/src/main/cpp/viennacl/linalg/detail/bisect/bisect_large.hpp @@ -0,0 +1,142 @@ +#ifndef VIENNACL_LINALG_DETAIL_BISECT_BISECT_LARGE_HPP_ +#define VIENNACL_LINALG_DETAIL_BISECT_BISECT_LARGE_HPP_ + +/* ========================================================================= + Copyright (c) 2010-2016, Institute for Microelectronics, + Institute for Analysis and Scientific Computing, + TU Wien. + Portions of this software are copyright by UChicago Argonne, LLC. + + ----------------- + ViennaCL - The Vienna Computing Library + ----------------- + + Project Head: Karl Rupp [email protected] + + (A list of authors and contributors can be found in the manual) + + License: MIT (X11), see file LICENSE in the base directory +============================================================================= */ + + +/** @file viennacl/linalg/detail//bisect/bisect_large.hpp + @brief Computation of eigenvalues of a large symmetric, tridiagonal matrix + + Implementation based on the sample provided with the CUDA 6.0 SDK, for which + the creation of derivative works is allowed by including the following statement: + "This software contains source code provided by NVIDIA Corporation." 
+*/ + + +// includes, system +#include <iostream> +#include <iomanip> +#include <stdlib.h> +#include <stdio.h> + +// includes, project +#include "viennacl/linalg/detail/bisect/config.hpp" +#include "viennacl/linalg/detail/bisect/structs.hpp" +#include "viennacl/linalg/detail/bisect/bisect_kernel_calls.hpp" + +namespace viennacl +{ +namespace linalg +{ +namespace detail +{ + +//////////////////////////////////////////////////////////////////////////////// +//! Run the kernels to compute the eigenvalues for large matrices +//! @param input handles to input data +//! @param result handles to result data +//! @param mat_size matrix size +//! @param precision desired precision of eigenvalues +//! @param lg lower limit of Gerschgorin interval +//! @param ug upper limit of Gerschgorin interval +//////////////////////////////////////////////////////////////////////////////// +template<typename NumericT> +void +computeEigenvaluesLargeMatrix(InputData<NumericT> &input, ResultDataLarge<NumericT> &result, + const unsigned int mat_size, + const NumericT lg, const NumericT ug, const NumericT precision) +{ + + + // First kernel call: decide on which intervals bisect_Large_OneIntervals/ + // bisect_Large_MultIntervals is executed + viennacl::linalg::detail::bisectLarge(input, result, mat_size, lg, ug, precision); + + // compute eigenvalues for intervals that contained only one eigenvalue + // after the first processing step + viennacl::linalg::detail::bisectLarge_OneIntervals(input, result, mat_size, precision); + + // process intervals that contained more than one eigenvalue after + // the first processing step + viennacl::linalg::detail::bisectLarge_MultIntervals(input, result, mat_size, precision); + +} + +//////////////////////////////////////////////////////////////////////////////// +//! Process the result, that is obtain result from device and do simple sanity +//! checking +//! @param result handles to result data +//! 
@param mat_size matrix size +//////////////////////////////////////////////////////////////////////////////// +template<typename NumericT> +bool +processResultDataLargeMatrix(ResultDataLarge<NumericT> &result, + const unsigned int mat_size) +{ + bool bCompareResult = true; + // copy data from intervals that contained more than one eigenvalue after + // the first processing step + std::vector<NumericT> lambda_mult(mat_size); + viennacl::copy(result.g_lambda_mult, lambda_mult); + + std::vector<unsigned int> pos_mult(mat_size); + viennacl::copy(result.g_pos_mult, pos_mult); + + std::vector<unsigned int> blocks_mult_sum(mat_size); + viennacl::copy(result.g_blocks_mult_sum, blocks_mult_sum); + + unsigned int num_one_intervals = result.g_num_one; + unsigned int sum_blocks_mult = mat_size - num_one_intervals; + + + // copy data for intervals that contained one eigenvalue after the first + // processing step + std::vector<NumericT> left_one(mat_size); + std::vector<NumericT> right_one(mat_size); + std::vector<unsigned int> pos_one(mat_size); + + viennacl::copy(result.g_left_one, left_one); + viennacl::copy(result.g_right_one, right_one); + viennacl::copy(result.g_pos_one, pos_one); + + + // singleton intervals generated in the second step + for (unsigned int i = 0; i < sum_blocks_mult; ++i) + { + if (pos_mult[i] != 0) + result.std_eigenvalues[pos_mult[i] - 1] = lambda_mult[i]; + + else + { + throw memory_exception("Invalid array index! 
Are there more than 256 equal eigenvalues?"); + } + } + + // singleton intervals generated in the first step + unsigned int index = 0; + + for (unsigned int i = 0; i < num_one_intervals; ++i, ++index) + { + result.std_eigenvalues[pos_one[i] - 1] = left_one[i]; + } + return bCompareResult; +} +} // namespace detail +} // namespace linalg +} // namespace viennacl +#endif //VIENNACL_LINALG_DETAIL_BISECT_LARGE_HPP_ http://git-wip-us.apache.org/repos/asf/mahout/blob/f7c1f802/native-viennaCL/src/main/cpp/viennacl/linalg/detail/bisect/bisect_small.hpp ---------------------------------------------------------------------- diff --git a/native-viennaCL/src/main/cpp/viennacl/linalg/detail/bisect/bisect_small.hpp b/native-viennaCL/src/main/cpp/viennacl/linalg/detail/bisect/bisect_small.hpp new file mode 100755 index 0000000..144640b --- /dev/null +++ b/native-viennaCL/src/main/cpp/viennacl/linalg/detail/bisect/bisect_small.hpp @@ -0,0 +1,96 @@ +#ifndef VIENNACL_LINALG_DETAIL_BISECT_SMALL_HPP_ +#define VIENNACL_LINALG_DETAIL_BISECT_SMALL_HPP_ + +/* ========================================================================= + Copyright (c) 2010-2016, Institute for Microelectronics, + Institute for Analysis and Scientific Computing, + TU Wien. + Portions of this software are copyright by UChicago Argonne, LLC. 
+ + ----------------- + ViennaCL - The Vienna Computing Library + ----------------- + + Project Head: Karl Rupp [email protected] + + (A list of authors and contributors can be found in the manual) + + License: MIT (X11), see file LICENSE in the base directory +============================================================================= */ + + +/** @file viennacl/linalg/detail//bisect/bisect_small.hpp + @brief Computation of eigenvalues of a small symmetric, tridiagonal matrix + + Implementation based on the sample provided with the CUDA 6.0 SDK, for which + the creation of derivative works is allowed by including the following statement: + "This software contains source code provided by NVIDIA Corporation." +*/ + +// includes, system +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <math.h> +#include <float.h> + +// includes, project + +#include "viennacl/linalg/detail/bisect/structs.hpp" + +// includes, kernels +#include "viennacl/linalg/detail/bisect/bisect_kernel_calls.hpp" + +namespace viennacl +{ +namespace linalg +{ +namespace detail +{ +//////////////////////////////////////////////////////////////////////////////// +//! Determine eigenvalues for matrices smaller than MAX_SMALL_MATRIX +//! @param input handles to input data of kernel +//! @param result handles to result of kernel +//! @param mat_size matrix size +//! @param lg lower limit of Gerschgorin interval +//! @param ug upper limit of Gerschgorin interval +//! 
@param precision desired precision of eigenvalues +//////////////////////////////////////////////////////////////////////////////// +template<typename NumericT> +void +computeEigenvaluesSmallMatrix(const InputData<NumericT> &input, ResultDataSmall<NumericT> &result, + const unsigned int mat_size, + const NumericT lg, const NumericT ug, + const NumericT precision) +{ + viennacl::linalg::detail::bisectSmall( input, result, mat_size, lg, ug, precision); +} + + +//////////////////////////////////////////////////////////////////////////////// +//! Process the result obtained on the device, that is transfer to host and +//! perform basic sanity checking +//! @param result handles to result data +//! @param mat_size matrix size +//////////////////////////////////////////////////////////////////////////////// +template<typename NumericT> +void +processResultSmallMatrix(ResultDataSmall<NumericT> &result, + const unsigned int mat_size) +{ + // copy data back to host + std::vector<NumericT> left(mat_size); + std::vector<unsigned int> left_count(mat_size); + + viennacl::copy(result.vcl_g_left, left); + viennacl::copy(result.vcl_g_left_count, left_count); + + for (unsigned int i = 0; i < mat_size; ++i) + { + result.std_eigenvalues[left_count[i]] = left[i]; + } +} +} // namespace detail +} // namespace linalg +} // namespace viennacl +#endif http://git-wip-us.apache.org/repos/asf/mahout/blob/f7c1f802/native-viennaCL/src/main/cpp/viennacl/linalg/detail/bisect/config.hpp ---------------------------------------------------------------------- diff --git a/native-viennaCL/src/main/cpp/viennacl/linalg/detail/bisect/config.hpp b/native-viennaCL/src/main/cpp/viennacl/linalg/detail/bisect/config.hpp new file mode 100755 index 0000000..3afa509 --- /dev/null +++ b/native-viennaCL/src/main/cpp/viennacl/linalg/detail/bisect/config.hpp @@ -0,0 +1,44 @@ +#ifndef VIENNACL_LINALG_DETAIL_BISECT_CONFIG_HPP_ +#define VIENNACL_LINALG_DETAIL_BISECT_CONFIG_HPP_ + +/* 
========================================================================= + Copyright (c) 2010-2016, Institute for Microelectronics, + Institute for Analysis and Scientific Computing, + TU Wien. + Portions of this software are copyright by UChicago Argonne, LLC. + + ----------------- + ViennaCL - The Vienna Computing Library + ----------------- + + Project Head: Karl Rupp [email protected] + + (A list of authors and contributors can be found in the manual) + + License: MIT (X11), see file LICENSE in the base directory +============================================================================= */ + + + +/** @file viennacl/linalg/detail//bisect/config.hpp + * @brief Global configuration parameters + * + * Implementation based on the sample provided with the CUDA 6.0 SDK, for which + * the creation of derivative works is allowed by including the following statement: + * "This software contains source code provided by NVIDIA Corporation." + * */ + +// should be power of two +#define VIENNACL_BISECT_MAX_THREADS_BLOCK 256 + +#ifdef VIENNACL_WITH_OPENCL +# define VIENNACL_BISECT_MAX_SMALL_MATRIX 256 +# define VIENNACL_BISECT_MAX_THREADS_BLOCK_SMALL_MATRIX 256 +#else // if CUDA is used +# define VIENNACL_BISECT_MAX_THREADS_BLOCK_SMALL_MATRIX 512 // change to 256 if errors occur +# define VIENNACL_BISECT_MAX_SMALL_MATRIX 512 // change to 256 if errors occur +#endif + + #define VIENNACL_BISECT_MIN_ABS_INTERVAL 5.0e-37 + +#endif http://git-wip-us.apache.org/repos/asf/mahout/blob/f7c1f802/native-viennaCL/src/main/cpp/viennacl/linalg/detail/bisect/gerschgorin.hpp ---------------------------------------------------------------------- diff --git a/native-viennaCL/src/main/cpp/viennacl/linalg/detail/bisect/gerschgorin.hpp b/native-viennaCL/src/main/cpp/viennacl/linalg/detail/bisect/gerschgorin.hpp new file mode 100755 index 0000000..53cd863 --- /dev/null +++ b/native-viennaCL/src/main/cpp/viennacl/linalg/detail/bisect/gerschgorin.hpp @@ -0,0 +1,94 @@ +#ifndef 
_VIENNACL_LINALG_DETAIL_BISECT_GERSCHORIN_HPP_ +#define _VIENNACL_LINALG_DETAIL_BISECT_GERSCHORIN_HPP_ + + +/* ========================================================================= + Copyright (c) 2010-2016, Institute for Microelectronics, + Institute for Analysis and Scientific Computing, + TU Wien. + Portions of this software are copyright by UChicago Argonne, LLC. + + ----------------- + ViennaCL - The Vienna Computing Library + ----------------- + + Project Head: Karl Rupp [email protected] + + (A list of authors and contributors can be found in the manual) + + License: MIT (X11), see file LICENSE in the base directory +============================================================================= */ + + +/** @file viennacl/linalg/detail//bisect/gerschgorin.hpp + @brief Computation of Gerschgorin interval for symmetric, tridiagonal matrix + + Implementation based on the sample provided with the CUDA 6.0 SDK, for which + the creation of derivative works is allowed by including the following statement: + "This software contains source code provided by NVIDIA Corporation." +*/ + +#include <cstdio> +#include <cstdlib> +#include <cmath> +#include <cfloat> + +#include "viennacl/linalg/detail/bisect/util.hpp" +#include "viennacl/vector.hpp" + +namespace viennacl +{ +namespace linalg +{ +namespace detail +{ + //////////////////////////////////////////////////////////////////////////////// + //! Compute Gerschgorin interval for symmetric, tridiagonal matrix + //! @param d diagonal elements + //! @param s superdiagonal elements + //! @param n size of matrix + //! @param lg lower limit of Gerschgorin interval + //! 
@param ug upper limit of Gerschgorin interval + //////////////////////////////////////////////////////////////////////////////// + template<typename NumericT> + void + computeGerschgorin(std::vector<NumericT> & d, std::vector<NumericT> & s, unsigned int n, NumericT &lg, NumericT &ug) + { + // compute bounds + for (unsigned int i = 1; i < (n - 1); ++i) + { + + // sum over the absolute values of all elements of row i + NumericT sum_abs_ni = fabsf(s[i]) + fabsf(s[i + 1]); + + lg = min(lg, d[i] - sum_abs_ni); + ug = max(ug, d[i] + sum_abs_ni); + } + + // first and last row, only one superdiagonal element + + // first row + lg = min(lg, d[0] - fabsf(s[1])); + ug = max(ug, d[0] + fabsf(s[1])); + + // last row + lg = min(lg, d[n-1] - fabsf(s[n-1])); + ug = max(ug, d[n-1] + fabsf(s[n-1])); + + // increase interval to avoid side effects of fp arithmetic + NumericT bnorm = max(fabsf(ug), fabsf(lg)); + + // these values depend on the implmentation of floating count that is + // employed in the following + NumericT psi_0 = 11 * FLT_EPSILON * bnorm; + NumericT psi_n = 11 * FLT_EPSILON * bnorm; + + lg = lg - bnorm * 2 * static_cast<NumericT>(n) * FLT_EPSILON - psi_0; + ug = ug + bnorm * 2 * static_cast<NumericT>(n) * FLT_EPSILON + psi_n; + + ug = max(lg, ug); + } +} // namespace detail +} // namespace linalg +} // namespace viennacl +#endif // _VIENNACL_LINALG_DETAIL_GERSCHORIN_H_ http://git-wip-us.apache.org/repos/asf/mahout/blob/f7c1f802/native-viennaCL/src/main/cpp/viennacl/linalg/detail/bisect/structs.hpp ---------------------------------------------------------------------- diff --git a/native-viennaCL/src/main/cpp/viennacl/linalg/detail/bisect/structs.hpp b/native-viennaCL/src/main/cpp/viennacl/linalg/detail/bisect/structs.hpp new file mode 100755 index 0000000..1943da3 --- /dev/null +++ b/native-viennaCL/src/main/cpp/viennacl/linalg/detail/bisect/structs.hpp @@ -0,0 +1,182 @@ +#ifndef VIENNACL_LINALG_DETAIL_BISECT_STRUCTS_HPP_ +#define 
VIENNACL_LINALG_DETAIL_BISECT_STRUCTS_HPP_ + +/* ========================================================================= + Copyright (c) 2010-2016, Institute for Microelectronics, + Institute for Analysis and Scientific Computing, + TU Wien. + Portions of this software are copyright by UChicago Argonne, LLC. + + ----------------- + ViennaCL - The Vienna Computing Library + ----------------- + + Project Head: Karl Rupp [email protected] + + (A list of authors and contributors can be found in the manual) + + License: MIT (X11), see file LICENSE in the base directory +============================================================================= */ + + +/** @file viennacl/linalg/detail//bisect/structs.hpp + @brief Helper structures to simplify variable handling + + Implementation based on the sample provided with the CUDA 6.0 SDK, for which + the creation of derivative works is allowed by including the following statement: + "This software contains source code provided by NVIDIA Corporation." +*/ + + + +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <math.h> +#include <float.h> +#include <assert.h> + +#include "viennacl/vector.hpp" +#include "viennacl/matrix.hpp" + +namespace viennacl +{ +namespace linalg +{ +namespace detail +{ + +///////////////////////////////////////////////////////////////////////////////// +//! In this class the input matrix is stored +///////////////////////////////////////////////////////////////////////////////// +template<typename NumericT> +struct InputData +{ + //! host side representation of diagonal + std::vector<NumericT> std_a; + //! host side representation superdiagonal + std::vector<NumericT> std_b; + //! 
device side representation of diagonal + viennacl::vector<NumericT> g_a; + //!device side representation of superdiagonal + viennacl::vector<NumericT> g_b; + + /** @brief Initialize the input data to the algorithm + * + * @param diagonal vector with the diagonal elements + * @param superdiagonal vector with the superdiagonal elements + * @param sz size of the matrix + */ + InputData(std::vector<NumericT> diagonal, std::vector<NumericT> superdiagonal, const unsigned int sz) : + std_a(sz), std_b(sz), g_a(sz), g_b(sz) + { + std_a = diagonal; + std_b = superdiagonal; + + viennacl::copy(std_b, g_b); + viennacl::copy(std_a, g_a); + } + + InputData(viennacl::vector<NumericT> diagonal, viennacl::vector<NumericT> superdiagonal, const unsigned int sz) : + std_a(sz), std_b(sz), g_a(sz), g_b(sz) + { + g_a = diagonal; + g_b = superdiagonal; + + viennacl::copy(g_a, std_a); + viennacl::copy(g_b, std_b); + } +}; + + +///////////////////////////////////////////////////////////////////////////////// +//! In this class the data of the result for small matrices is stored +///////////////////////////////////////////////////////////////////////////////// +template<typename NumericT> +struct ResultDataSmall +{ + //! eigenvalues (host side) + std::vector<NumericT> std_eigenvalues; + //! left interval limits at the end of the computation + viennacl::vector<NumericT> vcl_g_left; + //! right interval limits at the end of the computation + viennacl::vector<NumericT> vcl_g_right; + //! number of eigenvalues smaller than the left interval limit + viennacl::vector<unsigned int> vcl_g_left_count; + //! number of eigenvalues bigger than the right interval limit + viennacl::vector<unsigned int> vcl_g_right_count; + + + //////////////////////////////////////////////////////////////////////////////// + //! 
Initialize variables and memory for the result for small matrices + //////////////////////////////////////////////////////////////////////////////// + ResultDataSmall(const unsigned int mat_size) : + std_eigenvalues(mat_size), vcl_g_left(mat_size), vcl_g_right(mat_size), vcl_g_left_count(mat_size), vcl_g_right_count(mat_size) {} +}; + + + + + +///////////////////////////////////////////////////////////////////////////////// +//! In this class the data of the result for large matrices is stored +///////////////////////////////////////////////////////////////////////////////// +template<typename NumericT> +struct ResultDataLarge +{ +//! eigenvalues + std::vector<NumericT> std_eigenvalues; + + //! number of intervals containing one eigenvalue after the first step + viennacl::scalar<unsigned int> g_num_one; + + //! number of (thread) blocks of intervals containing multiple eigenvalues after the first steo + viennacl::scalar<unsigned int> g_num_blocks_mult; + + //! left interval limits of intervals containing one eigenvalue after the first iteration step + viennacl::vector<NumericT> g_left_one; + + //! right interval limits of intervals containing one eigenvalue after the first iteration step + viennacl::vector<NumericT> g_right_one; + + //! interval indices (position in sorted listed of eigenvalues) of intervals containing one eigenvalue after the first iteration step + viennacl::vector<unsigned int> g_pos_one; + + //! left interval limits of intervals containing multiple eigenvalues after the first iteration step + viennacl::vector<NumericT> g_left_mult; + //! right interval limits of intervals containing multiple eigenvalues after the first iteration step + viennacl::vector<NumericT> g_right_mult; + + //! number of eigenvalues less than the left limit of the eigenvalue intervals containing multiple eigenvalues + viennacl::vector<unsigned int> g_left_count_mult; + + //! 
number of eigenvalues less than the right limit of the eigenvalue intervals containing multiple eigenvalues + viennacl::vector<unsigned int> g_right_count_mult; + //! start addresses in g_left_mult etc. of blocks of intervals containing more than one eigenvalue after the first step + viennacl::vector<unsigned int> g_blocks_mult; + + //! accumulated number of intervals in g_left_mult etc. of blocks of intervals containing more than one eigenvalue after the first step + viennacl::vector<unsigned int> g_blocks_mult_sum; + + //! eigenvalues that have been generated in the second step from intervals that still contained multiple eigenvalues after the first step + viennacl::vector<NumericT> g_lambda_mult; + + //! eigenvalue index of intervals that have been generated in the second processing step + viennacl::vector<unsigned int> g_pos_mult; + + /** @brief Initialize variables and memory for result + * + * @param mat_size size of the matrix + */ + ResultDataLarge(unsigned int mat_size) : + std_eigenvalues(mat_size), g_num_one(0), g_num_blocks_mult(0), + g_left_one(mat_size), g_right_one(mat_size), g_pos_one(mat_size), + g_left_mult(mat_size), g_right_mult(mat_size), g_left_count_mult(mat_size), g_right_count_mult(mat_size), + g_blocks_mult(mat_size), g_blocks_mult_sum(mat_size), g_lambda_mult(mat_size), g_pos_mult(mat_size) {} + +}; +} // namespace detail +} // namespace linalg +} // namespace viennacl +#endif // #ifndef VIENNACL_LINALG_DETAIL_STRUCTS_HPP_ + http://git-wip-us.apache.org/repos/asf/mahout/blob/f7c1f802/native-viennaCL/src/main/cpp/viennacl/linalg/detail/bisect/util.hpp ---------------------------------------------------------------------- diff --git a/native-viennaCL/src/main/cpp/viennacl/linalg/detail/bisect/util.hpp b/native-viennaCL/src/main/cpp/viennacl/linalg/detail/bisect/util.hpp new file mode 100755 index 0000000..883d202 --- /dev/null +++ b/native-viennaCL/src/main/cpp/viennacl/linalg/detail/bisect/util.hpp @@ -0,0 +1,106 @@ +#ifndef 
VIENNACL_LINALG_DETAIL_BISECT_UTIL_HPP_ +#define VIENNACL_LINALG_DETAIL_BISECT_UTIL_HPP_ + +/* ========================================================================= + Copyright (c) 2010-2016, Institute for Microelectronics, + Institute for Analysis and Scientific Computing, + TU Wien. + Portions of this software are copyright by UChicago Argonne, LLC. + + ----------------- + ViennaCL - The Vienna Computing Library + ----------------- + + Project Head: Karl Rupp [email protected] + + (A list of authors and contributors can be found in the manual) + + License: MIT (X11), see file LICENSE in the base directory +============================================================================= */ + + +/** @file viennacl/linalg/detail//bisect/util.hpp + @brief Utility functions + + Implementation based on the sample provided with the CUDA 6.0 SDK, for which + the creation of derivative works is allowed by including the following statement: + "This software contains source code provided by NVIDIA Corporation." +*/ + +namespace viennacl +{ +namespace linalg +{ +namespace detail +{ + +//////////////////////////////////////////////////////////////////////////////// +//! Minimum +//////////////////////////////////////////////////////////////////////////////// +template<class T> +#ifdef __CUDACC__ +__host__ __device__ +#endif +T +min(const T &lhs, const T &rhs) +{ + + return (lhs < rhs) ? lhs : rhs; +} + +//////////////////////////////////////////////////////////////////////////////// +//! Maximum +//////////////////////////////////////////////////////////////////////////////// +template<class T> +#ifdef __CUDACC__ +__host__ __device__ +#endif +T +max(const T &lhs, const T &rhs) +{ + + return (lhs < rhs) ? rhs : lhs; +} + +//////////////////////////////////////////////////////////////////////////////// +//! 
Sign of number (float) +//////////////////////////////////////////////////////////////////////////////// +#ifdef __CUDACC__ +__host__ __device__ +#endif +inline float +sign_f(const float &val) +{ + return (val < 0.0f) ? -1.0f : 1.0f; +} + +//////////////////////////////////////////////////////////////////////////////// +//! Sign of number (double) +//////////////////////////////////////////////////////////////////////////////// +#ifdef __CUDACC__ +__host__ __device__ +#endif +inline double +sign_d(const double &val) +{ + return (val < 0.0) ? -1.0 : 1.0; +} + +/////////////////////////////////////////////////////////////////////////////// +//! Get the number of blocks that are required to process \a num_threads with +//! \a num_threads_blocks threads per block +/////////////////////////////////////////////////////////////////////////////// +extern "C" +inline +unsigned int +getNumBlocksLinear(const unsigned int num_threads, + const unsigned int num_threads_block) +{ + const unsigned int block_rem = + ((num_threads % num_threads_block) != 0) ? 
1 : 0; + return (num_threads / num_threads_block) + block_rem; +} +} // namespace detail +} // namespace linalg +} // namespace viennacl +#endif // #ifndef VIENNACL_LINALG_DETAIL_UTIL_HPP_ http://git-wip-us.apache.org/repos/asf/mahout/blob/f7c1f802/native-viennaCL/src/main/cpp/viennacl/linalg/detail/ilu/block_ilu.hpp ---------------------------------------------------------------------- diff --git a/native-viennaCL/src/main/cpp/viennacl/linalg/detail/ilu/block_ilu.hpp b/native-viennaCL/src/main/cpp/viennacl/linalg/detail/ilu/block_ilu.hpp new file mode 100644 index 0000000..1540e2d --- /dev/null +++ b/native-viennaCL/src/main/cpp/viennacl/linalg/detail/ilu/block_ilu.hpp @@ -0,0 +1,617 @@ +#ifndef VIENNACL_LINALG_DETAIL_BLOCK_ILU_HPP_ +#define VIENNACL_LINALG_DETAIL_BLOCK_ILU_HPP_ + +/* ========================================================================= + Copyright (c) 2010-2016, Institute for Microelectronics, + Institute for Analysis and Scientific Computing, + TU Wien. + Portions of this software are copyright by UChicago Argonne, LLC. + + ----------------- + ViennaCL - The Vienna Computing Library + ----------------- + + Project Head: Karl Rupp [email protected] + + (A list of authors and contributors can be found in the manual) + + License: MIT (X11), see file LICENSE in the base directory +============================================================================= */ + +/** @file viennacl/linalg/detail/ilu/block_ilu.hpp + @brief Implementations of incomplete block factorization preconditioners +*/ + +#include <vector> +#include <cmath> +#include "viennacl/forwards.h" +#include "viennacl/tools/tools.hpp" +#include "viennacl/linalg/detail/ilu/common.hpp" +#include "viennacl/linalg/detail/ilu/ilu0.hpp" +#include "viennacl/linalg/detail/ilu/ilut.hpp" + +#include <map> + +namespace viennacl +{ +namespace linalg +{ +namespace detail +{ + /** @brief Helper range class for representing a subvector of a larger buffer. 
*/ + template<typename VectorT, typename NumericT, typename SizeT = vcl_size_t> + class ilu_vector_range + { + public: + ilu_vector_range(VectorT & v, + SizeT start_index, + SizeT vec_size + ) : vec_(v), start_(start_index), size_(vec_size) {} + + NumericT & operator()(SizeT index) + { + assert(index < size_ && bool("Index out of bounds!")); + return vec_[start_ + index]; + } + + NumericT & operator[](SizeT index) + { + assert(index < size_ && bool("Index out of bounds!")); + return vec_[start_ + index]; + } + + SizeT size() const { return size_; } + + private: + VectorT & vec_; + SizeT start_; + SizeT size_; + }; + + /** @brief Extracts a diagonal block from a larger system matrix + * + * @param A The full matrix + * @param diagonal_block_A The output matrix, to which the extracted block is written to + * @param start_index First row- and column-index of the block + * @param stop_index First row- and column-index beyond the block + */ + template<typename NumericT> + void extract_block_matrix(viennacl::compressed_matrix<NumericT> const & A, + viennacl::compressed_matrix<NumericT> & diagonal_block_A, + vcl_size_t start_index, + vcl_size_t stop_index + ) + { + assert( (A.handle1().get_active_handle_id() == viennacl::MAIN_MEMORY) && bool("System matrix must reside in main memory for ILU0") ); + assert( (A.handle2().get_active_handle_id() == viennacl::MAIN_MEMORY) && bool("System matrix must reside in main memory for ILU0") ); + assert( (A.handle().get_active_handle_id() == viennacl::MAIN_MEMORY) && bool("System matrix must reside in main memory for ILU0") ); + + NumericT const * A_elements = viennacl::linalg::host_based::detail::extract_raw_pointer<NumericT>(A.handle()); + unsigned int const * A_row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(A.handle1()); + unsigned int const * A_col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(A.handle2()); + + NumericT * output_elements = 
viennacl::linalg::host_based::detail::extract_raw_pointer<NumericT>(diagonal_block_A.handle()); + unsigned int * output_row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(diagonal_block_A.handle1()); + unsigned int * output_col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(diagonal_block_A.handle2()); + + vcl_size_t output_counter = 0; + for (vcl_size_t row = start_index; row < stop_index; ++row) + { + unsigned int buffer_col_start = A_row_buffer[row]; + unsigned int buffer_col_end = A_row_buffer[row+1]; + + output_row_buffer[row - start_index] = static_cast<unsigned int>(output_counter); + + for (unsigned int buf_index = buffer_col_start; buf_index < buffer_col_end; ++buf_index) + { + unsigned int col = A_col_buffer[buf_index]; + if (col < start_index) + continue; + + if (col >= static_cast<unsigned int>(stop_index)) + continue; + + output_col_buffer[output_counter] = static_cast<unsigned int>(col - start_index); + output_elements[output_counter] = A_elements[buf_index]; + ++output_counter; + } + output_row_buffer[row - start_index + 1] = static_cast<unsigned int>(output_counter); + } + } + +} // namespace detail + + + +/** @brief A block ILU preconditioner class, can be supplied to solve()-routines + * + * @tparam MatrixType Type of the system matrix + * @tparam ILUTag Type of the tag identifiying the ILU preconditioner to be used on each block. 
+*/ +template<typename MatrixT, typename ILUTag> +class block_ilu_precond +{ +typedef typename MatrixT::value_type ScalarType; + +public: + typedef std::vector<std::pair<vcl_size_t, vcl_size_t> > index_vector_type; //the pair refers to index range [a, b) of each block + + + block_ilu_precond(MatrixT const & mat, + ILUTag const & tag, + vcl_size_t num_blocks = 8 + ) : tag_(tag), L_blocks(num_blocks), U_blocks(num_blocks) + { + // Set up vector of block indices: + block_indices_.resize(num_blocks); + for (vcl_size_t i=0; i<num_blocks; ++i) + { + vcl_size_t start_index = ( i * mat.size1()) / num_blocks; + vcl_size_t stop_index = ((i+1) * mat.size1()) / num_blocks; + + block_indices_[i] = std::pair<vcl_size_t, vcl_size_t>(start_index, stop_index); + } + + //initialize preconditioner: + //std::cout << "Start CPU precond" << std::endl; + init(mat); + //std::cout << "End CPU precond" << std::endl; + } + + block_ilu_precond(MatrixT const & mat, + ILUTag const & tag, + index_vector_type const & block_boundaries + ) : tag_(tag), block_indices_(block_boundaries), L_blocks(block_boundaries.size()), U_blocks(block_boundaries.size()) + { + //initialize preconditioner: + //std::cout << "Start CPU precond" << std::endl; + init(mat); + //std::cout << "End CPU precond" << std::endl; + } + + + template<typename VectorT> + void apply(VectorT & vec) const + { + for (vcl_size_t i=0; i<block_indices_.size(); ++i) + apply_dispatch(vec, i, ILUTag()); + } + +private: + void init(MatrixT const & A) + { + viennacl::context host_context(viennacl::MAIN_MEMORY); + viennacl::compressed_matrix<ScalarType> mat(host_context); + + viennacl::copy(A, mat); + + unsigned int const * row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(mat.handle1()); + +#ifdef VIENNACL_WITH_OPENMP + #pragma omp parallel for +#endif + for (long i2=0; i2<static_cast<long>(block_indices_.size()); ++i2) + { + vcl_size_t i = static_cast<vcl_size_t>(i2); + // Step 1: Extract blocks + vcl_size_t 
block_size = block_indices_[i].second - block_indices_[i].first; + vcl_size_t block_nnz = row_buffer[block_indices_[i].second] - row_buffer[block_indices_[i].first]; + viennacl::compressed_matrix<ScalarType> mat_block(block_size, block_size, block_nnz, host_context); + + detail::extract_block_matrix(mat, mat_block, block_indices_[i].first, block_indices_[i].second); + + // Step 2: Precondition blocks: + viennacl::switch_memory_context(L_blocks[i], host_context); + viennacl::switch_memory_context(U_blocks[i], host_context); + init_dispatch(mat_block, L_blocks[i], U_blocks[i], tag_); + } + + } + + void init_dispatch(viennacl::compressed_matrix<ScalarType> const & mat_block, + viennacl::compressed_matrix<ScalarType> & L, + viennacl::compressed_matrix<ScalarType> & U, + viennacl::linalg::ilu0_tag) + { + (void)U; + L = mat_block; + viennacl::linalg::precondition(L, tag_); + } + + void init_dispatch(viennacl::compressed_matrix<ScalarType> const & mat_block, + viennacl::compressed_matrix<ScalarType> & L, + viennacl::compressed_matrix<ScalarType> & U, + viennacl::linalg::ilut_tag) + { + L.resize(mat_block.size1(), mat_block.size2()); + U.resize(mat_block.size1(), mat_block.size2()); + viennacl::linalg::precondition(mat_block, L, U, tag_); + } + + template<typename VectorT> + void apply_dispatch(VectorT & vec, vcl_size_t i, viennacl::linalg::ilu0_tag) const + { + detail::ilu_vector_range<VectorT, ScalarType> vec_range(vec, block_indices_[i].first, L_blocks[i].size2()); + + unsigned int const * row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(L_blocks[i].handle1()); + unsigned int const * col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(L_blocks[i].handle2()); + ScalarType const * elements = viennacl::linalg::host_based::detail::extract_raw_pointer<ScalarType>(L_blocks[i].handle()); + + viennacl::linalg::host_based::detail::csr_inplace_solve<ScalarType>(row_buffer, col_buffer, elements, vec_range, 
L_blocks[i].size2(), unit_lower_tag()); + viennacl::linalg::host_based::detail::csr_inplace_solve<ScalarType>(row_buffer, col_buffer, elements, vec_range, L_blocks[i].size2(), upper_tag()); + } + + template<typename VectorT> + void apply_dispatch(VectorT & vec, vcl_size_t i, viennacl::linalg::ilut_tag) const + { + detail::ilu_vector_range<VectorT, ScalarType> vec_range(vec, block_indices_[i].first, L_blocks[i].size2()); + + { + unsigned int const * row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(L_blocks[i].handle1()); + unsigned int const * col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(L_blocks[i].handle2()); + ScalarType const * elements = viennacl::linalg::host_based::detail::extract_raw_pointer<ScalarType>(L_blocks[i].handle()); + + viennacl::linalg::host_based::detail::csr_inplace_solve<ScalarType>(row_buffer, col_buffer, elements, vec_range, L_blocks[i].size2(), unit_lower_tag()); + } + + { + unsigned int const * row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(U_blocks[i].handle1()); + unsigned int const * col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(U_blocks[i].handle2()); + ScalarType const * elements = viennacl::linalg::host_based::detail::extract_raw_pointer<ScalarType>(U_blocks[i].handle()); + + viennacl::linalg::host_based::detail::csr_inplace_solve<ScalarType>(row_buffer, col_buffer, elements, vec_range, U_blocks[i].size2(), upper_tag()); + } + } + + ILUTag tag_; + index_vector_type block_indices_; + std::vector< viennacl::compressed_matrix<ScalarType> > L_blocks; + std::vector< viennacl::compressed_matrix<ScalarType> > U_blocks; +}; + + + + + +/** @brief ILUT preconditioner class, can be supplied to solve()-routines. 
+* +* Specialization for compressed_matrix +*/ +template<typename NumericT, unsigned int AlignmentV, typename ILUTagT> +class block_ilu_precond< compressed_matrix<NumericT, AlignmentV>, ILUTagT> +{ + typedef compressed_matrix<NumericT, AlignmentV> MatrixType; + +public: + typedef std::vector<std::pair<vcl_size_t, vcl_size_t> > index_vector_type; //the pair refers to index range [a, b) of each block + + + block_ilu_precond(MatrixType const & mat, + ILUTagT const & tag, + vcl_size_t num_blocks = 8 + ) : tag_(tag), + block_indices_(num_blocks), + gpu_block_indices_(), + gpu_L_trans_(0, 0, viennacl::context(viennacl::MAIN_MEMORY)), + gpu_U_trans_(0, 0, viennacl::context(viennacl::MAIN_MEMORY)), + gpu_D_(mat.size1(), viennacl::context(viennacl::MAIN_MEMORY)), + L_blocks_(num_blocks), + U_blocks_(num_blocks) + { + // Set up vector of block indices: + block_indices_.resize(num_blocks); + for (vcl_size_t i=0; i<num_blocks; ++i) + { + vcl_size_t start_index = ( i * mat.size1()) / num_blocks; + vcl_size_t stop_index = ((i+1) * mat.size1()) / num_blocks; + + block_indices_[i] = std::pair<vcl_size_t, vcl_size_t>(start_index, stop_index); + } + + //initialize preconditioner: + //std::cout << "Start CPU precond" << std::endl; + init(mat); + //std::cout << "End CPU precond" << std::endl; + } + + block_ilu_precond(MatrixType const & mat, + ILUTagT const & tag, + index_vector_type const & block_boundaries + ) : tag_(tag), + block_indices_(block_boundaries), + gpu_block_indices_(), + gpu_L_trans_(0, 0, viennacl::context(viennacl::MAIN_MEMORY)), + gpu_U_trans_(0, 0, viennacl::context(viennacl::MAIN_MEMORY)), + gpu_D_(mat.size1(), viennacl::context(viennacl::MAIN_MEMORY)), + L_blocks_(block_boundaries.size()), + U_blocks_(block_boundaries.size()) + { + //initialize preconditioner: + //std::cout << "Start CPU precond" << std::endl; + init(mat); + //std::cout << "End CPU precond" << std::endl; + } + + + void apply(vector<NumericT> & vec) const + { + 
viennacl::linalg::detail::block_inplace_solve(trans(gpu_L_trans_), gpu_block_indices_, block_indices_.size(), gpu_D_, + vec, + viennacl::linalg::unit_lower_tag()); + + viennacl::linalg::detail::block_inplace_solve(trans(gpu_U_trans_), gpu_block_indices_, block_indices_.size(), gpu_D_, + vec, + viennacl::linalg::upper_tag()); + + //apply_cpu(vec); + } + + +private: + + void init(MatrixType const & A) + { + viennacl::context host_context(viennacl::MAIN_MEMORY); + viennacl::compressed_matrix<NumericT> mat(host_context); + + mat = A; + + unsigned int const * row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(mat.handle1()); + +#ifdef VIENNACL_WITH_OPENMP + #pragma omp parallel for +#endif + for (long i=0; i<static_cast<long>(block_indices_.size()); ++i) + { + // Step 1: Extract blocks + vcl_size_t block_size = block_indices_[static_cast<vcl_size_t>(i)].second - block_indices_[static_cast<vcl_size_t>(i)].first; + vcl_size_t block_nnz = row_buffer[block_indices_[static_cast<vcl_size_t>(i)].second] - row_buffer[block_indices_[static_cast<vcl_size_t>(i)].first]; + viennacl::compressed_matrix<NumericT> mat_block(block_size, block_size, block_nnz, host_context); + + detail::extract_block_matrix(mat, mat_block, block_indices_[static_cast<vcl_size_t>(i)].first, block_indices_[static_cast<vcl_size_t>(i)].second); + + // Step 2: Precondition blocks: + viennacl::switch_memory_context(L_blocks_[static_cast<vcl_size_t>(i)], host_context); + viennacl::switch_memory_context(U_blocks_[static_cast<vcl_size_t>(i)], host_context); + init_dispatch(mat_block, L_blocks_[static_cast<vcl_size_t>(i)], U_blocks_[static_cast<vcl_size_t>(i)], tag_); + } + + /* + * copy resulting preconditioner back to GPU: + */ + viennacl::backend::typesafe_host_array<unsigned int> block_indices_uint(gpu_block_indices_, 2 * block_indices_.size()); + for (vcl_size_t i=0; i<block_indices_.size(); ++i) + { + block_indices_uint.set(2*i, block_indices_[i].first); + 
block_indices_uint.set(2*i + 1, block_indices_[i].second); + } + + viennacl::backend::memory_create(gpu_block_indices_, block_indices_uint.raw_size(), viennacl::traits::context(A), block_indices_uint.get()); + + blocks_to_device(A); + + } + + // Copy computed preconditioned blocks to OpenCL device + void blocks_to_device(MatrixType const & A) + { + gpu_L_trans_.resize(A.size1(), A.size2()); + gpu_U_trans_.resize(A.size1(), A.size2()); + gpu_D_.resize(A.size1()); + + unsigned int * L_trans_row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(gpu_L_trans_.handle1()); + unsigned int * U_trans_row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(gpu_U_trans_.handle1()); + + // + // Count elements per row + // +#ifdef VIENNACL_WITH_OPENMP + #pragma omp parallel for +#endif + for (long block_index2 = 0; block_index2 < static_cast<long>(L_blocks_.size()); ++block_index2) + { + vcl_size_t block_index = vcl_size_t(block_index2); + + unsigned int block_start = static_cast<unsigned int>(block_indices_[block_index].first); + unsigned int block_stop = static_cast<unsigned int>(block_indices_[block_index].second); + + unsigned int const * L_row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(L_blocks_[block_index].handle1()); + unsigned int const * L_col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(L_blocks_[block_index].handle2()); + + // zero row array of L: + std::fill(L_trans_row_buffer + block_start, + L_trans_row_buffer + block_stop, + static_cast<unsigned int>(0)); + + // count number of elements per row: + for (vcl_size_t row = 0; row < L_blocks_[block_index].size1(); ++row) + { + unsigned int col_start = L_row_buffer[row]; + unsigned int col_end = L_row_buffer[row+1]; + + for (unsigned int j = col_start; j < col_end; ++j) + { + unsigned int col = L_col_buffer[j]; + if (col < static_cast<unsigned int>(row)) + L_trans_row_buffer[col + 
block_start] += 1; + } + } + + ////// same for U + + unsigned int const * U_row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(U_blocks_[block_index].handle1()); + unsigned int const * U_col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(U_blocks_[block_index].handle2()); + + // zero row array of U: + std::fill(U_trans_row_buffer + block_start, + U_trans_row_buffer + block_stop, + static_cast<unsigned int>(0)); + + // count number of elements per row: + for (vcl_size_t row = 0; row < U_blocks_[block_index].size1(); ++row) + { + unsigned int col_start = U_row_buffer[row]; + unsigned int col_end = U_row_buffer[row+1]; + + for (unsigned int j = col_start; j < col_end; ++j) + { + unsigned int col = U_col_buffer[j]; + if (col > row) + U_trans_row_buffer[col + block_start] += 1; + } + } + } + + + // + // Exclusive scan on row buffer (feel free to add parallelization here) + // + unsigned int current_value = 0; + for (vcl_size_t i=0; i<gpu_L_trans_.size1(); ++i) + { + unsigned int tmp = L_trans_row_buffer[i]; + L_trans_row_buffer[i] = current_value; + current_value += tmp; + } + gpu_L_trans_.reserve(current_value); + + current_value = 0; + for (vcl_size_t i=0; i<gpu_U_trans_.size1(); ++i) + { + unsigned int tmp = U_trans_row_buffer[i]; + U_trans_row_buffer[i] = current_value; + current_value += tmp; + } + gpu_U_trans_.reserve(current_value); + + + // + // Fill with data + // + unsigned int * L_trans_col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(gpu_L_trans_.handle2()); + NumericT * L_trans_elements = viennacl::linalg::host_based::detail::extract_raw_pointer<NumericT>(gpu_L_trans_.handle()); + + unsigned int * U_trans_col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(gpu_U_trans_.handle2()); + NumericT * U_trans_elements = viennacl::linalg::host_based::detail::extract_raw_pointer<NumericT>(gpu_U_trans_.handle()); + + NumericT * 
D_elements = viennacl::linalg::host_based::detail::extract_raw_pointer<NumericT>(gpu_D_.handle()); + + std::vector<unsigned int> offset_L(gpu_L_trans_.size1()); + std::vector<unsigned int> offset_U(gpu_U_trans_.size1()); + +#ifdef VIENNACL_WITH_OPENMP + #pragma omp parallel for +#endif + for (long block_index2 = 0; block_index2 < static_cast<long>(L_blocks_.size()); ++block_index2) + { + vcl_size_t block_index = vcl_size_t(block_index2); + unsigned int block_start = static_cast<unsigned int>(block_indices_[block_index].first); + + unsigned int const * L_row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(L_blocks_[block_index].handle1()); + unsigned int const * L_col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(L_blocks_[block_index].handle2()); + NumericT const * L_elements = viennacl::linalg::host_based::detail::extract_raw_pointer<NumericT >(L_blocks_[block_index].handle()); + + + // write L_trans: + for (vcl_size_t row = 0; row < L_blocks_[block_index].size1(); ++row) + { + unsigned int col_start = L_row_buffer[row]; + unsigned int col_end = L_row_buffer[row+1]; + + for (unsigned int j = col_start; j < col_end; ++j) + { + unsigned int col = L_col_buffer[j]; + if (col < row) + { + unsigned int row_trans = col + block_start; + unsigned int k = L_trans_row_buffer[row_trans] + offset_L[row_trans]; + offset_L[row_trans] += 1; + + L_trans_col_buffer[k] = static_cast<unsigned int>(row) + block_start; + L_trans_elements[k] = L_elements[j]; + } + } + } + + unsigned int const * U_row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(U_blocks_[block_index].handle1()); + unsigned int const * U_col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(U_blocks_[block_index].handle2()); + NumericT const * U_elements = viennacl::linalg::host_based::detail::extract_raw_pointer<NumericT >(U_blocks_[block_index].handle()); + + // write U_trans and D: + 
for (vcl_size_t row = 0; row < U_blocks_[block_index].size1(); ++row) + { + unsigned int col_start = U_row_buffer[row]; + unsigned int col_end = U_row_buffer[row+1]; + + for (unsigned int j = col_start; j < col_end; ++j) + { + unsigned int row_trans = U_col_buffer[j] + block_start; + unsigned int k = U_trans_row_buffer[row_trans] + offset_U[row_trans]; + + if (row_trans == row + block_start) // entry for D + { + D_elements[row_trans] = U_elements[j]; + } + else if (row_trans > row + block_start) //entry for U + { + offset_U[row_trans] += 1; + + U_trans_col_buffer[k] = static_cast<unsigned int>(row) + block_start; + U_trans_elements[k] = U_elements[j]; + } + } + } + + } + + // + // Send to destination device: + // + viennacl::switch_memory_context(gpu_L_trans_, viennacl::traits::context(A)); + viennacl::switch_memory_context(gpu_U_trans_, viennacl::traits::context(A)); + viennacl::switch_memory_context(gpu_D_, viennacl::traits::context(A)); + } + + void init_dispatch(viennacl::compressed_matrix<NumericT> const & mat_block, + viennacl::compressed_matrix<NumericT> & L, + viennacl::compressed_matrix<NumericT> & U, + viennacl::linalg::ilu0_tag) + { + L = mat_block; + viennacl::linalg::precondition(L, tag_); + U = L; // fairly poor workaround... 
+ } + + void init_dispatch(viennacl::compressed_matrix<NumericT> const & mat_block, + viennacl::compressed_matrix<NumericT> & L, + viennacl::compressed_matrix<NumericT> & U, + viennacl::linalg::ilut_tag) + { + L.resize(mat_block.size1(), mat_block.size2()); + U.resize(mat_block.size1(), mat_block.size2()); + viennacl::linalg::precondition(mat_block, L, U, tag_); + } + + + ILUTagT tag_; + index_vector_type block_indices_; + viennacl::backend::mem_handle gpu_block_indices_; + viennacl::compressed_matrix<NumericT> gpu_L_trans_; + viennacl::compressed_matrix<NumericT> gpu_U_trans_; + viennacl::vector<NumericT> gpu_D_; + + std::vector<MatrixType> L_blocks_; + std::vector<MatrixType> U_blocks_; +}; + + +} +} + + + + +#endif + + + http://git-wip-us.apache.org/repos/asf/mahout/blob/f7c1f802/native-viennaCL/src/main/cpp/viennacl/linalg/detail/ilu/chow_patel_ilu.hpp ---------------------------------------------------------------------- diff --git a/native-viennaCL/src/main/cpp/viennacl/linalg/detail/ilu/chow_patel_ilu.hpp b/native-viennaCL/src/main/cpp/viennacl/linalg/detail/ilu/chow_patel_ilu.hpp new file mode 100644 index 0000000..7628cdb --- /dev/null +++ b/native-viennaCL/src/main/cpp/viennacl/linalg/detail/ilu/chow_patel_ilu.hpp @@ -0,0 +1,316 @@ +#ifndef VIENNACL_LINALG_DETAIL_CHOW_PATEL_ILU_HPP_ +#define VIENNACL_LINALG_DETAIL_CHOW_PATEL_ILU_HPP_ + +/* ========================================================================= + Copyright (c) 2010-2016, Institute for Microelectronics, + Institute for Analysis and Scientific Computing, + TU Wien. + Portions of this software are copyright by UChicago Argonne, LLC. 
+ + ----------------- + ViennaCL - The Vienna Computing Library + ----------------- + + Project Head: Karl Rupp [email protected] + + (A list of authors and contributors can be found in the PDF manual) + + License: MIT (X11), see file LICENSE in the base directory +============================================================================= */ + +/** @file viennacl/linalg/detail/ilu/chow_patel_ilu.hpp + @brief Implementations of incomplete factorization preconditioners with fine-grained parallelism. + + Based on "Fine-Grained Parallel Incomplete LU Factorization" by Chow and Patel, SIAM J. Sci. Comput., vol. 37, no. 2, pp. C169-C193 +*/ + +#include <vector> +#include <cmath> +#include <iostream> +#include "viennacl/forwards.h" +#include "viennacl/tools/tools.hpp" +#include "viennacl/linalg/detail/ilu/common.hpp" +#include "viennacl/linalg/ilu_operations.hpp" +#include "viennacl/linalg/prod.hpp" +#include "viennacl/backend/memory.hpp" + +namespace viennacl +{ +namespace linalg +{ + +/** @brief A tag for incomplete LU and incomplete Cholesky factorization with static pattern (Parallel-ILU0, Parallel ICC0) +*/ +class chow_patel_tag +{ +public: + /** @brief Constructor allowing to set the number of sweeps and Jacobi iterations. + * + * @param num_sweeps Number of sweeps in setup phase + * @param num_jacobi_iters Number of Jacobi iterations for each triangular 'solve' when applying the preconditioner to a vector + */ + chow_patel_tag(vcl_size_t num_sweeps = 3, vcl_size_t num_jacobi_iters = 2) : sweeps_(num_sweeps), jacobi_iters_(num_jacobi_iters) {} + + /** @brief Returns the number of sweeps (i.e. number of nonlinear iterations) in the solver setup stage */ + vcl_size_t sweeps() const { return sweeps_; } + /** @brief Sets the number of sweeps (i.e. number of nonlinear iterations) in the solver setup stage */ + void sweeps(vcl_size_t num) { sweeps_ = num; } + + /** @brief Returns the number of Jacobi iterations (i.e. 
applications of x_{k+1} = (I - D^{-1}R)x_k + D^{-1} b) for each of the solves y = U^{-1} x and z = L^{-1} y) for each preconditioner application. */ + vcl_size_t jacobi_iters() const { return jacobi_iters_; } + /** @brief Sets the number of Jacobi iterations for each triangular 'solve' when applying the preconditioner to a vector. */ + void jacobi_iters(vcl_size_t num) { jacobi_iters_ = num; } + +private: + vcl_size_t sweeps_; + vcl_size_t jacobi_iters_; +}; + +namespace detail +{ + /** @brief Implementation of the parallel ICC0 factorization, Algorithm 3 in Chow-Patel paper. + * + * Rather than dealing with a column-major upper triangular matrix U, we use the lower-triangular matrix L such that A is approximately given by LL^T. + * The advantage is that L is readily available in row-major format. + */ + template<typename NumericT> + void precondition(viennacl::compressed_matrix<NumericT> const & A, + viennacl::compressed_matrix<NumericT> & L, + viennacl::vector<NumericT> & diag_L, + viennacl::compressed_matrix<NumericT> & L_trans, + chow_patel_tag const & tag) + { + // make sure L and U have correct dimensions: + L.resize(A.size1(), A.size2(), false); + + // initialize L and U from values in A: + viennacl::linalg::extract_L(A, L); + + // diagonally scale values from A in L: + viennacl::linalg::icc_scale(A, L); + + viennacl::vector<NumericT> aij_L(L.nnz(), viennacl::traits::context(A)); + viennacl::backend::memory_copy(L.handle(), aij_L.handle(), 0, 0, sizeof(NumericT) * L.nnz()); + + // run sweeps: + for (vcl_size_t i=0; i<tag.sweeps(); ++i) + viennacl::linalg::icc_chow_patel_sweep(L, aij_L); + + // transpose L to obtain L_trans: + viennacl::linalg::ilu_transpose(L, L_trans); + + // form (I - D_L^{-1}L) and (I - D_U^{-1} U), with U := L_trans + viennacl::linalg::ilu_form_neumann_matrix(L, diag_L); + viennacl::linalg::ilu_form_neumann_matrix(L_trans, diag_L); + } + + + /** @brief Implementation of the parallel ILU0 factorization, Algorithm 2 in Chow-Patel paper. 
*/ + template<typename NumericT> + void precondition(viennacl::compressed_matrix<NumericT> const & A, + viennacl::compressed_matrix<NumericT> & L, + viennacl::vector<NumericT> & diag_L, + viennacl::compressed_matrix<NumericT> & U, + viennacl::vector<NumericT> & diag_U, + chow_patel_tag const & tag) + { + // make sure L and U have correct dimensions: + L.resize(A.size1(), A.size2(), false); + U.resize(A.size1(), A.size2(), false); + + // initialize L and U from values in A: + viennacl::linalg::extract_LU(A, L, U); + + // diagonally scale values from A in L and U: + viennacl::linalg::ilu_scale(A, L, U); + + // transpose storage layout of U from CSR to CSC via transposition + viennacl::compressed_matrix<NumericT> U_trans; + viennacl::linalg::ilu_transpose(U, U_trans); + + // keep entries of a_ij for the sweeps + viennacl::vector<NumericT> aij_L (L.nnz(), viennacl::traits::context(A)); + viennacl::vector<NumericT> aij_U_trans(U_trans.nnz(), viennacl::traits::context(A)); + + viennacl::backend::memory_copy( L.handle(), aij_L.handle(), 0, 0, sizeof(NumericT) * L.nnz()); + viennacl::backend::memory_copy(U_trans.handle(), aij_U_trans.handle(), 0, 0, sizeof(NumericT) * U_trans.nnz()); + + // run sweeps: + for (vcl_size_t i=0; i<tag.sweeps(); ++i) + viennacl::linalg::ilu_chow_patel_sweep(L, aij_L, U_trans, aij_U_trans); + + // transpose U_trans back: + viennacl::linalg::ilu_transpose(U_trans, U); + + // form (I - D_L^{-1}L) and (I - D_U^{-1} U) + viennacl::linalg::ilu_form_neumann_matrix(L, diag_L); + viennacl::linalg::ilu_form_neumann_matrix(U, diag_U); + } + +} + + + + +/** @brief Parallel Chow-Patel ILU preconditioner class, can be supplied to solve()-routines +*/ +template<typename MatrixT> +class chow_patel_icc_precond +{ + // only works with compressed_matrix! + typedef typename MatrixT::CHOW_PATEL_ICC_ONLY_WORKS_WITH_COMPRESSED_MATRIX error_type; +}; + + +/** @brief Parallel Chow-Patel ILU preconditioner class, can be supplied to solve()-routines. 
+* +* Specialization for compressed_matrix +*/ +template<typename NumericT, unsigned int AlignmentV> +class chow_patel_icc_precond< viennacl::compressed_matrix<NumericT, AlignmentV> > +{ + +public: + chow_patel_icc_precond(viennacl::compressed_matrix<NumericT, AlignmentV> const & A, chow_patel_tag const & tag) + : tag_(tag), + L_(0, 0, 0, viennacl::traits::context(A)), + diag_L_(A.size1(), viennacl::traits::context(A)), + L_trans_(0, 0, 0, viennacl::traits::context(A)), + x_k_(A.size1(), viennacl::traits::context(A)), + b_(A.size1(), viennacl::traits::context(A)) + { + viennacl::linalg::detail::precondition(A, L_, diag_L_, L_trans_, tag_); + } + + /** @brief Preconditioner application: LL^Tx = b, computed via Ly = b, L^Tx = y using Jacobi iterations. + * + * L contains (I - D_L^{-1}L), L_trans contains (I - D_L^{-1}L^T) where D denotes the respective diagonal matrix + */ + template<typename VectorT> + void apply(VectorT & vec) const + { + // + // y = L^{-1} b through Jacobi iteration y_{k+1} = (I - D^{-1}L)y_k + D^{-1}x + // + b_ = viennacl::linalg::element_div(vec, diag_L_); + x_k_ = b_; + for (unsigned int i=0; i<tag_.jacobi_iters(); ++i) + { + vec = viennacl::linalg::prod(L_, x_k_); + x_k_ = vec + b_; + } + + // + // x = U^{-1} y through Jacobi iteration x_{k+1} = (I - D^{-1}L^T)x_k + D^{-1}b + // + b_ = viennacl::linalg::element_div(x_k_, diag_L_); + x_k_ = b_; // x_1 if x_0 \equiv 0 + for (unsigned int i=0; i<tag_.jacobi_iters(); ++i) + { + vec = viennacl::linalg::prod(L_trans_, x_k_); + x_k_ = vec + b_; + } + + // return result: + vec = x_k_; + } + +private: + chow_patel_tag tag_; + viennacl::compressed_matrix<NumericT> L_; + viennacl::vector<NumericT> diag_L_; + viennacl::compressed_matrix<NumericT> L_trans_; + + mutable viennacl::vector<NumericT> x_k_; + mutable viennacl::vector<NumericT> b_; +}; + + + + + + +/** @brief Parallel Chow-Patel ILU preconditioner class, can be supplied to solve()-routines +*/ +template<typename MatrixT> +class 
chow_patel_ilu_precond +{ + // only works with compressed_matrix! + typedef typename MatrixT::CHOW_PATEL_ILU_ONLY_WORKS_WITH_COMPRESSED_MATRIX error_type; +}; + + +/** @brief Parallel Chow-Patel ILU preconditioner class, can be supplied to solve()-routines. +* +* Specialization for compressed_matrix +*/ +template<typename NumericT, unsigned int AlignmentV> +class chow_patel_ilu_precond< viennacl::compressed_matrix<NumericT, AlignmentV> > +{ + +public: + chow_patel_ilu_precond(viennacl::compressed_matrix<NumericT, AlignmentV> const & A, chow_patel_tag const & tag) + : tag_(tag), + L_(0, 0, 0, viennacl::traits::context(A)), + diag_L_(A.size1(), viennacl::traits::context(A)), + U_(0, 0, 0, viennacl::traits::context(A)), + diag_U_(A.size1(), viennacl::traits::context(A)), + x_k_(A.size1(), viennacl::traits::context(A)), + b_(A.size1(), viennacl::traits::context(A)) + { + viennacl::linalg::detail::precondition(A, L_, diag_L_, U_, diag_U_, tag_); + } + + /** @brief Preconditioner application: LUx = b, computed via Ly = b, Ux = y using Jacobi iterations. 
+ * + * L_ contains (I - D_L^{-1}L), U_ contains (I - D_U^{-1}U) where D denotes the respective diagonal matrix + */ + template<typename VectorT> + void apply(VectorT & vec) const + { + // + // y = L^{-1} b through Jacobi iteration y_{k+1} = (I - D^{-1}L)y_k + D^{-1}x + // + b_ = viennacl::linalg::element_div(vec, diag_L_); + x_k_ = b_; + for (unsigned int i=0; i<tag_.jacobi_iters(); ++i) + { + vec = viennacl::linalg::prod(L_, x_k_); + x_k_ = vec + b_; + } + + // + // x = U^{-1} y through Jacobi iteration x_{k+1} = (I - D^{-1}U)x_k + D^{-1}b + // + b_ = viennacl::linalg::element_div(x_k_, diag_U_); + x_k_ = b_; // x_1 if x_0 \equiv 0 + for (unsigned int i=0; i<tag_.jacobi_iters(); ++i) + { + vec = viennacl::linalg::prod(U_, x_k_); + x_k_ = vec + b_; + } + + // return result: + vec = x_k_; + } + +private: + chow_patel_tag tag_; + viennacl::compressed_matrix<NumericT> L_; + viennacl::vector<NumericT> diag_L_; + viennacl::compressed_matrix<NumericT> U_; + viennacl::vector<NumericT> diag_U_; + + mutable viennacl::vector<NumericT> x_k_; + mutable viennacl::vector<NumericT> b_; +}; + + +} // namespace linalg +} // namespace viennacl + + +#endif + + +
