http://git-wip-us.apache.org/repos/asf/mahout/blob/7ae549fa/native-viennaCL/src/main/cpp/viennacl/linalg/norm_inf.hpp ---------------------------------------------------------------------- diff --git a/native-viennaCL/src/main/cpp/viennacl/linalg/norm_inf.hpp b/native-viennaCL/src/main/cpp/viennacl/linalg/norm_inf.hpp deleted file mode 100644 index 959bbd8..0000000 --- a/native-viennaCL/src/main/cpp/viennacl/linalg/norm_inf.hpp +++ /dev/null @@ -1,108 +0,0 @@ -#ifndef VIENNACL_LINALG_NORM_INF_HPP_ -#define VIENNACL_LINALG_NORM_INF_HPP_ - -/* ========================================================================= - Copyright (c) 2010-2016, Institute for Microelectronics, - Institute for Analysis and Scientific Computing, - TU Wien. - Portions of this software are copyright by UChicago Argonne, LLC. - - ----------------- - ViennaCL - The Vienna Computing Library - ----------------- - - Project Head: Karl Rupp [email protected] - - (A list of authors and contributors can be found in the manual) - - License: MIT (X11), see file LICENSE in the base directory -============================================================================= */ - -/** @file norm_inf.hpp - @brief Generic interface for the l^infty-norm. See viennacl/linalg/vector_operations.hpp for implementations. -*/ - -#include <cmath> -#include "viennacl/forwards.h" -#include "viennacl/tools/tools.hpp" -#include "viennacl/meta/enable_if.hpp" -#include "viennacl/meta/tag_of.hpp" - -namespace viennacl -{ - // - // generic norm_inf function - // uses tag dispatch to identify which algorithm - // should be called - // - namespace linalg - { - - #ifdef VIENNACL_WITH_UBLAS - // ---------------------------------------------------- - // UBLAS - // - template< typename VectorT > - typename viennacl::enable_if< viennacl::is_ublas< typename viennacl::traits::tag_of< VectorT >::type >::value, - typename VectorT::value_type - >::type - norm_inf(VectorT const& v1) - { - return boost::numeric::ublas::norm_inf(v1); - } - #endif - - - // ---------------------------------------------------- - // STL - // - template< typename T, typename A > - T norm_inf(std::vector<T, A> const & v1) - { - //std::cout << "stl .. " << std::endl; - T result = 0; - for (typename std::vector<T, A>::size_type i=0; i<v1.size(); ++i) - { - if (std::fabs(v1[i]) > result) - result = std::fabs(v1[i]); - } - - return result; - } - - // ---------------------------------------------------- - // VIENNACL - // - template< typename ScalarType> - viennacl::scalar_expression< const viennacl::vector_base<ScalarType>, - const viennacl::vector_base<ScalarType>, - viennacl::op_norm_inf > - norm_inf(viennacl::vector_base<ScalarType> const & v1) - { - //std::cout << "viennacl .. " << std::endl; - return viennacl::scalar_expression< const viennacl::vector_base<ScalarType>, - const viennacl::vector_base<ScalarType>, - viennacl::op_norm_inf >(v1, v1); - } - - // with vector expression: - template<typename LHS, typename RHS, typename OP> - viennacl::scalar_expression<const viennacl::vector_expression<const LHS, const RHS, OP>, - const viennacl::vector_expression<const LHS, const RHS, OP>, - viennacl::op_norm_inf> - norm_inf(viennacl::vector_expression<const LHS, const RHS, OP> const & vector) - { - return viennacl::scalar_expression< const viennacl::vector_expression<const LHS, const RHS, OP>, - const viennacl::vector_expression<const LHS, const RHS, OP>, - viennacl::op_norm_inf >(vector, vector); - } - - - } // end namespace linalg -} // end namespace viennacl -#endif - - - - -
http://git-wip-us.apache.org/repos/asf/mahout/blob/7ae549fa/native-viennaCL/src/main/cpp/viennacl/linalg/opencl/amg_operations.hpp ---------------------------------------------------------------------- diff --git a/native-viennaCL/src/main/cpp/viennacl/linalg/opencl/amg_operations.hpp b/native-viennaCL/src/main/cpp/viennacl/linalg/opencl/amg_operations.hpp deleted file mode 100644 index 7cdcf89..0000000 --- a/native-viennaCL/src/main/cpp/viennacl/linalg/opencl/amg_operations.hpp +++ /dev/null @@ -1,458 +0,0 @@ -#ifndef VIENNACL_LINALG_OPENCL_AMG_OPERATIONS_HPP -#define VIENNACL_LINALG_OPENCL_AMG_OPERATIONS_HPP - -/* ========================================================================= - Copyright (c) 2010-2016, Institute for Microelectronics, - Institute for Analysis and Scientific Computing, - TU Wien. - Portions of this software are copyright by UChicago Argonne, LLC. - - ----------------- - ViennaCL - The Vienna Computing Library - ----------------- - - Project Head: Karl Rupp [email protected] - - (A list of authors and contributors can be found in the PDF manual) - - License: MIT (X11), see file LICENSE in the base directory -============================================================================= */ - -/** @file opencl/amg_operations.hpp - @brief Implementations of routines for AMG in OpenCL. -*/ - -#include <cstdlib> -#include <cmath> -#include <map> - -#include "viennacl/linalg/detail/amg/amg_base.hpp" -#include "viennacl/linalg/opencl/common.hpp" -#include "viennacl/linalg/opencl/kernels/amg.hpp" - - -namespace viennacl -{ -namespace linalg -{ -namespace opencl -{ -namespace amg -{ - - -/////////////////////////////////////////// - -/** @brief Routine for taking all connections in the matrix as strong */ -template<typename NumericT> -void amg_influence_trivial(compressed_matrix<NumericT> const & A, - viennacl::linalg::detail::amg::amg_level_context & amg_context, - viennacl::linalg::amg_tag & tag) -{ - (void)tag; - - viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(A).context()); - viennacl::linalg::opencl::kernels::amg<NumericT>::init(ctx); - viennacl::ocl::kernel & influence_kernel = ctx.get_kernel(viennacl::linalg::opencl::kernels::amg<NumericT>::program_name(), "amg_influence_trivial"); - - viennacl::ocl::enqueue(influence_kernel(A.handle1().opencl_handle(), A.handle2().opencl_handle(), - cl_uint(A.size1()), - cl_uint(A.nnz()), - viennacl::traits::opencl_handle(amg_context.influence_jumper_), - viennacl::traits::opencl_handle(amg_context.influence_ids_), - viennacl::traits::opencl_handle(amg_context.influence_values_) - ) - ); -} - - -/** @brief Routine for extracting strongly connected points considering a user-provided threshold value */ -template<typename NumericT> -void amg_influence_advanced(compressed_matrix<NumericT> const & A, - viennacl::linalg::detail::amg::amg_level_context & amg_context, - viennacl::linalg::amg_tag & tag) -{ - (void)A; (void)amg_context; (void)tag; - throw std::runtime_error("amg_influence_advanced() not implemented for OpenCL yet"); -} - - -/** @brief Dispatcher for influence processing */ -template<typename NumericT> -void amg_influence(compressed_matrix<NumericT> const & A, - viennacl::linalg::detail::amg::amg_level_context & amg_context, - viennacl::linalg::amg_tag & tag) -{ - // TODO: dispatch based on influence tolerance provided - amg_influence_trivial(A, amg_context, tag); -} - - - -/** @brief Assign IDs to coarse points. -* -* TODO: Use exclusive_scan on GPU for this. -*/ -inline void enumerate_coarse_points(viennacl::linalg::detail::amg::amg_level_context & amg_context) -{ - viennacl::backend::typesafe_host_array<unsigned int> point_types(amg_context.point_types_.handle(), amg_context.point_types_.size()); - viennacl::backend::typesafe_host_array<unsigned int> coarse_ids(amg_context.coarse_id_.handle(), amg_context.coarse_id_.size()); - viennacl::backend::memory_read(amg_context.point_types_.handle(), 0, point_types.raw_size(), point_types.get()); - viennacl::backend::memory_read(amg_context.coarse_id_.handle(), 0, coarse_ids.raw_size(), coarse_ids.get()); - - unsigned int coarse_id = 0; - for (std::size_t i=0; i<amg_context.point_types_.size(); ++i) - { - coarse_ids.set(i, coarse_id); - if (point_types[i] == viennacl::linalg::detail::amg::amg_level_context::POINT_TYPE_COARSE) - ++coarse_id; - } - - amg_context.num_coarse_ = coarse_id; - - viennacl::backend::memory_write(amg_context.coarse_id_.handle(), 0, coarse_ids.raw_size(), coarse_ids.get()); -} - - -////////////////////////////////////// - - - -/** @brief AG (aggregation based) coarsening, single-threaded version of stage 1 -* -* @param A Operator matrix on all levels -* @param amg_context AMG hierarchy datastructures -* @param tag AMG preconditioner tag -*/ -template<typename NumericT> -void amg_coarse_ag_stage1_mis2(compressed_matrix<NumericT> const & A, - viennacl::linalg::detail::amg::amg_level_context & amg_context, - viennacl::linalg::amg_tag & tag) -{ - (void)tag; - viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(A).context()); - viennacl::linalg::opencl::kernels::amg<NumericT>::init(ctx); - - viennacl::vector<unsigned int> random_weights(A.size1(), viennacl::context(viennacl::MAIN_MEMORY)); - unsigned int *random_weights_ptr = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(random_weights.handle()); - for (std::size_t i=0; i<random_weights.size(); ++i) - random_weights_ptr[i] = static_cast<unsigned int>(rand()) % static_cast<unsigned int>(A.size1()); - random_weights.switch_memory_context(viennacl::traits::context(A)); - - // work vectors: - viennacl::vector<unsigned int> work_state(A.size1(), viennacl::traits::context(A)); - viennacl::vector<unsigned int> work_random(A.size1(), viennacl::traits::context(A)); - viennacl::vector<unsigned int> work_index(A.size1(), viennacl::traits::context(A)); - - viennacl::vector<unsigned int> work_state2(A.size1(), viennacl::traits::context(A)); - viennacl::vector<unsigned int> work_random2(A.size1(), viennacl::traits::context(A)); - viennacl::vector<unsigned int> work_index2(A.size1(), viennacl::traits::context(A)); - - unsigned int num_undecided = static_cast<unsigned int>(A.size1()); - viennacl::vector<unsigned int> undecided_buffer(256, viennacl::traits::context(A)); - viennacl::backend::typesafe_host_array<unsigned int> undecided_buffer_host(undecided_buffer.handle(), undecided_buffer.size()); - - viennacl::ocl::kernel & init_workdata_kernel = ctx.get_kernel(viennacl::linalg::opencl::kernels::amg<NumericT>::program_name(), "amg_pmis2_init_workdata"); - viennacl::ocl::kernel & max_neighborhood_kernel = ctx.get_kernel(viennacl::linalg::opencl::kernels::amg<NumericT>::program_name(), "amg_pmis2_max_neighborhood"); - viennacl::ocl::kernel & mark_mis_nodes_kernel = ctx.get_kernel(viennacl::linalg::opencl::kernels::amg<NumericT>::program_name(), "amg_pmis2_mark_mis_nodes"); - viennacl::ocl::kernel & reset_state_kernel = ctx.get_kernel(viennacl::linalg::opencl::kernels::amg<NumericT>::program_name(), "amg_pmis2_reset_state"); - - unsigned int pmis_iters = 0; - while (num_undecided > 0) - { - ++pmis_iters; - - // - // init temporary work data: - // - viennacl::ocl::enqueue(init_workdata_kernel(work_state, work_random, work_index, - amg_context.point_types_, - random_weights, - cl_uint(A.size1()) - ) - ); - - // - // Propagate maximum tuple twice - // - for (unsigned int r = 0; r < 2; ++r) - { - // max operation - viennacl::ocl::enqueue(max_neighborhood_kernel(work_state, work_random, work_index, - work_state2, work_random2, work_index2, - amg_context.influence_jumper_, amg_context.influence_ids_, - cl_uint(A.size1()) - ) - ); - - // copy work array (can be fused into a single kernel if needed. Previous kernel is in most cases sufficiently heavy) - work_state = work_state2; - work_random = work_random2; - work_index = work_index2; - } - - // - // mark MIS and non-MIS nodes: - // - viennacl::ocl::enqueue(mark_mis_nodes_kernel(work_state, work_index, - amg_context.point_types_, - undecided_buffer, - cl_uint(A.size1()) - ) - ); - - // get number of undecided points on host: - viennacl::backend::memory_read(undecided_buffer.handle(), 0, undecided_buffer_host.raw_size(), undecided_buffer_host.get()); - num_undecided = 0; - for (std::size_t i=0; i<undecided_buffer.size(); ++i) - num_undecided += undecided_buffer_host[i]; - - } //while - - viennacl::ocl::enqueue(reset_state_kernel(amg_context.point_types_, cl_uint(amg_context.point_types_.size()) ) ); -} - - - -/** @brief AG (aggregation based) coarsening. Partially single-threaded version (VIENNACL_AMG_COARSE_AG) -* -* @param A Operator matrix -* @param amg_context AMG hierarchy datastructures -* @param tag AMG preconditioner tag -*/ -template<typename NumericT> -void amg_coarse_ag(compressed_matrix<NumericT> const & A, - viennacl::linalg::detail::amg::amg_level_context & amg_context, - viennacl::linalg::amg_tag & tag) -{ - viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(A).context()); - viennacl::linalg::opencl::kernels::amg<NumericT>::init(ctx); - - amg_influence_trivial(A, amg_context, tag); - - // - // Stage 1: Build aggregates: - // - if (tag.get_coarsening_method() == viennacl::linalg::AMG_COARSENING_METHOD_MIS2_AGGREGATION) - amg_coarse_ag_stage1_mis2(A, amg_context, tag); - else - throw std::runtime_error("Only MIS2 coarsening implemented. Selected coarsening not available with OpenCL backend!"); - - viennacl::linalg::opencl::amg::enumerate_coarse_points(amg_context); - - // - // Stage 2: Propagate coarse aggregate indices to neighbors: - // - viennacl::ocl::kernel & propagate_coarse_indices = ctx.get_kernel(viennacl::linalg::opencl::kernels::amg<NumericT>::program_name(), "amg_agg_propagate_coarse_indices"); - viennacl::ocl::enqueue(propagate_coarse_indices(amg_context.point_types_, - amg_context.coarse_id_, - amg_context.influence_jumper_, - amg_context.influence_ids_, - cl_uint(A.size1()) - ) - ); - - // - // Stage 3: Merge remaining undecided points (merging to first aggregate found when cycling over the hierarchy - // - viennacl::ocl::kernel & merge_undecided = ctx.get_kernel(viennacl::linalg::opencl::kernels::amg<NumericT>::program_name(), "amg_agg_merge_undecided"); - viennacl::ocl::enqueue(merge_undecided(amg_context.point_types_, - amg_context.coarse_id_, - amg_context.influence_jumper_, - amg_context.influence_ids_, - cl_uint(A.size1()) - ) - ); - - // - // Stage 4: Set undecided points to fine points (coarse ID already set in Stage 3) - // Note: Stage 3 and Stage 4 were initially fused, but are now split in order to avoid race conditions (or a fallback to sequential execution). - // - viennacl::ocl::kernel & merge_undecided_2 = ctx.get_kernel(viennacl::linalg::opencl::kernels::amg<NumericT>::program_name(), "amg_agg_merge_undecided_2"); - viennacl::ocl::enqueue(merge_undecided_2(amg_context.point_types_, cl_uint(A.size1()) ) ); - -} - - - - -/** @brief Calls the right coarsening procedure -* -* @param A Operator matrix on all levels -* @param amg_context AMG hierarchy datastructures -* @param tag AMG preconditioner tag -*/ -template<typename InternalT1> -void amg_coarse(InternalT1 & A, - viennacl::linalg::detail::amg::amg_level_context & amg_context, - viennacl::linalg::amg_tag & tag) -{ - switch (tag.get_coarsening_method()) - { - case viennacl::linalg::AMG_COARSENING_METHOD_MIS2_AGGREGATION: amg_coarse_ag(A, amg_context, tag); break; - default: throw std::runtime_error("not implemented yet"); - } -} - - - - -////////////////////////////////////// Interpolation ///////////////////////////// - - -/** @brief AG (aggregation based) interpolation. Multi-Threaded! (VIENNACL_INTERPOL_SA) - * - * @param A Operator matrix - * @param P Prolongation matrix - * @param amg_context AMG hierarchy datastructures - * @param tag AMG configuration tag -*/ -template<typename NumericT> -void amg_interpol_ag(compressed_matrix<NumericT> const & A, - compressed_matrix<NumericT> & P, - viennacl::linalg::detail::amg::amg_level_context & amg_context, - viennacl::linalg::amg_tag & tag) -{ - viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(A).context()); - viennacl::linalg::opencl::kernels::amg<NumericT>::init(ctx); - - (void)tag; - P = compressed_matrix<NumericT>(A.size1(), amg_context.num_coarse_, A.size1(), viennacl::traits::context(A)); - - // build matrix here - viennacl::ocl::kernel & interpolate_ag = ctx.get_kernel(viennacl::linalg::opencl::kernels::amg<NumericT>::program_name(), "amg_interpol_ag"); - viennacl::ocl::enqueue(interpolate_ag(P.handle1().opencl_handle(), - P.handle2().opencl_handle(), - P.handle().opencl_handle(), - amg_context.coarse_id_, - cl_uint(A.size1()) - ) - ); - - P.generate_row_block_information(); -} - -/** @brief Smoothed aggregation interpolation. (VIENNACL_INTERPOL_SA) - * - * @param A Operator matrix - * @param P Prolongation matrix - * @param amg_context AMG hierarchy datastructures - * @param tag AMG configuration tag -*/ -template<typename NumericT> -void amg_interpol_sa(compressed_matrix<NumericT> const & A, - compressed_matrix<NumericT> & P, - viennacl::linalg::detail::amg::amg_level_context & amg_context, - viennacl::linalg::amg_tag & tag) -{ - viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(A).context()); - viennacl::linalg::opencl::kernels::amg<NumericT>::init(ctx); - - (void)tag; - viennacl::compressed_matrix<NumericT> P_tentative(A.size1(), amg_context.num_coarse_, A.size1(), viennacl::traits::context(A)); - - // form tentative operator: - amg_interpol_ag(A, P_tentative, amg_context, tag); - - viennacl::compressed_matrix<NumericT> Jacobi(A.size1(), A.size1(), A.nnz(), viennacl::traits::context(A)); - - viennacl::ocl::kernel & interpol_sa = ctx.get_kernel(viennacl::linalg::opencl::kernels::amg<NumericT>::program_name(), "amg_interpol_sa"); - viennacl::ocl::enqueue(interpol_sa(A.handle1().opencl_handle(), - A.handle2().opencl_handle(), - A.handle().opencl_handle(), - cl_uint(A.size1()), - cl_uint(A.nnz()), - Jacobi.handle1().opencl_handle(), - Jacobi.handle2().opencl_handle(), - Jacobi.handle().opencl_handle(), - NumericT(tag.get_jacobi_weight()) - ) - ); - - P = viennacl::linalg::prod(Jacobi, P_tentative); - - P.generate_row_block_information(); -} - -/** @brief Dispatcher for building the interpolation matrix - * - * @param A Operator matrix - * @param P Prolongation matrix - * @param amg_context AMG hierarchy datastructures - * @param tag AMG configuration tag -*/ -template<typename MatrixT> -void amg_interpol(MatrixT const & A, - MatrixT & P, - viennacl::linalg::detail::amg::amg_level_context & amg_context, - viennacl::linalg::amg_tag & tag) -{ - switch (tag.get_interpolation_method()) - { - case viennacl::linalg::AMG_INTERPOLATION_METHOD_AGGREGATION: amg_interpol_ag (A, P, amg_context, tag); break; - case viennacl::linalg::AMG_INTERPOLATION_METHOD_SMOOTHED_AGGREGATION: amg_interpol_sa (A, P, amg_context, tag); break; - default: throw std::runtime_error("Not implemented yet!"); - } -} - -/** Assign sparse matrix A to dense matrix B */ -template<typename NumericT, unsigned int AlignmentV> -void assign_to_dense(viennacl::compressed_matrix<NumericT, AlignmentV> const & A, - viennacl::matrix_base<NumericT> & B) -{ - viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(A).context()); - viennacl::linalg::opencl::kernels::compressed_matrix<NumericT>::init(ctx); - viennacl::ocl::kernel & k = ctx.get_kernel(viennacl::linalg::opencl::kernels::compressed_matrix<NumericT>::program_name(), - "assign_to_dense"); - - viennacl::ocl::enqueue(k(A.handle1().opencl_handle(), A.handle2().opencl_handle(), A.handle().opencl_handle(), - viennacl::traits::opencl_handle(B), - cl_uint(viennacl::traits::start1(B)), cl_uint(viennacl::traits::start2(B)), - cl_uint(viennacl::traits::stride1(B)), cl_uint(viennacl::traits::stride2(B)), - cl_uint(viennacl::traits::size1(B)), cl_uint(viennacl::traits::size2(B)), - cl_uint(viennacl::traits::internal_size1(B)), cl_uint(viennacl::traits::internal_size2(B)) )); - -} - -/** @brief Jacobi Smoother (OpenCL version) -* -* @param iterations Number of smoother iterations -* @param A Operator matrix for the smoothing -* @param x The vector smoothing is applied to -* @param x_backup (Different) Vector holding the same values as x -* @param rhs_smooth The right hand side of the equation for the smoother -* @param weight Damping factor. 0: No effect of smoother. 1: Undamped Jacobi iteration -*/ -template<typename NumericT> -void smooth_jacobi(unsigned int iterations, - compressed_matrix<NumericT> const & A, - vector<NumericT> & x, - vector<NumericT> & x_backup, - vector<NumericT> const & rhs_smooth, - NumericT weight) -{ - viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(x).context()); - viennacl::linalg::opencl::kernels::compressed_matrix<NumericT>::init(ctx); - viennacl::ocl::kernel & k = ctx.get_kernel(viennacl::linalg::opencl::kernels::compressed_matrix<NumericT>::program_name(), "jacobi"); - - for (unsigned int i=0; i<iterations; ++i) - { - x_backup = x; - - viennacl::ocl::enqueue(k(A.handle1().opencl_handle(), A.handle2().opencl_handle(), A.handle().opencl_handle(), - static_cast<NumericT>(weight), - viennacl::traits::opencl_handle(x_backup), - viennacl::traits::opencl_handle(x), - viennacl::traits::opencl_handle(rhs_smooth), - static_cast<cl_uint>(rhs_smooth.size()))); - - } -} - - -} //namespace amg -} //namespace host_based -} //namespace linalg -} //namespace viennacl - -#endif http://git-wip-us.apache.org/repos/asf/mahout/blob/7ae549fa/native-viennaCL/src/main/cpp/viennacl/linalg/opencl/bisect_kernel_calls.hpp ---------------------------------------------------------------------- diff --git a/native-viennaCL/src/main/cpp/viennacl/linalg/opencl/bisect_kernel_calls.hpp b/native-viennaCL/src/main/cpp/viennacl/linalg/opencl/bisect_kernel_calls.hpp deleted file mode 100644 index 2fcd6fa..0000000 --- a/native-viennaCL/src/main/cpp/viennacl/linalg/opencl/bisect_kernel_calls.hpp +++ /dev/null @@ -1,177 +0,0 @@ -#ifndef VIENNACL_LINALG_OPENCL_BISECT_KERNEL_CALLS_HPP_ -#define VIENNACL_LINALG_OPENCL_BISECT_KERNEL_CALLS_HPP_ - - -/* ========================================================================= - Copyright (c) 2010-2016, Institute for Microelectronics, - Institute for Analysis and Scientific Computing, - TU Wien. - Portions of this software are copyright by UChicago Argonne, LLC. - - ----------------- - ViennaCL - The Vienna Computing Library - ----------------- - - Project Head: Karl Rupp [email protected] - - (A list of authors and contributors can be found in the manual) - - License: MIT (X11), see file LICENSE in the base directory -============================================================================= */ - - -/** @file viennacl/linalg/opencl/bisect_kernel_calls.hpp - @brief OpenCL kernel calls for the bisection algorithm - - Implementation based on the sample provided with the CUDA 6.0 SDK, for which - the creation of derivative works is allowed by including the following statement: - "This software contains source code provided by NVIDIA Corporation." -*/ - -// includes, project -#include "viennacl/linalg/opencl/kernels/bisect.hpp" -#include "viennacl/linalg/detail/bisect/structs.hpp" -#include "viennacl/linalg/detail/bisect/config.hpp" -#include "viennacl/linalg/detail/bisect/util.hpp" - -namespace viennacl -{ -namespace linalg -{ -namespace opencl -{ -const std::string BISECT_KERNEL_SMALL = "bisectKernelSmall"; -const std::string BISECT_KERNEL_LARGE = "bisectKernelLarge"; -const std::string BISECT_KERNEL_LARGE_ONE_INTERVALS = "bisectKernelLarge_OneIntervals"; -const std::string BISECT_KERNEL_LARGE_MULT_INTERVALS = "bisectKernelLarge_MultIntervals"; - -template<typename NumericT> -void bisectSmall(const viennacl::linalg::detail::InputData<NumericT> &input, - viennacl::linalg::detail::ResultDataSmall<NumericT> &result, - const unsigned int mat_size, - const NumericT lg, const NumericT ug, - const NumericT precision) - { - viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(input.g_a).context()); - viennacl::linalg::opencl::kernels::bisect_kernel<NumericT>::init(ctx); - - viennacl::ocl::kernel& kernel = ctx.get_kernel(viennacl::linalg::opencl::kernels::bisect_kernel<NumericT>::program_name(), BISECT_KERNEL_SMALL); - kernel.global_work_size(0, 1 * VIENNACL_BISECT_MAX_THREADS_BLOCK_SMALL_MATRIX); - kernel.local_work_size(0, VIENNACL_BISECT_MAX_THREADS_BLOCK_SMALL_MATRIX); - - viennacl::ocl::enqueue(kernel(viennacl::traits::opencl_handle(input.g_a), - viennacl::traits::opencl_handle(input.g_b), - static_cast<cl_uint>(mat_size), - viennacl::traits::opencl_handle(result.vcl_g_left), - viennacl::traits::opencl_handle(result.vcl_g_right), - viennacl::traits::opencl_handle(result.vcl_g_left_count), - viennacl::traits::opencl_handle(result.vcl_g_right_count), - static_cast<NumericT>(lg), - static_cast<NumericT>(ug), - static_cast<cl_uint>(0), - static_cast<cl_uint>(mat_size), - static_cast<NumericT>(precision) - )); - - } - -template<typename NumericT> -void bisectLarge(const viennacl::linalg::detail::InputData<NumericT> &input, - viennacl::linalg::detail::ResultDataLarge<NumericT> &result, - const unsigned int mat_size, - const NumericT lg, const NumericT ug, - const NumericT precision) - { - viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(input.g_a).context()); - viennacl::linalg::opencl::kernels::bisect_kernel<NumericT>::init(ctx); - - viennacl::ocl::kernel& kernel = ctx.get_kernel(viennacl::linalg::opencl::kernels::bisect_kernel<NumericT>::program_name(), BISECT_KERNEL_LARGE); - kernel.global_work_size(0, mat_size > 512 ? VIENNACL_BISECT_MAX_THREADS_BLOCK : VIENNACL_BISECT_MAX_THREADS_BLOCK / 2); // Use only 128 threads for 256 < n <= 512, this - kernel.local_work_size(0, mat_size > 512 ? VIENNACL_BISECT_MAX_THREADS_BLOCK : VIENNACL_BISECT_MAX_THREADS_BLOCK / 2); // is reasoned - - viennacl::ocl::enqueue(kernel(viennacl::traits::opencl_handle(input.g_a), - viennacl::traits::opencl_handle(input.g_b), - static_cast<cl_uint>(mat_size), - static_cast<NumericT>(lg), - static_cast<NumericT>(ug), - static_cast<cl_uint>(0), - static_cast<cl_uint>(mat_size), - static_cast<NumericT>(precision), - viennacl::traits::opencl_handle(result.g_num_one), - viennacl::traits::opencl_handle(result.g_num_blocks_mult), - viennacl::traits::opencl_handle(result.g_left_one), - viennacl::traits::opencl_handle(result.g_right_one), - viennacl::traits::opencl_handle(result.g_pos_one), - viennacl::traits::opencl_handle(result.g_left_mult), - viennacl::traits::opencl_handle(result.g_right_mult), - viennacl::traits::opencl_handle(result.g_left_count_mult), - viennacl::traits::opencl_handle(result.g_right_count_mult), - viennacl::traits::opencl_handle(result.g_blocks_mult), - viennacl::traits::opencl_handle(result.g_blocks_mult_sum) - )); - - } - -template<typename NumericT> -void bisectLargeOneIntervals(const viennacl::linalg::detail::InputData<NumericT> &input, - viennacl::linalg::detail::ResultDataLarge<NumericT> &result, - const unsigned int mat_size, - const NumericT precision) - { - unsigned int num_one_intervals = result.g_num_one; - unsigned int num_blocks = viennacl::linalg::detail::getNumBlocksLinear(num_one_intervals, - mat_size > 512 ? VIENNACL_BISECT_MAX_THREADS_BLOCK: VIENNACL_BISECT_MAX_THREADS_BLOCK / 2); - - viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(input.g_a).context()); - viennacl::linalg::opencl::kernels::bisect_kernel<NumericT>::init(ctx); - - viennacl::ocl::kernel& kernel = ctx.get_kernel(viennacl::linalg::opencl::kernels::bisect_kernel<NumericT>::program_name(), BISECT_KERNEL_LARGE_ONE_INTERVALS); - kernel.global_work_size(0, num_blocks * (mat_size > 512 ? VIENNACL_BISECT_MAX_THREADS_BLOCK : VIENNACL_BISECT_MAX_THREADS_BLOCK / 2)); - kernel.local_work_size(0, mat_size > 512 ? VIENNACL_BISECT_MAX_THREADS_BLOCK : VIENNACL_BISECT_MAX_THREADS_BLOCK / 2); - - viennacl::ocl::enqueue(kernel(viennacl::traits::opencl_handle(input.g_a), - viennacl::traits::opencl_handle(input.g_b), - static_cast<cl_uint>(mat_size), - static_cast<cl_uint>(num_one_intervals), - viennacl::traits::opencl_handle(result.g_left_one), - viennacl::traits::opencl_handle(result.g_right_one), - viennacl::traits::opencl_handle(result.g_pos_one), - static_cast<NumericT>(precision) - )); - } - - -template<typename NumericT> -void bisectLargeMultIntervals(const viennacl::linalg::detail::InputData<NumericT> &input, - viennacl::linalg::detail::ResultDataLarge<NumericT> &result, - const unsigned int mat_size, - const NumericT precision) - { - unsigned int num_blocks_mult = result.g_num_blocks_mult; - - viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(input.g_a).context()); - viennacl::linalg::opencl::kernels::bisect_kernel<NumericT>::init(ctx); - - viennacl::ocl::kernel& kernel = ctx.get_kernel(viennacl::linalg::opencl::kernels::bisect_kernel<NumericT>::program_name(), BISECT_KERNEL_LARGE_MULT_INTERVALS); - kernel.global_work_size(0, num_blocks_mult * (mat_size > 512 ? VIENNACL_BISECT_MAX_THREADS_BLOCK : VIENNACL_BISECT_MAX_THREADS_BLOCK / 2)); - kernel.local_work_size(0, mat_size > 512 ? VIENNACL_BISECT_MAX_THREADS_BLOCK : VIENNACL_BISECT_MAX_THREADS_BLOCK / 2); - - viennacl::ocl::enqueue(kernel(viennacl::traits::opencl_handle(input.g_a), - viennacl::traits::opencl_handle(input.g_b), - static_cast<cl_uint>(mat_size), - viennacl::traits::opencl_handle(result.g_blocks_mult), - viennacl::traits::opencl_handle(result.g_blocks_mult_sum), - viennacl::traits::opencl_handle(result.g_left_mult), - viennacl::traits::opencl_handle(result.g_right_mult), - viennacl::traits::opencl_handle(result.g_left_count_mult), - viennacl::traits::opencl_handle(result.g_right_count_mult), - viennacl::traits::opencl_handle(result.g_lambda_mult), - viennacl::traits::opencl_handle(result.g_pos_mult), - static_cast<NumericT>(precision) - )); - } -} // namespace opencl -} // namespace linalg -} // namespace viennacl - -#endif http://git-wip-us.apache.org/repos/asf/mahout/blob/7ae549fa/native-viennaCL/src/main/cpp/viennacl/linalg/opencl/common.hpp ---------------------------------------------------------------------- diff --git a/native-viennaCL/src/main/cpp/viennacl/linalg/opencl/common.hpp b/native-viennaCL/src/main/cpp/viennacl/linalg/opencl/common.hpp deleted file mode 100644 index d6a288b..0000000 --- a/native-viennaCL/src/main/cpp/viennacl/linalg/opencl/common.hpp +++ /dev/null @@ -1,102 +0,0 @@ -#ifndef VIENNACL_LINALG_OPENCL_COMMON_HPP_ -#define VIENNACL_LINALG_OPENCL_COMMON_HPP_ - -/* ========================================================================= - Copyright (c) 2010-2016, Institute for Microelectronics, - Institute for Analysis and Scientific Computing, - TU Wien. - Portions of this software are copyright by UChicago Argonne, LLC. - - ----------------- - ViennaCL - The Vienna Computing Library - ----------------- - - Project Head: Karl Rupp [email protected] - - (A list of authors and contributors can be found in the manual) - - License: MIT (X11), see file LICENSE in the base directory -============================================================================= */ - -/** @file viennacl/linalg/opencl/common.hpp - @brief Common implementations shared by OpenCL-based operations -*/ - -#include <cmath> - -#include "viennacl/forwards.h" -#include "viennacl/ocl/platform.hpp" -#include "viennacl/traits/handle.hpp" - -namespace viennacl -{ -namespace linalg -{ -namespace opencl -{ -namespace detail -{ - - - -inline cl_uint make_options(vcl_size_t length, bool reciprocal, bool flip_sign) -{ - return static_cast<cl_uint>( ((length > 1) ? (cl_uint(length) << 2) : 0) + (reciprocal ? 2 : 0) + (flip_sign ? 1 : 0) ); -} - - -/** @brief Returns the OpenCL kernel string for the operation C = A * B with A sparse, B, C dense matrices. */ -inline std::string sparse_dense_matmult_kernel_name(bool B_transposed, bool B_row_major, bool C_row_major) -{ - if (B_transposed) - { - if (B_row_major && C_row_major) - return "trans_mat_mult_row_row"; - if (B_row_major && !C_row_major) - return "trans_mat_mult_row_col"; - if (!B_row_major && C_row_major) - return "trans_mat_mult_col_row"; - - return "trans_mat_mult_col_col"; - } - - if (B_row_major && C_row_major) - return "mat_mult_row_row"; - if (B_row_major && !C_row_major) - return "mat_mult_row_col"; - if (!B_row_major && C_row_major) - return "mat_mult_col_row"; - - return "mat_mult_col_col"; -} - - - -template<typename SomeT> -ocl::device const & current_device(SomeT const & obj) { return traits::opencl_handle(obj).context().current_device(); } - -inline std::string op_to_string(op_abs) { return "abs"; } -inline std::string op_to_string(op_acos) { return "acos"; } -inline std::string op_to_string(op_asin) { return "asin"; } -inline std::string op_to_string(op_atan) { return "atan"; } -inline std::string op_to_string(op_ceil) { return "ceil"; } -inline std::string op_to_string(op_cos) { return "cos"; } -inline std::string op_to_string(op_cosh) { return "cosh"; } -inline std::string op_to_string(op_exp) { return "exp"; } -inline std::string op_to_string(op_fabs) { return "fabs"; } -inline std::string op_to_string(op_floor) { return "floor"; } -inline std::string op_to_string(op_log) { return "log"; } -inline std::string op_to_string(op_log10) { return "log10"; } -inline std::string op_to_string(op_sin) { return "sin"; } -inline std::string op_to_string(op_sinh) { return "sinh"; } -inline std::string op_to_string(op_sqrt) { return "sqrt"; } -inline std::string op_to_string(op_tan) { return "tan"; } -inline std::string op_to_string(op_tanh) { return "tanh"; } - -} //namespace detail -} //namespace opencl -} //namespace linalg -} //namespace viennacl - - -#endif http://git-wip-us.apache.org/repos/asf/mahout/blob/7ae549fa/native-viennaCL/src/main/cpp/viennacl/linalg/opencl/direct_solve.hpp ---------------------------------------------------------------------- diff --git a/native-viennaCL/src/main/cpp/viennacl/linalg/opencl/direct_solve.hpp b/native-viennaCL/src/main/cpp/viennacl/linalg/opencl/direct_solve.hpp deleted file mode 100644 index 76874b1..0000000 --- a/native-viennaCL/src/main/cpp/viennacl/linalg/opencl/direct_solve.hpp +++ /dev/null @@ -1,153 +0,0 @@ -#ifndef VIENNACL_LINALG_OPENCL_DIRECT_SOLVE_HPP -#define VIENNACL_LINALG_OPENCL_DIRECT_SOLVE_HPP - -/* ========================================================================= - Copyright (c) 2010-2016, Institute for Microelectronics, - Institute for Analysis and Scientific Computing, - TU Wien. - Portions of this software are copyright by UChicago Argonne, LLC. - - ----------------- - ViennaCL - The Vienna Computing Library - ----------------- - - Project Head: Karl Rupp [email protected] - - (A list of authors and contributors can be found in the manual) - - License: MIT (X11), see file LICENSE in the base directory -============================================================================= */ - -/** @file viennacl/linalg/opencl/direct_solve.hpp - @brief Implementations of dense direct solvers are found here. -*/ - -#include "viennacl/vector.hpp" -#include "viennacl/matrix.hpp" -#include "viennacl/ocl/kernel.hpp" -#include "viennacl/ocl/device.hpp" -#include "viennacl/ocl/handle.hpp" -#include "viennacl/linalg/opencl/kernels/matrix_solve.hpp" -#include "viennacl/linalg/opencl/kernels/matrix.hpp" - -namespace viennacl -{ -namespace linalg -{ -namespace opencl -{ - -namespace detail -{ - inline cl_uint get_option_for_solver_tag(viennacl::linalg::upper_tag) { return 0; } - inline cl_uint get_option_for_solver_tag(viennacl::linalg::unit_upper_tag) { return (1 << 0); } - inline cl_uint get_option_for_solver_tag(viennacl::linalg::lower_tag) { return (1 << 2); } - inline cl_uint get_option_for_solver_tag(viennacl::linalg::unit_lower_tag) { return (1 << 2) | (1 << 0); } - - template<typename MatrixT1, typename MatrixT2, typename KernelT> - void inplace_solve_impl(MatrixT1 const & A, MatrixT2 & B, KernelT & k) - { - viennacl::ocl::enqueue(k(viennacl::traits::opencl_handle(A), - cl_uint(viennacl::traits::start1(A)), cl_uint(viennacl::traits::start2(A)), - cl_uint(viennacl::traits::stride1(A)), cl_uint(viennacl::traits::stride2(A)), - cl_uint(viennacl::traits::size1(A)), cl_uint(viennacl::traits::size2(A)), - cl_uint(viennacl::traits::internal_size1(A)), cl_uint(viennacl::traits::internal_size2(A)), - viennacl::traits::opencl_handle(B), - cl_uint(viennacl::traits::start1(B)), cl_uint(viennacl::traits::start2(B)), - cl_uint(viennacl::traits::stride1(B)), cl_uint(viennacl::traits::stride2(B)), - cl_uint(viennacl::traits::size1(B)), cl_uint(viennacl::traits::size2(B)), - cl_uint(viennacl::traits::internal_size1(B)), cl_uint(viennacl::traits::internal_size2(B)) - ) - ); - } -} - - -// -// Note: By convention, all size checks are performed in the calling frontend. No need to double-check here. -// - -////////////////// upper triangular solver (upper_tag) ////////////////////////////////////// -/** @brief Direct inplace solver for dense triangular systems. Matlab notation: A \ B -* -* @param A The system matrix -* @param B The matrix of row vectors, where the solution is directly written to -*/ -template<typename NumericT, typename SolverTagT> -void inplace_solve(matrix_base<NumericT> const & A, - matrix_base<NumericT> & B, - SolverTagT) -{ - viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(A).context()); - - std::string program_name; - if (A.row_major() && B.row_major()) - { - typedef viennacl::linalg::opencl::kernels::matrix_solve<NumericT, row_major, row_major> KernelClass; - KernelClass::init(ctx); - program_name = KernelClass::program_name(); - } - else if (A.row_major() && !B.row_major()) - { - typedef viennacl::linalg::opencl::kernels::matrix_solve<NumericT, row_major, column_major> KernelClass; - KernelClass::init(ctx); - program_name = KernelClass::program_name(); - } - else if (!A.row_major() && B.row_major()) - { - typedef viennacl::linalg::opencl::kernels::matrix_solve<NumericT, column_major, row_major> KernelClass; - KernelClass::init(ctx); - program_name = KernelClass::program_name(); - } - else - { - typedef viennacl::linalg::opencl::kernels::matrix_solve<NumericT, column_major, column_major> KernelClass; - KernelClass::init(ctx); - program_name = KernelClass::program_name(); - } - - std::stringstream ss; - ss << SolverTagT::name(); - ss << "_solve"; - - viennacl::ocl::kernel & k = ctx.get_kernel(program_name, ss.str()); - - k.global_work_size(0, B.size2() * k.local_work_size()); - detail::inplace_solve_impl(A, B, k); -} - - - -// -// Solve on vector -// - -template<typename NumericT, typename SOLVERTAG> -void inplace_solve(matrix_base<NumericT> const & A, - vector_base<NumericT> & x, - SOLVERTAG) -{ - cl_uint options = detail::get_option_for_solver_tag(SOLVERTAG()); - - viennacl::ocl::kernel & k = detail::legacy_kernel_for_matrix(A, "triangular_substitute_inplace"); - - k.global_work_size(0, k.local_work_size()); - viennacl::ocl::enqueue(k(viennacl::traits::opencl_handle(A), - cl_uint(viennacl::traits::start1(A)), cl_uint(viennacl::traits::start2(A)), - cl_uint(viennacl::traits::stride1(A)), cl_uint(viennacl::traits::stride2(A)), - cl_uint(viennacl::traits::size1(A)), cl_uint(viennacl::traits::size2(A)), - cl_uint(viennacl::traits::internal_size1(A)), cl_uint(viennacl::traits::internal_size2(A)), - viennacl::traits::opencl_handle(x), - cl_uint(viennacl::traits::start(x)), - cl_uint(viennacl::traits::stride(x)), - cl_uint(viennacl::traits::size(x)), - options - ) - ); -} - -} //namespace opencl -} //namespace linalg -} //namespace viennacl - -#endif http://git-wip-us.apache.org/repos/asf/mahout/blob/7ae549fa/native-viennaCL/src/main/cpp/viennacl/linalg/opencl/fft_operations.hpp ---------------------------------------------------------------------- diff --git a/native-viennaCL/src/main/cpp/viennacl/linalg/opencl/fft_operations.hpp b/native-viennaCL/src/main/cpp/viennacl/linalg/opencl/fft_operations.hpp deleted file mode 100644 index a7b12b3..0000000 --- a/native-viennaCL/src/main/cpp/viennacl/linalg/opencl/fft_operations.hpp +++ /dev/null @@ -1,350 +0,0 @@ -#ifndef VIENNACL_LINALG_OPENCL_FFT_OPERATIONS_HPP_ -#define VIENNACL_LINALG_OPENCL_FFT_OPERATIONS_HPP_ - -/* ========================================================================= - Copyright (c) 2010-2016, Institute for Microelectronics, - Institute for Analysis and Scientific Computing, - TU Wien. - Portions of this software are copyright by UChicago Argonne, LLC. - - ----------------- - ViennaCL - The Vienna Computing Library - ----------------- - - Project Head: Karl Rupp [email protected] - - (A list of authors and contributors can be found in the manual) - - License: MIT (X11), see file LICENSE in the base directory - ============================================================================= */ - -/** @file viennacl/linalg/opencl/fft_operations.hpp - @brief Implementations of Fast Furier Transformation using OpenCL - */ - -#include "viennacl/forwards.h" -#include "viennacl/ocl/device.hpp" -#include "viennacl/ocl/kernel.hpp" -#include "viennacl/traits/handle.hpp" -#include "viennacl/traits/stride.hpp" -#include "viennacl/linalg/host_based/fft_operations.hpp" -#include "viennacl/linalg/opencl/kernels/fft.hpp" -#include "viennacl/linalg/opencl/kernels/matrix.hpp" - -#include <viennacl/vector.hpp> -#include <viennacl/matrix.hpp> - -#include <cmath> -#include <stdexcept> - -namespace viennacl -{ -namespace linalg -{ -namespace detail -{ -namespace fft -{ - - const vcl_size_t MAX_LOCAL_POINTS_NUM = 512; - - /** - * @brief Get number of bits - */ - inline vcl_size_t num_bits(vcl_size_t size) - { - vcl_size_t bits_datasize = 0; - vcl_size_t ds = 1; - - while (ds < size) - { - ds = ds << 1; - bits_datasize++; - } - - return bits_datasize; - } - - /** - * @brief Find next power of two - */ - inline vcl_size_t next_power_2(vcl_size_t n) - { - n = n - 1; - - vcl_size_t power = 1; - - while (power < sizeof(vcl_size_t) * 8) - { - n = n | (n >> power); - power *= 2; - } - - return n + 1; - } - -} //namespce fft -} //namespace detail - -namespace opencl -{ - -/** - * @brief Direct algorithm for computing Fourier transformation. - * - * Works on any sizes of data. - * Serial implementation has o(n^2) complexity - */ -template<typename NumericT> -void direct(viennacl::ocl::handle<cl_mem> const & in, - viennacl::ocl::handle<cl_mem> const & out, - vcl_size_t size, vcl_size_t stride, vcl_size_t batch_num, NumericT sign = NumericT(-1), - viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::DATA_ORDER data_order = viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::ROW_MAJOR) -{ - viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(in.context()); - viennacl::linalg::opencl::kernels::fft<NumericT>::init(ctx); - - std::string program_string = viennacl::linalg::opencl::kernels::matrix_legacy<NumericT, row_major>::program_name(); - if (data_order == viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::COL_MAJOR) - { - viennacl::linalg::opencl::kernels::matrix_legacy<NumericT, column_major>::init(ctx); - program_string = - viennacl::linalg::opencl::kernels::matrix_legacy<NumericT, column_major>::program_name(); - } else - viennacl::linalg::opencl::kernels::matrix_legacy<NumericT, row_major>::init(ctx); - - viennacl::ocl::kernel & k = ctx.get_kernel(program_string, "fft_direct"); - viennacl::ocl::enqueue(k(in, out, - static_cast<cl_uint>(size), - static_cast<cl_uint>(stride), - static_cast<cl_uint>(batch_num), - sign) - ); -} - -/* - * This function performs reorder of input data. Indexes are sorted in bit-reversal order. - * Such reordering should be done before in-place FFT. - */ -template<typename NumericT> -void reorder(viennacl::ocl::handle<cl_mem> const & in, - vcl_size_t size, vcl_size_t stride, - vcl_size_t bits_datasize, vcl_size_t batch_num, - viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::DATA_ORDER data_order = viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::ROW_MAJOR) -{ - viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(in.context()); - viennacl::linalg::opencl::kernels::fft<NumericT>::init(ctx); - - std::string program_string = viennacl::linalg::opencl::kernels::matrix_legacy<NumericT, row_major>::program_name(); - if (data_order == viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::COL_MAJOR) - { - viennacl::linalg::opencl::kernels::matrix_legacy<NumericT, column_major>::init(ctx); - program_string = viennacl::linalg::opencl::kernels::matrix_legacy<NumericT, column_major>::program_name(); - } else - viennacl::linalg::opencl::kernels::matrix_legacy<NumericT, row_major>::init(ctx); - - viennacl::ocl::kernel& k = ctx.get_kernel(program_string, "fft_reorder"); - viennacl::ocl::enqueue(k(in, - static_cast<cl_uint>(bits_datasize), static_cast<cl_uint>(size), - static_cast<cl_uint>(stride), static_cast<cl_uint>(batch_num)) - ); -} - -/** - * @brief Radix-2 algorithm for computing Fourier transformation. - * - * Works only on power-of-two sizes of data. - * Serial implementation has o(n * lg n) complexity. - * This is a Cooley-Tukey algorithm - */ -template<typename NumericT> -void radix2(viennacl::ocl::handle<cl_mem> const & in, - vcl_size_t size, vcl_size_t stride, - vcl_size_t batch_num, NumericT sign = NumericT(-1), - viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::DATA_ORDER data_order = viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::ROW_MAJOR) -{ - viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(in.context()); - viennacl::linalg::opencl::kernels::fft<NumericT>::init(ctx); - - assert(batch_num != 0 && bool("batch_num must be larger than 0")); - - std::string program_string = viennacl::linalg::opencl::kernels::matrix_legacy<NumericT, row_major>::program_name(); - if (data_order == viennacl::linalg::host_based::detail::fft::FFT_DATA_ORDER::COL_MAJOR) - { - viennacl::linalg::opencl::kernels::matrix_legacy<NumericT, column_major>::init(ctx); - program_string = viennacl::linalg::opencl::kernels::matrix_legacy<NumericT, column_major>::program_name(); - } else - viennacl::linalg::opencl::kernels::matrix_legacy<NumericT, row_major>::init(ctx); - - vcl_size_t bits_datasize = viennacl::linalg::detail::fft::num_bits(size); - if (size <= viennacl::linalg::detail::fft::MAX_LOCAL_POINTS_NUM) - { - viennacl::ocl::kernel & k = ctx.get_kernel(program_string, "fft_radix2_local"); - viennacl::ocl::enqueue(k(in, - viennacl::ocl::local_mem((size * 4) * sizeof(NumericT)), - static_cast<cl_uint>(bits_datasize), static_cast<cl_uint>(size), - static_cast<cl_uint>(stride), static_cast<cl_uint>(batch_num), sign)); - - } - else - { - viennacl::linalg::opencl::reorder<NumericT>(in, size, stride, bits_datasize, batch_num); - - for (vcl_size_t step = 0; step < bits_datasize; step++) - { - viennacl::ocl::kernel & k = ctx.get_kernel(program_string, "fft_radix2"); - viennacl::ocl::enqueue(k(in, - static_cast<cl_uint>(step), static_cast<cl_uint>(bits_datasize), - static_cast<cl_uint>(size), static_cast<cl_uint>(stride), - static_cast<cl_uint>(batch_num), sign)); - } - } -} - -/** - * @brief Bluestein's algorithm for computing Fourier transformation. - * - * Currently, Works only for sizes of input data which less than 2^16. - * Uses a lot of additional memory, but should be fast for any size of data. - * Serial implementation has something about o(n * lg n) complexity - */ -template<typename NumericT, unsigned int AlignmentV> -void bluestein(viennacl::vector<NumericT, AlignmentV>& in, - viennacl::vector<NumericT, AlignmentV>& out, vcl_size_t /*batch_num*/) -{ - viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(in).context()); - viennacl::linalg::opencl::kernels::fft<NumericT>::init(ctx); - - vcl_size_t size = in.size() >> 1; - vcl_size_t ext_size = viennacl::linalg::detail::fft::next_power_2(2 * size - 1); - - viennacl::vector<NumericT, AlignmentV> A(ext_size << 1); - viennacl::vector<NumericT, AlignmentV> B(ext_size << 1); - viennacl::vector<NumericT, AlignmentV> Z(ext_size << 1); - - { - viennacl::ocl::kernel & k = ctx.get_kernel(viennacl::linalg::opencl::kernels::fft<NumericT>::program_name(), "zero2"); - viennacl::ocl::enqueue(k(A, B, static_cast<cl_uint>(ext_size))); - } - { - viennacl::ocl::kernel & k = ctx.get_kernel(viennacl::linalg::opencl::kernels::fft<NumericT>::program_name(), "bluestein_pre"); - viennacl::ocl::enqueue(k(in, A, B, static_cast<cl_uint>(size), static_cast<cl_uint>(ext_size))); - } - - viennacl::linalg::convolve_i(A, B, Z); - - { - viennacl::ocl::kernel& k = ctx.get_kernel(viennacl::linalg::opencl::kernels::fft<NumericT>::program_name(), "bluestein_post"); - viennacl::ocl::enqueue(k(Z, out, static_cast<cl_uint>(size))); - } -} - -/** - * @brief Mutiply two complex vectors and store result in output - */ -template<typename NumericT, unsigned int AlignmentV> -void multiply_complex(viennacl::vector<NumericT, AlignmentV> const & input1, - viennacl::vector<NumericT, AlignmentV> const & input2, - viennacl::vector<NumericT, AlignmentV> & output) -{ - viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(input1).context()); - viennacl::linalg::opencl::kernels::fft<NumericT>::init(ctx); - vcl_size_t size = input1.size() >> 1; - viennacl::ocl::kernel & k = ctx.get_kernel(viennacl::linalg::opencl::kernels::fft<NumericT>::program_name(), "fft_mult_vec"); - viennacl::ocl::enqueue(k(input1, input2, output, static_cast<cl_uint>(size))); -} - -/** - * @brief Normalize vector on with his own size - */ -template<typename NumericT, unsigned int AlignmentV> -void normalize(viennacl::vector<NumericT, AlignmentV> & input) -{ - viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(input).context()); - viennacl::linalg::opencl::kernels::fft<NumericT>::init(ctx); - - viennacl::ocl::kernel & k = ctx.get_kernel(viennacl::linalg::opencl::kernels::fft<NumericT>::program_name(), "fft_div_vec_scalar"); - - vcl_size_t size = input.size() >> 1; - NumericT norm_factor = static_cast<NumericT>(size); - viennacl::ocl::enqueue(k(input, static_cast<cl_uint>(size), norm_factor)); -} - -/** - * @brief Inplace_transpose matrix - */ -template<typename NumericT, unsigned int AlignmentV> -void transpose(viennacl::matrix<NumericT, viennacl::row_major, AlignmentV> & input) -{ - viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(input).context()); - viennacl::linalg::opencl::kernels::fft<NumericT>::init(ctx); - - viennacl::ocl::kernel& k = ctx.get_kernel(viennacl::linalg::opencl::kernels::fft<NumericT>::program_name(), "transpose_inplace"); - viennacl::ocl::enqueue(k(input, static_cast<cl_uint>(input.internal_size1() >> 1), - static_cast<cl_uint>(input.internal_size2()) >> 1)); -} - -/** - * @brief Transpose matrix - */ -template<typename NumericT, unsigned int AlignmentV> -void transpose(viennacl::matrix<NumericT, viennacl::row_major, AlignmentV> const & input, - viennacl::matrix<NumericT, viennacl::row_major, AlignmentV> & output) -{ - viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(input).context()); - viennacl::linalg::opencl::kernels::fft<NumericT>::init(ctx); - - viennacl::ocl::kernel& k = ctx.get_kernel(viennacl::linalg::opencl::kernels::fft<NumericT>::program_name(), "transpose"); - viennacl::ocl::enqueue(k(input, output, static_cast<cl_uint>(input.internal_size1() >> 1), - static_cast<cl_uint>(input.internal_size2() >> 1))); -} - -/** - * @brief Create complex vector from real vector (even elements(2*k) = real part, odd elements(2*k+1) = imaginary part) - */ -template<typename NumericT> -void real_to_complex(viennacl::vector_base<NumericT> const & in, - viennacl::vector_base<NumericT> & out, vcl_size_t size) -{ - viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(in).context()); - viennacl::linalg::opencl::kernels::fft<NumericT>::init(ctx); - - viennacl::ocl::kernel & k = ctx.get_kernel(viennacl::linalg::opencl::kernels::fft<NumericT>::program_name(), "real_to_complex"); - viennacl::ocl::enqueue(k(in, out, static_cast<cl_uint>(size))); -} - -/** - * @brief Create real vector from complex vector (even elements(2*k) = real part, odd elements(2*k+1) = imaginary part) - */ -template<typename NumericT> -void complex_to_real(viennacl::vector_base<NumericT> const & in, - viennacl::vector_base<NumericT> & out, vcl_size_t size) -{ - viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(in).context()); - viennacl::linalg::opencl::kernels::fft<NumericT>::init(ctx); - - viennacl::ocl::kernel& k = ctx.get_kernel(viennacl::linalg::opencl::kernels::fft<NumericT>::program_name(), "complex_to_real"); - viennacl::ocl::enqueue(k(in, out, static_cast<cl_uint>(size))); -} - -/** - * @brief Reverse vector to oposite order and save it in input vector - */ -template<typename NumericT> -void reverse(viennacl::vector_base<NumericT>& in) -{ - viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(in).context()); - viennacl::linalg::opencl::kernels::fft<NumericT>::init(ctx); - - vcl_size_t size = in.size(); - - viennacl::ocl::kernel& k = ctx.get_kernel(viennacl::linalg::opencl::kernels::fft<NumericT>::program_name(), "reverse_inplace"); - viennacl::ocl::enqueue(k(in, static_cast<cl_uint>(size))); -} - -} //namespace opencl -} //namespace linalg -} //namespace viennacl - -#endif /* FFT_OPERATIONS_HPP_ */ - http://git-wip-us.apache.org/repos/asf/mahout/blob/7ae549fa/native-viennaCL/src/main/cpp/viennacl/linalg/opencl/ilu_operations.hpp ---------------------------------------------------------------------- diff --git a/native-viennaCL/src/main/cpp/viennacl/linalg/opencl/ilu_operations.hpp b/native-viennaCL/src/main/cpp/viennacl/linalg/opencl/ilu_operations.hpp deleted file mode 100644 index 248a88a..0000000 --- a/native-viennaCL/src/main/cpp/viennacl/linalg/opencl/ilu_operations.hpp +++ /dev/null @@ -1,260 +0,0 @@ -#ifndef VIENNACL_LINALG_OPENCL_ILU_OPERATIONS_HPP_ -#define VIENNACL_LINALG_OPENCL_ILU_OPERATIONS_HPP_ - -/* ========================================================================= - Copyright (c) 2010-2016, Institute for Microelectronics, - Institute for Analysis and Scientific Computing, - TU Wien. - Portions of this software are copyright by UChicago Argonne, LLC. - - ----------------- - ViennaCL - The Vienna Computing Library - ----------------- - - Project Head: Karl Rupp [email protected] - - (A list of authors and contributors can be found in the PDF manual) - - License: MIT (X11), see file LICENSE in the base directory -============================================================================= */ - -/** @file viennacl/linalg/opencl/ilu_operations.hpp - @brief Implementations of specialized routines for the Chow-Patel parallel ILU preconditioner using OpenCL -*/ - -#include <cmath> -#include <algorithm> //for std::max and std::min - -#include "viennacl/forwards.h" -#include "viennacl/scalar.hpp" -#include "viennacl/tools/tools.hpp" -#include "viennacl/linalg/opencl/common.hpp" -#include "viennacl/linalg/opencl/kernels/ilu.hpp" -#include "viennacl/meta/predicate.hpp" -#include "viennacl/meta/enable_if.hpp" -#include "viennacl/traits/size.hpp" -#include "viennacl/traits/start.hpp" -#include "viennacl/traits/stride.hpp" -#include "viennacl/linalg/vector_operations.hpp" - - -namespace viennacl -{ -namespace linalg -{ -namespace opencl -{ - -/////////////////////// ICC ///////////////////// - -template<typename NumericT> -void extract_L(compressed_matrix<NumericT> const & A, - compressed_matrix<NumericT> & L) -{ - viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(A).context()); - viennacl::linalg::opencl::kernels::ilu<NumericT>::init(ctx); - - // - // Step 1: Count elements in L: - // - viennacl::ocl::kernel & k1 = ctx.get_kernel(viennacl::linalg::opencl::kernels::ilu<NumericT>::program_name(), "extract_L_1"); - - viennacl::ocl::enqueue(k1(A.handle1().opencl_handle(), A.handle2().opencl_handle(), cl_uint(A.size1()), - L.handle1().opencl_handle()) - ); - - // - // Step 2: Exclusive scan on row_buffers: - // - viennacl::vector_base<unsigned int> wrapped_L_row_buffer(L.handle1(), A.size1() + 1, 0, 1); - viennacl::linalg::exclusive_scan(wrapped_L_row_buffer, wrapped_L_row_buffer); - L.reserve(wrapped_L_row_buffer[L.size1()], false); - - - // - // Step 3: Write entries - // - viennacl::ocl::kernel & k2 = ctx.get_kernel(viennacl::linalg::opencl::kernels::ilu<NumericT>::program_name(), "extract_L_2"); - - viennacl::ocl::enqueue(k2(A.handle1().opencl_handle(), A.handle2().opencl_handle(), A.handle().opencl_handle(), cl_uint(A.size1()), - L.handle1().opencl_handle(), L.handle2().opencl_handle(), L.handle().opencl_handle()) - ); - - L.generate_row_block_information(); - -} // extract_LU - -/////////////////////////////////////////////// - - - -/** @brief Scales the values extracted from A such that A' = DAD has unit diagonal. Updates values from A in L and U accordingly. */ -template<typename NumericT> -void icc_scale(compressed_matrix<NumericT> const & A, - compressed_matrix<NumericT> & L) -{ - viennacl::vector<NumericT> D(A.size1(), viennacl::traits::context(A)); - - viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(A).context()); - viennacl::linalg::opencl::kernels::ilu<NumericT>::init(ctx); - - // fill D: - viennacl::ocl::kernel & k1 = ctx.get_kernel(viennacl::linalg::opencl::kernels::ilu<NumericT>::program_name(), "ilu_scale_kernel_1"); - viennacl::ocl::enqueue(k1(A.handle1().opencl_handle(), A.handle2().opencl_handle(), A.handle().opencl_handle(), cl_uint(A.size1()), D) ); - - // scale L: - viennacl::ocl::kernel & k2 = ctx.get_kernel(viennacl::linalg::opencl::kernels::ilu<NumericT>::program_name(), "ilu_scale_kernel_2"); - viennacl::ocl::enqueue(k2(L.handle1().opencl_handle(), L.handle2().opencl_handle(), L.handle().opencl_handle(), cl_uint(A.size1()), D) ); - -} - -///////////////////////////////////// - - -/** @brief Performs one nonlinear relaxation step in the Chow-Patel-ILU using OpenCL (cf. Algorithm 2 in paper) */ -template<typename NumericT> -void icc_chow_patel_sweep(compressed_matrix<NumericT> & L, - vector<NumericT> const & aij_L) -{ - viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(L).context()); - viennacl::linalg::opencl::kernels::ilu<NumericT>::init(ctx); - - viennacl::backend::mem_handle L_backup; - viennacl::backend::memory_create(L_backup, L.handle().raw_size(), viennacl::traits::context(L)); - viennacl::backend::memory_copy(L.handle(), L_backup, 0, 0, L.handle().raw_size()); - - viennacl::ocl::kernel & k = ctx.get_kernel(viennacl::linalg::opencl::kernels::ilu<NumericT>::program_name(), "icc_chow_patel_sweep_kernel"); - viennacl::ocl::enqueue(k(L.handle1().opencl_handle(), L.handle2().opencl_handle(), L.handle().opencl_handle(), L_backup.opencl_handle(), cl_uint(L.size1()), - aij_L) - ); - -} - - -/////////////////////// ILU ///////////////////// - -template<typename NumericT> -void extract_LU(compressed_matrix<NumericT> const & A, - compressed_matrix<NumericT> & L, - compressed_matrix<NumericT> & U) -{ - viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(A).context()); - viennacl::linalg::opencl::kernels::ilu<NumericT>::init(ctx); - - // - // Step 1: Count elements in L and U: - // - viennacl::ocl::kernel & k1 = ctx.get_kernel(viennacl::linalg::opencl::kernels::ilu<NumericT>::program_name(), "extract_LU_1"); - - viennacl::ocl::enqueue(k1(A.handle1().opencl_handle(), A.handle2().opencl_handle(), cl_uint(A.size1()), - L.handle1().opencl_handle(), - U.handle1().opencl_handle()) - ); - - // - // Step 2: Exclusive scan on row_buffers: - // - viennacl::vector_base<unsigned int> wrapped_L_row_buffer(L.handle1(), A.size1() + 1, 0, 1); - viennacl::linalg::exclusive_scan(wrapped_L_row_buffer, wrapped_L_row_buffer); - L.reserve(wrapped_L_row_buffer[L.size1()], false); - - viennacl::vector_base<unsigned int> wrapped_U_row_buffer(U.handle1(), A.size1() + 1, 0, 1); - viennacl::linalg::exclusive_scan(wrapped_U_row_buffer, wrapped_U_row_buffer); - U.reserve(wrapped_U_row_buffer[U.size1()], false); - - // - // Step 3: Write entries - // - viennacl::ocl::kernel & k2 = ctx.get_kernel(viennacl::linalg::opencl::kernels::ilu<NumericT>::program_name(), "extract_LU_2"); - - viennacl::ocl::enqueue(k2(A.handle1().opencl_handle(), A.handle2().opencl_handle(), A.handle().opencl_handle(), cl_uint(A.size1()), - L.handle1().opencl_handle(), L.handle2().opencl_handle(), L.handle().opencl_handle(), - U.handle1().opencl_handle(), U.handle2().opencl_handle(), U.handle().opencl_handle()) - ); - - L.generate_row_block_information(); - // Note: block information for U will be generated after transposition - -} // extract_LU - -/////////////////////////////////////////////// - - - -/** @brief Scales the values extracted from A such that A' = DAD has unit diagonal. Updates values from A in L and U accordingly. */ -template<typename NumericT> -void ilu_scale(compressed_matrix<NumericT> const & A, - compressed_matrix<NumericT> & L, - compressed_matrix<NumericT> & U) -{ - viennacl::vector<NumericT> D(A.size1(), viennacl::traits::context(A)); - - viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(A).context()); - viennacl::linalg::opencl::kernels::ilu<NumericT>::init(ctx); - - // fill D: - viennacl::ocl::kernel & k1 = ctx.get_kernel(viennacl::linalg::opencl::kernels::ilu<NumericT>::program_name(), "ilu_scale_kernel_1"); - viennacl::ocl::enqueue(k1(A.handle1().opencl_handle(), A.handle2().opencl_handle(), A.handle().opencl_handle(), cl_uint(A.size1()), D) ); - - // scale L: - viennacl::ocl::kernel & k2 = ctx.get_kernel(viennacl::linalg::opencl::kernels::ilu<NumericT>::program_name(), "ilu_scale_kernel_2"); - viennacl::ocl::enqueue(k2(L.handle1().opencl_handle(), L.handle2().opencl_handle(), L.handle().opencl_handle(), cl_uint(A.size1()), D) ); - - // scale U: - viennacl::ocl::enqueue(k2(U.handle1().opencl_handle(), U.handle2().opencl_handle(), U.handle().opencl_handle(), cl_uint(A.size1()), D) ); - -} - -///////////////////////////////////// - - -/** @brief Performs one nonlinear relaxation step in the Chow-Patel-ILU using OpenCL (cf. Algorithm 2 in paper) */ -template<typename NumericT> -void ilu_chow_patel_sweep(compressed_matrix<NumericT> & L, - vector<NumericT> const & aij_L, - compressed_matrix<NumericT> & U_trans, - vector<NumericT> const & aij_U_trans) -{ - viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(L).context()); - viennacl::linalg::opencl::kernels::ilu<NumericT>::init(ctx); - - viennacl::backend::mem_handle L_backup; - viennacl::backend::memory_create(L_backup, L.handle().raw_size(), viennacl::traits::context(L)); - viennacl::backend::memory_copy(L.handle(), L_backup, 0, 0, L.handle().raw_size()); - - viennacl::backend::mem_handle U_backup; - viennacl::backend::memory_create(U_backup, U_trans.handle().raw_size(), viennacl::traits::context(U_trans)); - viennacl::backend::memory_copy(U_trans.handle(), U_backup, 0, 0, U_trans.handle().raw_size()); - - viennacl::ocl::kernel & k = ctx.get_kernel(viennacl::linalg::opencl::kernels::ilu<NumericT>::program_name(), "ilu_chow_patel_sweep_kernel"); - viennacl::ocl::enqueue(k(L.handle1().opencl_handle(), L.handle2().opencl_handle(), L.handle().opencl_handle(), L_backup.opencl_handle(), cl_uint(L.size1()), - aij_L, - U_trans.handle1().opencl_handle(), U_trans.handle2().opencl_handle(), U_trans.handle().opencl_handle(), U_backup.opencl_handle(), - aij_U_trans) - ); - -} - -////////////////////////////////////// - - - -template<typename NumericT> -void ilu_form_neumann_matrix(compressed_matrix<NumericT> & R, - vector<NumericT> & diag_R) -{ - viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(R).context()); - viennacl::linalg::opencl::kernels::ilu<NumericT>::init(ctx); - - viennacl::ocl::kernel & k = ctx.get_kernel(viennacl::linalg::opencl::kernels::ilu<NumericT>::program_name(), "ilu_form_neumann_matrix_kernel"); - viennacl::ocl::enqueue(k(R.handle1().opencl_handle(), R.handle2().opencl_handle(), R.handle().opencl_handle(), cl_uint(R.size1()), - diag_R) - ); -} - -} //namespace opencl -} //namespace linalg -} //namespace viennacl - - -#endif
