http://git-wip-us.apache.org/repos/asf/mahout/blob/7ae549fa/native-viennaCL/src/main/cpp/viennacl/linalg/cuda/matrix_operations.hpp ---------------------------------------------------------------------- diff --git a/native-viennaCL/src/main/cpp/viennacl/linalg/cuda/matrix_operations.hpp b/native-viennaCL/src/main/cpp/viennacl/linalg/cuda/matrix_operations.hpp deleted file mode 100644 index 912d24d..0000000 --- a/native-viennaCL/src/main/cpp/viennacl/linalg/cuda/matrix_operations.hpp +++ /dev/null @@ -1,2725 +0,0 @@ -#ifndef VIENNACL_LINALG_CUDA_MATRIX_OPERATIONS_HPP_ -#define VIENNACL_LINALG_CUDA_MATRIX_OPERATIONS_HPP_ - -/* ========================================================================= - Copyright (c) 2010-2016, Institute for Microelectronics, - Institute for Analysis and Scientific Computing, - TU Wien. - Portions of this software are copyright by UChicago Argonne, LLC. - - ----------------- - ViennaCL - The Vienna Computing Library - ----------------- - - Project Head: Karl Rupp [email protected] - - (A list of authors and contributors can be found in the manual) - - License: MIT (X11), see file LICENSE in the base directory -============================================================================= */ - -/** @file viennacl/linalg/cuda/matrix_operations.hpp - @brief Implementations of dense matrix related operations, including matrix-vector products, using CUDA. -*/ - -#include "viennacl/forwards.h" -#include "viennacl/scalar.hpp" -#include "viennacl/vector.hpp" -#include "viennacl/vector_proxy.hpp" -#include "viennacl/tools/tools.hpp" -#include "viennacl/meta/enable_if.hpp" -#include "viennacl/meta/predicate.hpp" -#include "viennacl/meta/result_of.hpp" -#include "viennacl/traits/size.hpp" -#include "viennacl/traits/start.hpp" -#include "viennacl/traits/handle.hpp" -#include "viennacl/traits/stride.hpp" - -#include "viennacl/linalg/cuda/common.hpp" - -#include "viennacl/linalg/cuda/vector_operations.hpp" -#include "viennacl/linalg/cuda/matrix_operations_row.hpp" -#include "viennacl/linalg/cuda/matrix_operations_col.hpp" -#include "viennacl/linalg/cuda/matrix_operations_prod.hpp" -#include "viennacl/linalg/cuda/matrix_operations_prod.hpp" - -namespace viennacl -{ -namespace linalg -{ -namespace cuda -{ -// -// Introductory note: By convention, all dimensions are already checked in the dispatcher frontend. No need to double-check again in here! -// - -template<typename DestNumericT, typename SrcNumericT> -void convert(matrix_base<DestNumericT> & mat1, matrix_base<SrcNumericT> const & mat2) -{ - assert(mat1.row_major() == mat2.row_major() && bool("Addition/subtraction on mixed matrix layouts not supported yet!")); - - if (mat1.row_major()) - { - convert_row_kernel<<<128, 128>>>(viennacl::cuda_arg(mat1), - static_cast<unsigned int>(viennacl::traits::start1(mat1)), static_cast<unsigned int>(viennacl::traits::start2(mat1)), - static_cast<unsigned int>(viennacl::traits::stride1(mat1)), static_cast<unsigned int>(viennacl::traits::stride2(mat1)), - static_cast<unsigned int>(viennacl::traits::size1(mat1)), static_cast<unsigned int>(viennacl::traits::size2(mat1)), - static_cast<unsigned int>(viennacl::traits::internal_size1(mat1)), static_cast<unsigned int>(viennacl::traits::internal_size2(mat1)), - - viennacl::cuda_arg(mat2), - static_cast<unsigned int>(viennacl::traits::start1(mat2)), static_cast<unsigned int>(viennacl::traits::start2(mat2)), - static_cast<unsigned int>(viennacl::traits::stride1(mat2)), static_cast<unsigned int>(viennacl::traits::stride2(mat2)), - static_cast<unsigned int>(viennacl::traits::internal_size1(mat2)), static_cast<unsigned int>(viennacl::traits::internal_size2(mat2)) - ); - VIENNACL_CUDA_LAST_ERROR_CHECK("convert_row_kernel"); - } - else - { - convert_col_kernel<<<128, 128>>>(viennacl::cuda_arg(mat1), - static_cast<unsigned int>(viennacl::traits::start1(mat1)), static_cast<unsigned int>(viennacl::traits::start2(mat1)), - static_cast<unsigned int>(viennacl::traits::stride1(mat1)), static_cast<unsigned int>(viennacl::traits::stride2(mat1)), - static_cast<unsigned int>(viennacl::traits::size1(mat1)), static_cast<unsigned int>(viennacl::traits::size2(mat1)), - static_cast<unsigned int>(viennacl::traits::internal_size1(mat1)), static_cast<unsigned int>(viennacl::traits::internal_size2(mat1)), - - viennacl::cuda_arg(mat2), - static_cast<unsigned int>(viennacl::traits::start1(mat2)), static_cast<unsigned int>(viennacl::traits::start2(mat2)), - static_cast<unsigned int>(viennacl::traits::stride1(mat2)), static_cast<unsigned int>(viennacl::traits::stride2(mat2)), - static_cast<unsigned int>(viennacl::traits::internal_size1(mat2)), static_cast<unsigned int>(viennacl::traits::internal_size2(mat2)) - ); - VIENNACL_CUDA_LAST_ERROR_CHECK("convert_col_kernel"); - } -} - -template<typename NumericT, typename SizeT, typename DistanceT> -void trans(matrix_expression<const matrix_base<NumericT, SizeT, DistanceT>,const matrix_base<NumericT, SizeT, DistanceT>, op_trans> const & proxy, - matrix_base<NumericT> & temp_trans) -{ - trans_kernel<<<128,128>>>(viennacl::cuda_arg(proxy.lhs()), - static_cast<unsigned int>(proxy.lhs().start1()), static_cast<unsigned int>(proxy.lhs().start2()), - static_cast<unsigned int>(proxy.lhs().internal_size1()), static_cast<unsigned int>(proxy.lhs().internal_size2()), - static_cast<unsigned int>(proxy.lhs().size1()), static_cast<unsigned int>(proxy.lhs().size2()), - static_cast<unsigned int>(proxy.lhs().stride1()), static_cast<unsigned int>(proxy.lhs().stride2()), - - viennacl::cuda_arg(temp_trans), - static_cast<unsigned int>(temp_trans.start1()), static_cast<unsigned int>(temp_trans.start2()), - static_cast<unsigned int>(temp_trans.internal_size1()), static_cast<unsigned int>(temp_trans.internal_size2()), - static_cast<unsigned int>(temp_trans.stride1()), static_cast<unsigned int>(temp_trans.stride2()), - static_cast<bool>(proxy.lhs().row_major())); - VIENNACL_CUDA_LAST_ERROR_CHECK("trans_kernel"); -} - - -template<typename NumericT, typename ScalarT> -void am(matrix_base<NumericT> & mat1, - matrix_base<NumericT> const & mat2, ScalarT const & alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha) -{ - assert(mat1.row_major() == mat2.row_major() && bool("Addition/subtraction on mixed matrix layouts not supported yet!")); - - typedef NumericT value_type; - - unsigned int options_alpha = detail::make_options(len_alpha, reciprocal_alpha, flip_sign_alpha); - - value_type temporary_alpha = 0; - if (viennacl::is_cpu_scalar<ScalarT>::value) - temporary_alpha = alpha; - - if (mat1.row_major()) - { - am_row_kernel<<<128, 128>>>(viennacl::cuda_arg(mat1), - static_cast<unsigned int>(viennacl::traits::start1(mat1)), static_cast<unsigned int>(viennacl::traits::start2(mat1)), - static_cast<unsigned int>(viennacl::traits::stride1(mat1)), static_cast<unsigned int>(viennacl::traits::stride2(mat1)), - static_cast<unsigned int>(viennacl::traits::size1(mat1)), static_cast<unsigned int>(viennacl::traits::size2(mat1)), - static_cast<unsigned int>(viennacl::traits::internal_size1(mat1)), static_cast<unsigned int>(viennacl::traits::internal_size2(mat1)), - - viennacl::cuda_arg<value_type>(detail::arg_reference(alpha, temporary_alpha)), - options_alpha, - viennacl::cuda_arg(mat2), - static_cast<unsigned int>(viennacl::traits::start1(mat2)), static_cast<unsigned int>(viennacl::traits::start2(mat2)), - static_cast<unsigned int>(viennacl::traits::stride1(mat2)), static_cast<unsigned int>(viennacl::traits::stride2(mat2)), - static_cast<unsigned int>(viennacl::traits::internal_size1(mat2)), static_cast<unsigned int>(viennacl::traits::internal_size2(mat2)) - ); - VIENNACL_CUDA_LAST_ERROR_CHECK("am_row_kernel"); - } - else - { - am_col_kernel<<<128, 128>>>(viennacl::cuda_arg(mat1), - static_cast<unsigned int>(viennacl::traits::start1(mat1)), static_cast<unsigned int>(viennacl::traits::start2(mat1)), - static_cast<unsigned int>(viennacl::traits::stride1(mat1)), static_cast<unsigned int>(viennacl::traits::stride2(mat1)), - static_cast<unsigned int>(viennacl::traits::size1(mat1)), static_cast<unsigned int>(viennacl::traits::size2(mat1)), - static_cast<unsigned int>(viennacl::traits::internal_size1(mat1)), static_cast<unsigned int>(viennacl::traits::internal_size2(mat1)), - - viennacl::cuda_arg<value_type>(detail::arg_reference(alpha, temporary_alpha)), - options_alpha, - viennacl::cuda_arg(mat2), - static_cast<unsigned int>(viennacl::traits::start1(mat2)), static_cast<unsigned int>(viennacl::traits::start2(mat2)), - static_cast<unsigned int>(viennacl::traits::stride1(mat2)), static_cast<unsigned int>(viennacl::traits::stride2(mat2)), - static_cast<unsigned int>(viennacl::traits::internal_size1(mat2)), static_cast<unsigned int>(viennacl::traits::internal_size2(mat2)) - ); - VIENNACL_CUDA_LAST_ERROR_CHECK("am_col_kernel"); - } -} - - -template<typename NumericT, typename ScalarT1, typename ScalarT2> -void ambm(matrix_base<NumericT> & mat1, - matrix_base<NumericT> const & mat2, ScalarT1 const & alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, - matrix_base<NumericT> const & mat3, ScalarT2 const & beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta) -{ - assert(mat1.row_major() == mat2.row_major() && mat1.row_major() == mat3.row_major() && bool("Addition/subtraction on mixed matrix layouts not supported yet!")); - - typedef NumericT value_type; - - unsigned int options_alpha = detail::make_options(len_alpha, reciprocal_alpha, flip_sign_alpha); - - value_type temporary_alpha = 0; - if (viennacl::is_cpu_scalar<ScalarT1>::value) - temporary_alpha = alpha; - - - unsigned int options_beta = detail::make_options(len_beta, reciprocal_beta, flip_sign_beta); - - value_type temporary_beta = 0; - if (viennacl::is_cpu_scalar<ScalarT2>::value) - temporary_beta = beta; - - - if (mat1.row_major()) - { - ambm_row_kernel<<<128, 128>>>(viennacl::cuda_arg(mat1), - static_cast<unsigned int>(viennacl::traits::start1(mat1)), static_cast<unsigned int>(viennacl::traits::start2(mat1)), - static_cast<unsigned int>(viennacl::traits::stride1(mat1)), static_cast<unsigned int>(viennacl::traits::stride2(mat1)), - static_cast<unsigned int>(viennacl::traits::size1(mat1)), static_cast<unsigned int>(viennacl::traits::size2(mat1)), - static_cast<unsigned int>(viennacl::traits::internal_size1(mat1)), static_cast<unsigned int>(viennacl::traits::internal_size2(mat1)), - - viennacl::cuda_arg<value_type>(detail::arg_reference(alpha, temporary_alpha)), - options_alpha, - viennacl::cuda_arg(mat2), - static_cast<unsigned int>(viennacl::traits::start1(mat2)), static_cast<unsigned int>(viennacl::traits::start2(mat2)), - static_cast<unsigned int>(viennacl::traits::stride1(mat2)), static_cast<unsigned int>(viennacl::traits::stride2(mat2)), - static_cast<unsigned int>(viennacl::traits::internal_size1(mat2)), static_cast<unsigned int>(viennacl::traits::internal_size2(mat2)), - - viennacl::cuda_arg<value_type>(detail::arg_reference(beta, temporary_beta)), - options_beta, - viennacl::cuda_arg(mat3), - static_cast<unsigned int>(viennacl::traits::start1(mat3)), static_cast<unsigned int>(viennacl::traits::start2(mat3)), - static_cast<unsigned int>(viennacl::traits::stride1(mat3)), static_cast<unsigned int>(viennacl::traits::stride2(mat3)), - static_cast<unsigned int>(viennacl::traits::internal_size1(mat3)), static_cast<unsigned int>(viennacl::traits::internal_size2(mat3)) - ); - VIENNACL_CUDA_LAST_ERROR_CHECK("ambm_row_kernel"); - } - else - { - ambm_col_kernel<<<128, 128>>>(viennacl::cuda_arg(mat1), - static_cast<unsigned int>(viennacl::traits::start1(mat1)), static_cast<unsigned int>(viennacl::traits::start2(mat1)), - static_cast<unsigned int>(viennacl::traits::stride1(mat1)), static_cast<unsigned int>(viennacl::traits::stride2(mat1)), - static_cast<unsigned int>(viennacl::traits::size1(mat1)), static_cast<unsigned int>(viennacl::traits::size2(mat1)), - static_cast<unsigned int>(viennacl::traits::internal_size1(mat1)), static_cast<unsigned int>(viennacl::traits::internal_size2(mat1)), - - viennacl::cuda_arg<value_type>(detail::arg_reference(alpha, temporary_alpha)), - options_alpha, - viennacl::cuda_arg(mat2), - static_cast<unsigned int>(viennacl::traits::start1(mat2)), static_cast<unsigned int>(viennacl::traits::start2(mat2)), - static_cast<unsigned int>(viennacl::traits::stride1(mat2)), static_cast<unsigned int>(viennacl::traits::stride2(mat2)), - static_cast<unsigned int>(viennacl::traits::internal_size1(mat2)), static_cast<unsigned int>(viennacl::traits::internal_size2(mat2)), - - viennacl::cuda_arg<value_type>(detail::arg_reference(beta, temporary_beta)), - options_beta, - viennacl::cuda_arg(mat3), - static_cast<unsigned int>(viennacl::traits::start1(mat3)), static_cast<unsigned int>(viennacl::traits::start2(mat3)), - static_cast<unsigned int>(viennacl::traits::stride1(mat3)), static_cast<unsigned int>(viennacl::traits::stride2(mat3)), - static_cast<unsigned int>(viennacl::traits::internal_size1(mat3)), static_cast<unsigned int>(viennacl::traits::internal_size2(mat3)) - ); - VIENNACL_CUDA_LAST_ERROR_CHECK("ambm_col_kernel"); - } - -} - - -template<typename NumericT, typename ScalarT1, typename ScalarT2> -void ambm_m(matrix_base<NumericT> & mat1, - matrix_base<NumericT> const & mat2, ScalarT1 const & alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, - matrix_base<NumericT> const & mat3, ScalarT2 const & beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta) -{ - assert(mat1.row_major() == mat2.row_major() && mat1.row_major() == mat3.row_major() && bool("Addition/subtraction on mixed matrix layouts not supported yet!")); - - typedef NumericT value_type; - - unsigned int options_alpha = detail::make_options(len_alpha, reciprocal_alpha, flip_sign_alpha); - - value_type temporary_alpha = 0; - if (viennacl::is_cpu_scalar<ScalarT1>::value) - temporary_alpha = alpha; - - - unsigned int options_beta = detail::make_options(len_beta, reciprocal_beta, flip_sign_beta); - - value_type temporary_beta = 0; - if (viennacl::is_cpu_scalar<ScalarT2>::value) - temporary_beta = beta; - - - if (mat1.row_major()) - { - ambm_m_row_kernel<<<128, 128>>>(viennacl::cuda_arg(mat1), - static_cast<unsigned int>(viennacl::traits::start1(mat1)), static_cast<unsigned int>(viennacl::traits::start2(mat1)), - static_cast<unsigned int>(viennacl::traits::stride1(mat1)), static_cast<unsigned int>(viennacl::traits::stride2(mat1)), - static_cast<unsigned int>(viennacl::traits::size1(mat1)), static_cast<unsigned int>(viennacl::traits::size2(mat1)), - static_cast<unsigned int>(viennacl::traits::internal_size1(mat1)), static_cast<unsigned int>(viennacl::traits::internal_size2(mat1)), - - viennacl::cuda_arg<value_type>(detail::arg_reference(alpha, temporary_alpha)), - options_alpha, - viennacl::cuda_arg(mat2), - static_cast<unsigned int>(viennacl::traits::start1(mat2)), static_cast<unsigned int>(viennacl::traits::start2(mat2)), - static_cast<unsigned int>(viennacl::traits::stride1(mat2)), static_cast<unsigned int>(viennacl::traits::stride2(mat2)), - static_cast<unsigned int>(viennacl::traits::internal_size1(mat2)), static_cast<unsigned int>(viennacl::traits::internal_size2(mat2)), - - viennacl::cuda_arg<value_type>(detail::arg_reference(beta, temporary_beta)), - options_beta, - viennacl::cuda_arg(mat3), - static_cast<unsigned int>(viennacl::traits::start1(mat3)), static_cast<unsigned int>(viennacl::traits::start2(mat3)), - static_cast<unsigned int>(viennacl::traits::stride1(mat3)), static_cast<unsigned int>(viennacl::traits::stride2(mat3)), - static_cast<unsigned int>(viennacl::traits::internal_size1(mat3)), static_cast<unsigned int>(viennacl::traits::internal_size2(mat3)) - ); - VIENNACL_CUDA_LAST_ERROR_CHECK("ambm_m_row_kernel"); - } - else - { - ambm_m_col_kernel<<<128, 128>>>(viennacl::cuda_arg(mat1), - static_cast<unsigned int>(viennacl::traits::start1(mat1)), static_cast<unsigned int>(viennacl::traits::start2(mat1)), - static_cast<unsigned int>(viennacl::traits::stride1(mat1)), static_cast<unsigned int>(viennacl::traits::stride2(mat1)), - static_cast<unsigned int>(viennacl::traits::size1(mat1)), static_cast<unsigned int>(viennacl::traits::size2(mat1)), - static_cast<unsigned int>(viennacl::traits::internal_size1(mat1)), static_cast<unsigned int>(viennacl::traits::internal_size2(mat1)), - - viennacl::cuda_arg<value_type>(detail::arg_reference(alpha, temporary_alpha)), - options_alpha, - viennacl::cuda_arg(mat2), - static_cast<unsigned int>(viennacl::traits::start1(mat2)), static_cast<unsigned int>(viennacl::traits::start2(mat2)), - static_cast<unsigned int>(viennacl::traits::stride1(mat2)), static_cast<unsigned int>(viennacl::traits::stride2(mat2)), - static_cast<unsigned int>(viennacl::traits::internal_size1(mat2)), static_cast<unsigned int>(viennacl::traits::internal_size2(mat2)), - - viennacl::cuda_arg<value_type>(detail::arg_reference(beta, temporary_beta)), - options_beta, - viennacl::cuda_arg(mat3), - static_cast<unsigned int>(viennacl::traits::start1(mat3)), static_cast<unsigned int>(viennacl::traits::start2(mat3)), - static_cast<unsigned int>(viennacl::traits::stride1(mat3)), static_cast<unsigned int>(viennacl::traits::stride2(mat3)), - static_cast<unsigned int>(viennacl::traits::internal_size1(mat3)), static_cast<unsigned int>(viennacl::traits::internal_size2(mat3)) - ); - VIENNACL_CUDA_LAST_ERROR_CHECK("ambm_m_col_kernel"); - } - -} - - - - -template<typename NumericT> -void matrix_assign(matrix_base<NumericT> & mat, NumericT s, bool clear = false) -{ - typedef NumericT value_type; - value_type alpha = s; - - unsigned int s1 = clear ? viennacl::traits::internal_size1(mat) : viennacl::traits::size1(mat); - unsigned int s2 = clear ? viennacl::traits::internal_size2(mat) : viennacl::traits::size2(mat); - - if (mat.row_major()) - { - - matrix_row_assign_kernel<<<128, 128>>>(viennacl::cuda_arg(mat), - static_cast<unsigned int>(viennacl::traits::start1(mat)), static_cast<unsigned int>(viennacl::traits::start2(mat)), - static_cast<unsigned int>(viennacl::traits::stride1(mat)), static_cast<unsigned int>(viennacl::traits::stride2(mat)), - s1, s2, - static_cast<unsigned int>(viennacl::traits::internal_size1(mat)), static_cast<unsigned int>(viennacl::traits::internal_size2(mat)), - alpha); - VIENNACL_CUDA_LAST_ERROR_CHECK("matrix_row_assign_kernel"); - } - else - { - matrix_col_assign_kernel<<<128, 128>>>(viennacl::cuda_arg(mat), - static_cast<unsigned int>(viennacl::traits::start1(mat)), static_cast<unsigned int>(viennacl::traits::start2(mat)), - static_cast<unsigned int>(viennacl::traits::stride1(mat)), static_cast<unsigned int>(viennacl::traits::stride2(mat)), - s1, s2, - static_cast<unsigned int>(viennacl::traits::internal_size1(mat)), static_cast<unsigned int>(viennacl::traits::internal_size2(mat)), - alpha); - VIENNACL_CUDA_LAST_ERROR_CHECK("matrix_col_assign_kernel"); - } -} - -template<typename NumericT> -void matrix_diagonal_assign(matrix_base<NumericT> & mat, NumericT s) -{ - typedef NumericT value_type; - value_type alpha = s; - - if (mat.row_major()) - { - matrix_row_diagonal_assign_kernel<<<128, 128>>>(viennacl::cuda_arg(mat), - static_cast<unsigned int>(viennacl::traits::start1(mat)), static_cast<unsigned int>(viennacl::traits::start2(mat)), - static_cast<unsigned int>(viennacl::traits::stride1(mat)), static_cast<unsigned int>(viennacl::traits::stride2(mat)), - static_cast<unsigned int>(viennacl::traits::size1(mat)), static_cast<unsigned int>(viennacl::traits::size2(mat)), - static_cast<unsigned int>(viennacl::traits::internal_size1(mat)), static_cast<unsigned int>(viennacl::traits::internal_size2(mat)), - alpha); - VIENNACL_CUDA_LAST_ERROR_CHECK("matrix_row_diagonal_assign_kernel"); - } - else - { - matrix_col_diagonal_assign_kernel<<<128, 128>>>(viennacl::cuda_arg(mat), - static_cast<unsigned int>(viennacl::traits::start1(mat)), static_cast<unsigned int>(viennacl::traits::start2(mat)), - static_cast<unsigned int>(viennacl::traits::stride1(mat)), static_cast<unsigned int>(viennacl::traits::stride2(mat)), - static_cast<unsigned int>(viennacl::traits::size1(mat)), static_cast<unsigned int>(viennacl::traits::size2(mat)), - static_cast<unsigned int>(viennacl::traits::internal_size1(mat)), static_cast<unsigned int>(viennacl::traits::internal_size2(mat)), - alpha); - VIENNACL_CUDA_LAST_ERROR_CHECK("matrix_col_diagonal_assign_kernel"); - } -} - - -template<typename NumericT> -void matrix_diag_from_vector(const vector_base<NumericT> & vec, int k, matrix_base<NumericT> & mat) -{ - typedef NumericT value_type; - - // Step 1: assign zero matrix: - matrix_assign(mat, NumericT(0)); - - // Step 2: Assign diagonal: - unsigned int options_alpha = 0; - - vcl_size_t mat_start = 0; - vcl_size_t mat_stride = 0; - vcl_size_t mat_size = viennacl::traits::size(vec); - if (mat.row_major()) - { - vcl_size_t first_row_index = 0; - vcl_size_t first_col_index = 0; - if (k < 0) - first_row_index = vcl_size_t(-k); - else - first_col_index = vcl_size_t(k); - mat_start = (viennacl::traits::start1(mat) + first_row_index * viennacl::traits::stride1(mat)) * viennacl::traits::internal_size2(mat) - + viennacl::traits::start2(mat) + first_col_index * viennacl::traits::stride2(mat); - mat_stride = viennacl::traits::stride1(mat) * viennacl::traits::internal_size2(mat) + viennacl::traits::stride2(mat); - } - else - { - vcl_size_t first_row_index = 0; - vcl_size_t first_col_index = 0; - if (k < 0) - first_row_index = vcl_size_t(-k); - else - first_col_index = vcl_size_t(k); - mat_start = viennacl::traits::start1(mat) + first_row_index * viennacl::traits::stride1(mat) - + (viennacl::traits::start2(mat) + first_col_index * viennacl::traits::stride2(mat)) * viennacl::traits::internal_size1(mat); - mat_stride = viennacl::traits::stride2(mat) * viennacl::traits::internal_size1(mat) + viennacl::traits::stride1(mat); - } - - av_kernel<<<128, 128>>>(viennacl::cuda_arg(mat), - static_cast<unsigned int>(mat_start), - static_cast<unsigned int>(mat_stride), - static_cast<unsigned int>(mat_size), - - viennacl::cuda_arg<value_type>(NumericT(1)), - options_alpha, - viennacl::cuda_arg(vec), - static_cast<unsigned int>(viennacl::traits::start(vec)), - static_cast<unsigned int>(viennacl::traits::stride(vec)) ); - VIENNACL_CUDA_LAST_ERROR_CHECK("av_kernel"); -} - -template<typename NumericT> -void matrix_diag_to_vector(matrix_base<NumericT> const & mat, int k, vector_base<NumericT> & vec) -{ - typedef NumericT value_type; - - unsigned int options_alpha = 0; - - vcl_size_t mat_start = 0; - vcl_size_t mat_stride = 0; - if (mat.row_major()) - { - vcl_size_t first_row_index = 0; - vcl_size_t first_col_index = 0; - if (k < 0) - first_row_index = vcl_size_t(-k); - else - first_col_index = vcl_size_t(k); - mat_start = (viennacl::traits::start1(mat) + first_row_index * viennacl::traits::stride1(mat)) * viennacl::traits::internal_size2(mat) - + viennacl::traits::start2(mat) + first_col_index * viennacl::traits::stride2(mat); - mat_stride = viennacl::traits::stride1(mat) * viennacl::traits::internal_size2(mat) + viennacl::traits::stride2(mat); - } - else - { - vcl_size_t first_row_index = 0; - vcl_size_t first_col_index = 0; - if (k < 0) - first_row_index = vcl_size_t(-k); - else - first_col_index = vcl_size_t(k); - mat_start = viennacl::traits::start1(mat) + first_row_index * viennacl::traits::stride1(mat) - + (viennacl::traits::start2(mat) + first_col_index * viennacl::traits::stride2(mat)) * viennacl::traits::internal_size1(mat); - mat_stride = viennacl::traits::stride2(mat) * viennacl::traits::internal_size1(mat) + viennacl::traits::stride1(mat); - } - - av_kernel<<<128, 128>>>(viennacl::cuda_arg(vec), - static_cast<unsigned int>(viennacl::traits::start(vec)), - static_cast<unsigned int>(viennacl::traits::stride(vec)), - static_cast<unsigned int>(viennacl::traits::size(vec)), - - viennacl::cuda_arg<value_type>(NumericT(1)), - options_alpha, - viennacl::cuda_arg(mat), - static_cast<unsigned int>(mat_start), - static_cast<unsigned int>(mat_stride)); - VIENNACL_CUDA_LAST_ERROR_CHECK("av_kernel"); -} - -template<typename NumericT> -void matrix_row(matrix_base<NumericT> const & mat, unsigned int i, vector_base<NumericT> & vec) -{ - typedef NumericT value_type; - - unsigned int options_alpha = 0; - - vcl_size_t mat_start = 0; - vcl_size_t mat_stride = 0; - if (mat.row_major()) - { - mat_start = (viennacl::traits::start1(mat) + i * viennacl::traits::stride1(mat)) * viennacl::traits::internal_size2(mat) + viennacl::traits::start2(mat); - mat_stride = viennacl::traits::stride2(mat); - } - else - { - mat_start = viennacl::traits::start1(mat) + i * viennacl::traits::stride1(mat) + viennacl::traits::start2(mat) * viennacl::traits::internal_size1(mat); - mat_stride = viennacl::traits::stride2(mat) * viennacl::traits::internal_size1(mat); - } - - av_kernel<<<128, 128>>>(viennacl::cuda_arg(vec), - static_cast<unsigned int>(viennacl::traits::start(vec)), - static_cast<unsigned int>(viennacl::traits::stride(vec)), - static_cast<unsigned int>(viennacl::traits::size(vec)), - - viennacl::cuda_arg<value_type>(NumericT(1)), - options_alpha, - viennacl::cuda_arg(mat), - static_cast<unsigned int>(mat_start), - static_cast<unsigned int>(mat_stride)); - VIENNACL_CUDA_LAST_ERROR_CHECK("av_kernel"); -} - -template<typename NumericT> -void matrix_column(const matrix_base<NumericT> & mat, unsigned int j, vector_base<NumericT> & vec) -{ - typedef NumericT value_type; - - unsigned int options_alpha = 0; - - vcl_size_t mat_start = 0; - vcl_size_t mat_stride = 0; - if (mat.row_major()) - { - mat_start = viennacl::traits::start1(mat) * viennacl::traits::internal_size2(mat) + viennacl::traits::start2(mat) + j * viennacl::traits::stride2(mat); - mat_stride = viennacl::traits::stride2(mat) * viennacl::traits::internal_size2(mat); - } - else - { - mat_start = viennacl::traits::start1(mat) + (viennacl::traits::start2(mat) + j * viennacl::traits::stride2(mat)) * viennacl::traits::internal_size1(mat); - mat_stride = viennacl::traits::stride2(mat); - } - - av_kernel<<<128, 128>>>(viennacl::cuda_arg(vec), - static_cast<unsigned int>(viennacl::traits::start(vec)), - static_cast<unsigned int>(viennacl::traits::stride(vec)), - static_cast<unsigned int>(viennacl::traits::size(vec)), - - viennacl::cuda_arg<value_type>(NumericT(1)), - options_alpha, - viennacl::cuda_arg(mat), - static_cast<unsigned int>(mat_start), - static_cast<unsigned int>(mat_stride)); - VIENNACL_CUDA_LAST_ERROR_CHECK("av_kernel"); -} - - -// -///////////////////////// binary element-wise operations ///////////////////////////////// -// - - -template<typename NumericT, typename SizeT, typename OpT> -void element_op(matrix_base<NumericT, SizeT> & A, - matrix_expression<const matrix_base<NumericT, SizeT>, const matrix_base<NumericT, SizeT>, op_element_binary<OpT> > const & proxy) -{ - assert(A.row_major() == proxy.lhs().row_major() && A.row_major() == proxy.rhs().row_major() && bool("Element-wise operations on mixed matrix layouts not supported yet!")); - - typedef NumericT value_type; - - unsigned int op_type = 2; //0: product, 1: division, 2: power - if (viennacl::is_division<OpT>::value) - op_type = 1; - else if (viennacl::is_product<OpT>::value) - op_type = 0; - - if (A.row_major()) - { - element_op_int_row_kernel<<<128, 128>>>(viennacl::cuda_arg(A), - static_cast<unsigned int>(viennacl::traits::start1(A)), static_cast<unsigned int>(viennacl::traits::start2(A)), - static_cast<unsigned int>(viennacl::traits::stride1(A)), static_cast<unsigned int>(viennacl::traits::stride2(A)), - static_cast<unsigned int>(viennacl::traits::size1(A)), static_cast<unsigned int>(viennacl::traits::size2(A)), - static_cast<unsigned int>(viennacl::traits::internal_size1(A)), static_cast<unsigned int>(viennacl::traits::internal_size2(A)), - - viennacl::cuda_arg(proxy.lhs()), - static_cast<unsigned int>(viennacl::traits::start1(proxy.lhs())), static_cast<unsigned int>(viennacl::traits::start2(proxy.lhs())), - static_cast<unsigned int>(viennacl::traits::stride1(proxy.lhs())), static_cast<unsigned int>(viennacl::traits::stride2(proxy.lhs())), - static_cast<unsigned int>(viennacl::traits::internal_size1(proxy.lhs())), static_cast<unsigned int>(viennacl::traits::internal_size2(proxy.lhs())), - - viennacl::cuda_arg(proxy.rhs()), - static_cast<unsigned int>(viennacl::traits::start1(proxy.rhs())), static_cast<unsigned int>(viennacl::traits::start2(proxy.rhs())), - static_cast<unsigned int>(viennacl::traits::stride1(proxy.rhs())), static_cast<unsigned int>(viennacl::traits::stride2(proxy.rhs())), - static_cast<unsigned int>(viennacl::traits::internal_size1(proxy.rhs())), static_cast<unsigned int>(viennacl::traits::internal_size2(proxy.rhs())), - - op_type - ); - VIENNACL_CUDA_LAST_ERROR_CHECK("element_op_row_kernel"); - } - else - { - element_op_int_col_kernel<<<128, 128>>>(viennacl::cuda_arg(A), - static_cast<unsigned int>(viennacl::traits::start1(A)), static_cast<unsigned int>(viennacl::traits::start2(A)), - static_cast<unsigned int>(viennacl::traits::stride1(A)), static_cast<unsigned int>(viennacl::traits::stride2(A)), - static_cast<unsigned int>(viennacl::traits::size1(A)), static_cast<unsigned int>(viennacl::traits::size2(A)), - static_cast<unsigned int>(viennacl::traits::internal_size1(A)), static_cast<unsigned int>(viennacl::traits::internal_size2(A)), - - viennacl::cuda_arg(proxy.lhs()), - static_cast<unsigned int>(viennacl::traits::start1(proxy.lhs())), static_cast<unsigned int>(viennacl::traits::start2(proxy.lhs())), - static_cast<unsigned int>(viennacl::traits::stride1(proxy.lhs())), static_cast<unsigned int>(viennacl::traits::stride2(proxy.lhs())), - static_cast<unsigned int>(viennacl::traits::internal_size1(proxy.lhs())), static_cast<unsigned int>(viennacl::traits::internal_size2(proxy.lhs())), - - viennacl::cuda_arg(proxy.rhs()), - static_cast<unsigned int>(viennacl::traits::start1(proxy.rhs())), static_cast<unsigned int>(viennacl::traits::start2(proxy.rhs())), - static_cast<unsigned int>(viennacl::traits::stride1(proxy.rhs())), static_cast<unsigned int>(viennacl::traits::stride2(proxy.rhs())), - static_cast<unsigned int>(viennacl::traits::internal_size1(proxy.rhs())), static_cast<unsigned int>(viennacl::traits::internal_size2(proxy.rhs())), - - op_type - ); - VIENNACL_CUDA_LAST_ERROR_CHECK("element_op_col_kernel"); - } -} - -template<typename SizeT, typename OpT> -void element_op(matrix_base<float, SizeT> & A, - matrix_expression<const matrix_base<float, SizeT>, const matrix_base<float, SizeT>, op_element_binary<OpT> > const & proxy) -{ - assert(A.row_major() == proxy.lhs().row_major() && A.row_major() == proxy.rhs().row_major() && bool("Element-wise operations on mixed matrix layouts not supported yet!")); - - typedef float value_type; - - unsigned int op_type = 2; //0: product, 1: division, 2: power - if (viennacl::is_division<OpT>::value) - op_type = 1; - else if (viennacl::is_product<OpT>::value) - op_type = 0; - - if (A.row_major()) - { - element_op_row_kernel<<<128, 128>>>(viennacl::cuda_arg(A), - static_cast<unsigned int>(viennacl::traits::start1(A)), static_cast<unsigned int>(viennacl::traits::start2(A)), - static_cast<unsigned int>(viennacl::traits::stride1(A)), static_cast<unsigned int>(viennacl::traits::stride2(A)), - static_cast<unsigned int>(viennacl::traits::size1(A)), static_cast<unsigned int>(viennacl::traits::size2(A)), - static_cast<unsigned int>(viennacl::traits::internal_size1(A)), static_cast<unsigned int>(viennacl::traits::internal_size2(A)), - - viennacl::cuda_arg(proxy.lhs()), - static_cast<unsigned int>(viennacl::traits::start1(proxy.lhs())), static_cast<unsigned int>(viennacl::traits::start2(proxy.lhs())), - static_cast<unsigned int>(viennacl::traits::stride1(proxy.lhs())), static_cast<unsigned int>(viennacl::traits::stride2(proxy.lhs())), - static_cast<unsigned int>(viennacl::traits::internal_size1(proxy.lhs())), static_cast<unsigned int>(viennacl::traits::internal_size2(proxy.lhs())), - - viennacl::cuda_arg(proxy.rhs()), - static_cast<unsigned int>(viennacl::traits::start1(proxy.rhs())), static_cast<unsigned int>(viennacl::traits::start2(proxy.rhs())), - static_cast<unsigned int>(viennacl::traits::stride1(proxy.rhs())), static_cast<unsigned int>(viennacl::traits::stride2(proxy.rhs())), - static_cast<unsigned int>(viennacl::traits::internal_size1(proxy.rhs())), static_cast<unsigned int>(viennacl::traits::internal_size2(proxy.rhs())), - - op_type - ); - VIENNACL_CUDA_LAST_ERROR_CHECK("element_op_row_kernel"); - } - else - { - element_op_col_kernel<<<128, 128>>>(viennacl::cuda_arg(A), - static_cast<unsigned int>(viennacl::traits::start1(A)), static_cast<unsigned int>(viennacl::traits::start2(A)), - static_cast<unsigned int>(viennacl::traits::stride1(A)), static_cast<unsigned int>(viennacl::traits::stride2(A)), - static_cast<unsigned int>(viennacl::traits::size1(A)), static_cast<unsigned int>(viennacl::traits::size2(A)), - static_cast<unsigned int>(viennacl::traits::internal_size1(A)), static_cast<unsigned int>(viennacl::traits::internal_size2(A)), - - viennacl::cuda_arg(proxy.lhs()), - static_cast<unsigned int>(viennacl::traits::start1(proxy.lhs())), static_cast<unsigned int>(viennacl::traits::start2(proxy.lhs())), - static_cast<unsigned int>(viennacl::traits::stride1(proxy.lhs())), static_cast<unsigned int>(viennacl::traits::stride2(proxy.lhs())), - static_cast<unsigned int>(viennacl::traits::internal_size1(proxy.lhs())), static_cast<unsigned int>(viennacl::traits::internal_size2(proxy.lhs())), - - viennacl::cuda_arg(proxy.rhs()), - static_cast<unsigned int>(viennacl::traits::start1(proxy.rhs())), static_cast<unsigned int>(viennacl::traits::start2(proxy.rhs())), - static_cast<unsigned int>(viennacl::traits::stride1(proxy.rhs())), static_cast<unsigned int>(viennacl::traits::stride2(proxy.rhs())), - static_cast<unsigned int>(viennacl::traits::internal_size1(proxy.rhs())), static_cast<unsigned int>(viennacl::traits::internal_size2(proxy.rhs())), - - op_type - ); - VIENNACL_CUDA_LAST_ERROR_CHECK("element_op_col_kernel"); - } -} - -template<typename SizeT, typename OpT> -void element_op(matrix_base<double, SizeT> & A, - matrix_expression<const matrix_base<double, SizeT>, const matrix_base<double, SizeT>, op_element_binary<OpT> > const & proxy) -{ - assert(A.row_major() == proxy.lhs().row_major() && A.row_major() == proxy.rhs().row_major() && bool("Element-wise operations on mixed matrix layouts not supported yet!")); - - typedef double value_type; - - unsigned int op_type = 2; //0: product, 1: division, 2: power - if (viennacl::is_division<OpT>::value) - op_type = 1; - else if (viennacl::is_product<OpT>::value) - op_type = 0; - - if (A.row_major()) - { - element_op_row_kernel<<<128, 128>>>(viennacl::cuda_arg(A), - static_cast<unsigned int>(viennacl::traits::start1(A)), static_cast<unsigned int>(viennacl::traits::start2(A)), - static_cast<unsigned int>(viennacl::traits::stride1(A)), static_cast<unsigned int>(viennacl::traits::stride2(A)), - static_cast<unsigned int>(viennacl::traits::size1(A)), static_cast<unsigned int>(viennacl::traits::size2(A)), - static_cast<unsigned int>(viennacl::traits::internal_size1(A)), static_cast<unsigned int>(viennacl::traits::internal_size2(A)), - - viennacl::cuda_arg(proxy.lhs()), - static_cast<unsigned int>(viennacl::traits::start1(proxy.lhs())), static_cast<unsigned int>(viennacl::traits::start2(proxy.lhs())), - static_cast<unsigned int>(viennacl::traits::stride1(proxy.lhs())), static_cast<unsigned int>(viennacl::traits::stride2(proxy.lhs())), - static_cast<unsigned int>(viennacl::traits::internal_size1(proxy.lhs())), static_cast<unsigned int>(viennacl::traits::internal_size2(proxy.lhs())), - - viennacl::cuda_arg(proxy.rhs()), - static_cast<unsigned int>(viennacl::traits::start1(proxy.rhs())), static_cast<unsigned int>(viennacl::traits::start2(proxy.rhs())), - static_cast<unsigned int>(viennacl::traits::stride1(proxy.rhs())), static_cast<unsigned int>(viennacl::traits::stride2(proxy.rhs())), - static_cast<unsigned int>(viennacl::traits::internal_size1(proxy.rhs())), static_cast<unsigned int>(viennacl::traits::internal_size2(proxy.rhs())), - - op_type - ); - VIENNACL_CUDA_LAST_ERROR_CHECK("element_op_row_kernel"); - } - else - { - element_op_col_kernel<<<128, 128>>>(viennacl::cuda_arg(A), - static_cast<unsigned int>(viennacl::traits::start1(A)), static_cast<unsigned int>(viennacl::traits::start2(A)), - static_cast<unsigned int>(viennacl::traits::stride1(A)), static_cast<unsigned int>(viennacl::traits::stride2(A)), - static_cast<unsigned int>(viennacl::traits::size1(A)), static_cast<unsigned int>(viennacl::traits::size2(A)), - static_cast<unsigned int>(viennacl::traits::internal_size1(A)), static_cast<unsigned int>(viennacl::traits::internal_size2(A)), - - viennacl::cuda_arg(proxy.lhs()), - static_cast<unsigned int>(viennacl::traits::start1(proxy.lhs())), static_cast<unsigned int>(viennacl::traits::start2(proxy.lhs())), - static_cast<unsigned int>(viennacl::traits::stride1(proxy.lhs())), static_cast<unsigned int>(viennacl::traits::stride2(proxy.lhs())), - static_cast<unsigned int>(viennacl::traits::internal_size1(proxy.lhs())), static_cast<unsigned int>(viennacl::traits::internal_size2(proxy.lhs())), - - viennacl::cuda_arg(proxy.rhs()), - static_cast<unsigned int>(viennacl::traits::start1(proxy.rhs())), static_cast<unsigned int>(viennacl::traits::start2(proxy.rhs())), - static_cast<unsigned int>(viennacl::traits::stride1(proxy.rhs())), static_cast<unsigned int>(viennacl::traits::stride2(proxy.rhs())), - static_cast<unsigned int>(viennacl::traits::internal_size1(proxy.rhs())), static_cast<unsigned int>(viennacl::traits::internal_size2(proxy.rhs())), - - op_type - ); - VIENNACL_CUDA_LAST_ERROR_CHECK("element_op_col_kernel"); - } -} - -// -///////////////////////// unary element-wise operations ///////////////////////////////// -// - -// Note: Due to CUDA vs C-proprocessor interference (concatenation seems to be broken in at least CUDA 4.2), -// we could not find a more 'automatic' way of generating the overloads below... - -// abs -template<typename NumericT> -void element_op(matrix_base<NumericT> & A, - matrix_expression<const matrix_base<NumericT>, const matrix_base<NumericT>, op_element_unary<op_abs> > const & proxy) -{ - assert(A.row_major() == proxy.lhs().row_major() && A.row_major() == proxy.rhs().row_major() && bool("Element-wise operations on mixed matrix layouts not supported yet!")); - - typedef NumericT value_type; - - if (A.row_major()) - { - matrix_row_element_abs_kernel<<<128, 128>>>(viennacl::cuda_arg(A), - static_cast<unsigned int>(viennacl::traits::start1(A)), static_cast<unsigned int>(viennacl::traits::start2(A)), - static_cast<unsigned int>(viennacl::traits::stride1(A)), static_cast<unsigned int>(viennacl::traits::stride2(A)), - static_cast<unsigned int>(viennacl::traits::size1(A)), static_cast<unsigned int>(viennacl::traits::size2(A)), - static_cast<unsigned int>(viennacl::traits::internal_size1(A)), static_cast<unsigned int>(viennacl::traits::internal_size2(A)), - - viennacl::cuda_arg(proxy.lhs()), - static_cast<unsigned int>(viennacl::traits::start1(proxy.lhs())), static_cast<unsigned int>(viennacl::traits::start2(proxy.lhs())), - static_cast<unsigned int>(viennacl::traits::stride1(proxy.lhs())), static_cast<unsigned int>(viennacl::traits::stride2(proxy.lhs())), - static_cast<unsigned int>(viennacl::traits::internal_size1(proxy.lhs())), static_cast<unsigned int>(viennacl::traits::internal_size2(proxy.lhs())) - ); - VIENNACL_CUDA_LAST_ERROR_CHECK("matrix_row_element_abs_kernel"); - } - else - { - matrix_col_element_abs_kernel<<<128, 128>>>(viennacl::cuda_arg(A), - static_cast<unsigned int>(viennacl::traits::start1(A)), static_cast<unsigned int>(viennacl::traits::start2(A)), - static_cast<unsigned int>(viennacl::traits::stride1(A)), static_cast<unsigned int>(viennacl::traits::stride2(A)), - static_cast<unsigned int>(viennacl::traits::size1(A)), static_cast<unsigned int>(viennacl::traits::size2(A)), - static_cast<unsigned int>(viennacl::traits::internal_size1(A)), static_cast<unsigned int>(viennacl::traits::internal_size2(A)), - - viennacl::cuda_arg(proxy.lhs()), - static_cast<unsigned int>(viennacl::traits::start1(proxy.lhs())), static_cast<unsigned int>(viennacl::traits::start2(proxy.lhs())), - static_cast<unsigned int>(viennacl::traits::stride1(proxy.lhs())), static_cast<unsigned int>(viennacl::traits::stride2(proxy.lhs())), - static_cast<unsigned int>(viennacl::traits::internal_size1(proxy.lhs())), static_cast<unsigned int>(viennacl::traits::internal_size2(proxy.lhs())) - ); - VIENNACL_CUDA_LAST_ERROR_CHECK("matrix_col_element_abs_kernel"); - } -} - - -// acos -template<typename NumericT> -void element_op(matrix_base<NumericT> & A, - matrix_expression<const matrix_base<NumericT>, const matrix_base<NumericT>, op_element_unary<op_acos> > const & proxy) -{ - assert(A.row_major() == proxy.lhs().row_major() && A.row_major() == proxy.rhs().row_major() && bool("Element-wise operations on mixed matrix layouts not supported yet!")); - - typedef NumericT value_type; - - if (A.row_major()) - { - matrix_row_element_acos_kernel<<<128, 128>>>(viennacl::cuda_arg(A), - static_cast<unsigned int>(viennacl::traits::start1(A)), static_cast<unsigned int>(viennacl::traits::start2(A)), - static_cast<unsigned int>(viennacl::traits::stride1(A)), static_cast<unsigned int>(viennacl::traits::stride2(A)), - static_cast<unsigned int>(viennacl::traits::size1(A)), static_cast<unsigned int>(viennacl::traits::size2(A)), - static_cast<unsigned int>(viennacl::traits::internal_size1(A)), static_cast<unsigned int>(viennacl::traits::internal_size2(A)), - - viennacl::cuda_arg(proxy.lhs()), - static_cast<unsigned int>(viennacl::traits::start1(proxy.lhs())), static_cast<unsigned int>(viennacl::traits::start2(proxy.lhs())), - static_cast<unsigned int>(viennacl::traits::stride1(proxy.lhs())), static_cast<unsigned int>(viennacl::traits::stride2(proxy.lhs())), - static_cast<unsigned int>(viennacl::traits::internal_size1(proxy.lhs())), static_cast<unsigned int>(viennacl::traits::internal_size2(proxy.lhs())) - ); - VIENNACL_CUDA_LAST_ERROR_CHECK("matrix_row_element_acos_kernel"); - } - else - { - matrix_col_element_acos_kernel<<<128, 128>>>(viennacl::cuda_arg(A), - static_cast<unsigned int>(viennacl::traits::start1(A)), static_cast<unsigned int>(viennacl::traits::start2(A)), - static_cast<unsigned int>(viennacl::traits::stride1(A)), static_cast<unsigned int>(viennacl::traits::stride2(A)), - static_cast<unsigned int>(viennacl::traits::size1(A)), static_cast<unsigned int>(viennacl::traits::size2(A)), - static_cast<unsigned int>(viennacl::traits::internal_size1(A)), static_cast<unsigned int>(viennacl::traits::internal_size2(A)), - - viennacl::cuda_arg(proxy.lhs()), - static_cast<unsigned int>(viennacl::traits::start1(proxy.lhs())), static_cast<unsigned int>(viennacl::traits::start2(proxy.lhs())), - static_cast<unsigned int>(viennacl::traits::stride1(proxy.lhs())), static_cast<unsigned int>(viennacl::traits::stride2(proxy.lhs())), - static_cast<unsigned int>(viennacl::traits::internal_size1(proxy.lhs())), static_cast<unsigned int>(viennacl::traits::internal_size2(proxy.lhs())) - ); - VIENNACL_CUDA_LAST_ERROR_CHECK("matrix_col_element_acos_kernel"); - } -} - - -// asin -template<typename NumericT> -void element_op(matrix_base<NumericT> & A, - matrix_expression<const matrix_base<NumericT>, const matrix_base<NumericT>, op_element_unary<op_asin> > const & proxy) -{ - assert(A.row_major() == proxy.lhs().row_major() && A.row_major() == proxy.rhs().row_major() && bool("Element-wise operations on mixed matrix layouts not supported yet!")); - - typedef NumericT value_type; - - if (A.row_major()) - { - matrix_row_element_asin_kernel<<<128, 128>>>(viennacl::cuda_arg(A), - static_cast<unsigned int>(viennacl::traits::start1(A)), static_cast<unsigned int>(viennacl::traits::start2(A)), - static_cast<unsigned int>(viennacl::traits::stride1(A)), static_cast<unsigned int>(viennacl::traits::stride2(A)), - static_cast<unsigned int>(viennacl::traits::size1(A)), static_cast<unsigned int>(viennacl::traits::size2(A)), - static_cast<unsigned int>(viennacl::traits::internal_size1(A)), static_cast<unsigned int>(viennacl::traits::internal_size2(A)), - - viennacl::cuda_arg(proxy.lhs()), - static_cast<unsigned int>(viennacl::traits::start1(proxy.lhs())), static_cast<unsigned int>(viennacl::traits::start2(proxy.lhs())), - static_cast<unsigned int>(viennacl::traits::stride1(proxy.lhs())), static_cast<unsigned int>(viennacl::traits::stride2(proxy.lhs())), - static_cast<unsigned int>(viennacl::traits::internal_size1(proxy.lhs())), static_cast<unsigned int>(viennacl::traits::internal_size2(proxy.lhs())) - ); - VIENNACL_CUDA_LAST_ERROR_CHECK("matrix_row_element_asin_kernel"); - } - else - { - matrix_col_element_asin_kernel<<<128, 128>>>(viennacl::cuda_arg(A), - static_cast<unsigned int>(viennacl::traits::start1(A)), static_cast<unsigned int>(viennacl::traits::start2(A)), - static_cast<unsigned int>(viennacl::traits::stride1(A)), static_cast<unsigned int>(viennacl::traits::stride2(A)), - static_cast<unsigned int>(viennacl::traits::size1(A)), static_cast<unsigned int>(viennacl::traits::size2(A)), - static_cast<unsigned int>(viennacl::traits::internal_size1(A)), static_cast<unsigned int>(viennacl::traits::internal_size2(A)), - - viennacl::cuda_arg(proxy.lhs()), - static_cast<unsigned int>(viennacl::traits::start1(proxy.lhs())), static_cast<unsigned int>(viennacl::traits::start2(proxy.lhs())), - static_cast<unsigned int>(viennacl::traits::stride1(proxy.lhs())), static_cast<unsigned int>(viennacl::traits::stride2(proxy.lhs())), - static_cast<unsigned int>(viennacl::traits::internal_size1(proxy.lhs())), static_cast<unsigned int>(viennacl::traits::internal_size2(proxy.lhs())) - ); - VIENNACL_CUDA_LAST_ERROR_CHECK("matrix_col_element_sin_kernel"); - } -} - - -// atan -template<typename NumericT> -void element_op(matrix_base<NumericT> & A, - matrix_expression<const matrix_base<NumericT>, const matrix_base<NumericT>, op_element_unary<op_atan> > const & proxy) -{ - assert(A.row_major() == proxy.lhs().row_major() && A.row_major() == proxy.rhs().row_major() && bool("Element-wise operations on mixed matrix layouts not supported yet!")); - - typedef NumericT value_type; - - if (A.row_major()) - { - matrix_row_element_atan_kernel<<<128, 128>>>(viennacl::cuda_arg(A), - static_cast<unsigned int>(viennacl::traits::start1(A)), static_cast<unsigned int>(viennacl::traits::start2(A)), - static_cast<unsigned int>(viennacl::traits::stride1(A)), static_cast<unsigned int>(viennacl::traits::stride2(A)), - static_cast<unsigned int>(viennacl::traits::size1(A)), static_cast<unsigned int>(viennacl::traits::size2(A)), - static_cast<unsigned int>(viennacl::traits::internal_size1(A)), static_cast<unsigned int>(viennacl::traits::internal_size2(A)), - - viennacl::cuda_arg(proxy.lhs()), - static_cast<unsigned int>(viennacl::traits::start1(proxy.lhs())), static_cast<unsigned int>(viennacl::traits::start2(proxy.lhs())), - static_cast<unsigned int>(viennacl::traits::stride1(proxy.lhs())), static_cast<unsigned int>(viennacl::traits::stride2(proxy.lhs())), - static_cast<unsigned int>(viennacl::traits::internal_size1(proxy.lhs())), static_cast<unsigned int>(viennacl::traits::internal_size2(proxy.lhs())) - ); - VIENNACL_CUDA_LAST_ERROR_CHECK("matrix_row_element_atan_kernel"); - } - else - { - matrix_col_element_atan_kernel<<<128, 128>>>(viennacl::cuda_arg(A), - static_cast<unsigned int>(viennacl::traits::start1(A)), static_cast<unsigned int>(viennacl::traits::start2(A)), - static_cast<unsigned int>(viennacl::traits::stride1(A)), static_cast<unsigned int>(viennacl::traits::stride2(A)), - static_cast<unsigned int>(viennacl::traits::size1(A)), static_cast<unsigned int>(viennacl::traits::size2(A)), - static_cast<unsigned int>(viennacl::traits::internal_size1(A)), static_cast<unsigned int>(viennacl::traits::internal_size2(A)), - - viennacl::cuda_arg(proxy.lhs()), - static_cast<unsigned int>(viennacl::traits::start1(proxy.lhs())), static_cast<unsigned int>(viennacl::traits::start2(proxy.lhs())), - static_cast<unsigned int>(viennacl::traits::stride1(proxy.lhs())), static_cast<unsigned int>(viennacl::traits::stride2(proxy.lhs())), - static_cast<unsigned int>(viennacl::traits::internal_size1(proxy.lhs())), static_cast<unsigned int>(viennacl::traits::internal_size2(proxy.lhs())) - ); - VIENNACL_CUDA_LAST_ERROR_CHECK("matrix_col_element_atan_kernel"); - } -} - - -// ceil -template<typename NumericT> -void element_op(matrix_base<NumericT> & A, - matrix_expression<const matrix_base<NumericT>, const matrix_base<NumericT>, op_element_unary<op_ceil> > const & proxy) -{ - assert(A.row_major() == proxy.lhs().row_major() && A.row_major() == proxy.rhs().row_major() && bool("Element-wise operations on mixed matrix layouts not supported yet!")); - - typedef NumericT value_type; - - if (A.row_major()) - { - matrix_row_element_ceil_kernel<<<128, 128>>>(viennacl::cuda_arg(A), - static_cast<unsigned int>(viennacl::traits::start1(A)), static_cast<unsigned int>(viennacl::traits::start2(A)), - static_cast<unsigned int>(viennacl::traits::stride1(A)), static_cast<unsigned int>(viennacl::traits::stride2(A)), - static_cast<unsigned int>(viennacl::traits::size1(A)), static_cast<unsigned int>(viennacl::traits::size2(A)), - static_cast<unsigned int>(viennacl::traits::internal_size1(A)), static_cast<unsigned int>(viennacl::traits::internal_size2(A)), - - viennacl::cuda_arg(proxy.lhs()), - static_cast<unsigned int>(viennacl::traits::start1(proxy.lhs())), static_cast<unsigned int>(viennacl::traits::start2(proxy.lhs())), - static_cast<unsigned int>(viennacl::traits::stride1(proxy.lhs())), static_cast<unsigned int>(viennacl::traits::stride2(proxy.lhs())), - static_cast<unsigned int>(viennacl::traits::internal_size1(proxy.lhs())), static_cast<unsigned int>(viennacl::traits::internal_size2(proxy.lhs())) - ); - VIENNACL_CUDA_LAST_ERROR_CHECK("matrix_row_element_ceil_kernel"); - } - else - { - matrix_col_element_ceil_kernel<<<128, 128>>>(viennacl::cuda_arg(A), - static_cast<unsigned int>(viennacl::traits::start1(A)), static_cast<unsigned int>(viennacl::traits::start2(A)), - static_cast<unsigned int>(viennacl::traits::stride1(A)), static_cast<unsigned int>(viennacl::traits::stride2(A)), - static_cast<unsigned int>(viennacl::traits::size1(A)), static_cast<unsigned int>(viennacl::traits::size2(A)), - static_cast<unsigned int>(viennacl::traits::internal_size1(A)), static_cast<unsigned int>(viennacl::traits::internal_size2(A)), - - viennacl::cuda_arg(proxy.lhs()), - static_cast<unsigned int>(viennacl::traits::start1(proxy.lhs())), static_cast<unsigned int>(viennacl::traits::start2(proxy.lhs())), - static_cast<unsigned int>(viennacl::traits::stride1(proxy.lhs())), static_cast<unsigned int>(viennacl::traits::stride2(proxy.lhs())), - static_cast<unsigned int>(viennacl::traits::internal_size1(proxy.lhs())), static_cast<unsigned int>(viennacl::traits::internal_size2(proxy.lhs())) - ); - VIENNACL_CUDA_LAST_ERROR_CHECK("matrix_col_element_ceil_kernel"); - } -} - - -// cos -template<typename NumericT> -void element_op(matrix_base<NumericT> & A, - matrix_expression<const matrix_base<NumericT>, const matrix_base<NumericT>, op_element_unary<op_cos> > const & proxy) -{ - assert(A.row_major() == proxy.lhs().row_major() && A.row_major() == proxy.rhs().row_major() && bool("Element-wise operations on mixed matrix layouts not supported yet!")); - - typedef NumericT value_type; - - if (A.row_major()) - { - matrix_row_element_cos_kernel<<<128, 128>>>(viennacl::cuda_arg(A), - static_cast<unsigned int>(viennacl::traits::start1(A)), static_cast<unsigned int>(viennacl::traits::start2(A)), - static_cast<unsigned int>(viennacl::traits::stride1(A)), static_cast<unsigned int>(viennacl::traits::stride2(A)), - static_cast<unsigned int>(viennacl::traits::size1(A)), static_cast<unsigned int>(viennacl::traits::size2(A)), - static_cast<unsigned int>(viennacl::traits::internal_size1(A)), static_cast<unsigned int>(viennacl::traits::internal_size2(A)), - - viennacl::cuda_arg(proxy.lhs()), - static_cast<unsigned int>(viennacl::traits::start1(proxy.lhs())), static_cast<unsigned int>(viennacl::traits::start2(proxy.lhs())), - static_cast<unsigned int>(viennacl::traits::stride1(proxy.lhs())), static_cast<unsigned int>(viennacl::traits::stride2(proxy.lhs())), - static_cast<unsigned int>(viennacl::traits::internal_size1(proxy.lhs())), static_cast<unsigned int>(viennacl::traits::internal_size2(proxy.lhs())) - ); - VIENNACL_CUDA_LAST_ERROR_CHECK("matrix_row_element_cos_kernel"); - } - else - { - matrix_col_element_cos_kernel<<<128, 128>>>(viennacl::cuda_arg(A), - static_cast<unsigned int>(viennacl::traits::start1(A)), static_cast<unsigned int>(viennacl::traits::start2(A)), - static_cast<unsigned int>(viennacl::traits::stride1(A)), static_cast<unsigned int>(viennacl::traits::stride2(A)), - static_cast<unsigned int>(viennacl::traits::size1(A)), static_cast<unsigned int>(viennacl::traits::size2(A)), - static_cast<unsigned int>(viennacl::traits::internal_size1(A)), static_cast<unsigned int>(viennacl::traits::internal_size2(A)), - - viennacl::cuda_arg(proxy.lhs()), - static_cast<unsigned int>(viennacl::traits::start1(proxy.lhs())), static_cast<unsigned int>(viennacl::traits::start2(proxy.lhs())), - static_cast<unsigned int>(viennacl::traits::stride1(proxy.lhs())), static_cast<unsigned int>(viennacl::traits::stride2(proxy.lhs())), - static_cast<unsigned int>(viennacl::traits::internal_size1(proxy.lhs())), static_cast<unsigned int>(viennacl::traits::internal_size2(proxy.lhs())) - ); - VIENNACL_CUDA_LAST_ERROR_CHECK("matrix_col_element_cos_kernel"); - } -} - - -// cosh -template<typename NumericT> -void element_op(matrix_base<NumericT> & A, - matrix_expression<const matrix_base<NumericT>, const matrix_base<NumericT>, op_element_unary<op_cosh> > const & proxy) -{ - assert(A.row_major() == proxy.lhs().row_major() && A.row_major() == proxy.rhs().row_major() && bool("Element-wise operations on mixed matrix layouts not supported yet!")); - - typedef NumericT value_type; - - if (A.row_major()) - { - matrix_row_element_cosh_kernel<<<128, 128>>>(viennacl::cuda_arg(A), - static_cast<unsigned int>(viennacl::traits::start1(A)), static_cast<unsigned int>(viennacl::traits::start2(A)), - static_cast<unsigned int>(viennacl::traits::stride1(A)), static_cast<unsigned int>(viennacl::traits::stride2(A)), - static_cast<unsigned int>(viennacl::traits::size1(A)), static_cast<unsigned int>(viennacl::traits::size2(A)), - static_cast<unsigned int>(viennacl::traits::internal_size1(A)), static_cast<unsigned int>(viennacl::traits::internal_size2(A)), - - viennacl::cuda_arg(proxy.lhs()), - static_cast<unsigned int>(viennacl::traits::start1(proxy.lhs())), static_cast<unsigned int>(viennacl::traits::start2(proxy.lhs())), - static_cast<unsigned int>(viennacl::traits::stride1(proxy.lhs())), static_cast<unsigned int>(viennacl::traits::stride2(proxy.lhs())), - static_cast<unsigned int>(viennacl::traits::internal_size1(proxy.lhs())), static_cast<unsigned int>(viennacl::traits::internal_size2(proxy.lhs())) - ); - VIENNACL_CUDA_LAST_ERROR_CHECK("matrix_row_element_cosh_kernel"); - } - else - { - matrix_col_element_cosh_kernel<<<128, 128>>>(viennacl::cuda_arg(A), - static_cast<unsigned int>(viennacl::traits::start1(A)), static_cast<unsigned int>(viennacl::traits::start2(A)), - static_cast<unsigned int>(viennacl::traits::stride1(A)), static_cast<unsigned int>(viennacl::traits::stride2(A)), - static_cast<unsigned int>(viennacl::traits::size1(A)), static_cast<unsigned int>(viennacl::traits::size2(A)), - static_cast<unsigned int>(viennacl::traits::internal_size1(A)), static_cast<unsigned int>(viennacl::traits::internal_size2(A)), - - viennacl::cuda_arg(proxy.lhs()), - static_cast<unsigned int>(viennacl::traits::start1(proxy.lhs())), static_cast<unsigned int>(viennacl::traits::start2(proxy.lhs())), - static_cast<unsigned int>(viennacl::traits::stride1(proxy.lhs())), static_cast<unsigned int>(viennacl::traits::stride2(proxy.lhs())), - static_cast<unsigned int>(viennacl::traits::internal_size1(proxy.lhs())), static_cast<unsigned int>(viennacl::traits::internal_size2(proxy.lhs())) - ); - VIENNACL_CUDA_LAST_ERROR_CHECK("matrix_col_element_cosh_kernel"); - } -} - - -// exp -template<typename NumericT> -void element_op(matrix_base<NumericT> & A, - matrix_expression<const matrix_base<NumericT>, const matrix_base<NumericT>, op_element_unary<op_exp> > const & proxy) -{ - assert(A.row_major() == proxy.lhs().row_major() && A.row_major() == proxy.rhs().row_major() && bool("Element-wise operations on mixed matrix layouts not supported yet!")); - - typedef NumericT value_type; - - if (A.row_major()) - { - matrix_row_element_exp_kernel<<<128, 128>>>(viennacl::cuda_arg(A), - static_cast<unsigned int>(viennacl::traits::start1(A)), static_cast<unsigned int>(viennacl::traits::start2(A)), - static_cast<unsigned int>(viennacl::traits::stride1(A)), static_cast<unsigned int>(viennacl::traits::stride2(A)), - static_cast<unsigned int>(viennacl::traits::size1(A)), static_cast<unsigned int>(viennacl::traits::size2(A)), - static_cast<unsigned int>(viennacl::traits::internal_size1(A)), static_cast<unsigned int>(viennacl::traits::internal_size2(A)), - - viennacl::cuda_arg(proxy.lhs()), - static_cast<unsigned int>(viennacl::traits::start1(proxy.lhs())), static_cast<unsigned int>(viennacl::traits::start2(proxy.lhs())), - static_cast<unsigned int>(viennacl::traits::stride1(proxy.lhs())), static_cast<unsigned int>(viennacl::traits::stride2(proxy.lhs())), - static_cast<unsigned int>(viennacl::traits::internal_size1(proxy.lhs())), static_cast<unsigned int>(viennacl::traits::internal_size2(proxy.lhs())) - ); - VIENNACL_CUDA_LAST_ERROR_CHECK("matrix_row_element_exp_kernel"); - } - else - { - matrix_col_element_exp_kernel<<<128, 128>>>(viennacl::cuda_arg(A), - static_cast<unsigned int>(viennacl::traits::start1(A)), static_cast<unsigned int>(viennacl::traits::start2(A)), - static_cast<unsigned int>(viennacl::traits::stride1(A)), static_cast<unsigned int>(viennacl::traits::stride2(A)), - static_cast<unsigned int>(viennacl::traits::size1(A)), static_cast<unsigned int>(viennacl::traits::size2(A)), - static_cast<unsigned int>(viennacl::traits::internal_size1(A)), static_cast<unsigned int>(viennacl::traits::internal_size2(A)), - - viennacl::cuda_arg(proxy.lhs()), - static_cast<unsigned int>(viennacl::traits::start1(proxy.lhs())), static_cast<unsigned int>(viennacl::traits::start2(proxy.lhs())), - static_cast<unsigned int>(viennacl::traits::stride1(proxy.lhs())), static_cast<unsigned int>(viennacl::traits::stride2(proxy.lhs())), - static_cast<unsigned int>(viennacl::traits::internal_size1(proxy.lhs())), static_cast<unsigned int>(viennacl::traits::internal_size2(proxy.lhs())) - ); - VIENNACL_CUDA_LAST_ERROR_CHECK("matrix_col_element_exp_kernel"); - } -} - - -// fabs -template<typename NumericT> -void element_op(matrix_base<NumericT> & A, - matrix_expression<const matrix_base<NumericT>, const matrix_base<NumericT>, op_element_unary<op_fabs> > const & proxy) -{ - assert(A.row_major() == proxy.lhs().row_major() && A.row_major() == proxy.rhs().row_major() && bool("Element-wise operations on mixed matrix layouts not supported yet!")); - - typedef NumericT value_type; - - if (A.row_major()) - { - matrix_row_element_fabs_kernel<<<128, 128>>>(viennacl::cuda_arg(A), - static_cast<unsigned int>(viennacl::traits::start1(A)), static_cast<unsigned int>(viennacl::traits::start2(A)), - static_cast<unsigned int>(viennacl::traits::stride1(A)), static_cast<unsigned int>(viennacl::traits::stride2(A)), - static_cast<unsigned int>(viennacl::traits::size1(A)), static_cast<unsigned int>(viennacl::traits::size2(A)), - static_cast<unsigned int>(viennacl::traits::internal_size1(A)), static_cast<unsigned int>(viennacl::traits::internal_size2(A)), - - viennacl::cuda_arg(proxy.lhs()), - static_cast<unsigned int>(viennacl::traits::start1(proxy.lhs())), static_cast<unsigned int>(viennacl::traits::start2(proxy.lhs())), - static_cast<unsigned int>(viennacl::traits::stride1(proxy.lhs())), static_cast<unsigned int>(viennacl::traits::stride2(proxy.lhs())), - static_cast<unsigned int>(viennacl::traits::internal_size1(proxy.lhs())), static_cast<unsigned int>(viennacl::traits::internal_size2(proxy.lhs())) - ); - VIENNACL_CUDA_LAST_ERROR_CHECK("matrix_row_element_fabs_kernel"); - } - else - { - matrix_col_element_fabs_kernel<<<128, 128>>>(viennacl::cuda_arg(A), - static_cast<unsigned int>(viennacl::traits::start1(A)), static_cast<unsigned int>(viennacl::traits::start2(A)), - static_cast<unsigned int>(viennacl::traits::stride1(A)), static_cast<unsigned int>(viennacl::traits::stride2(A)), - static_cast<unsigned int>(viennacl::traits::size1(A)), static_cast<unsigned int>(viennacl::traits::size2(A)), - static_cast<unsigned int>(viennacl::traits::internal_size1(A)), static_cast<unsigned int>(viennacl::traits::internal_size2(A)), - - viennacl::cuda_arg(proxy.lhs()), - static_cast<unsigned int>(viennacl::traits::start1(proxy.lhs())), static_cast<unsigned int>(viennacl::traits::start2(proxy.lhs())), - static_cast<unsigned int>(viennacl::traits::stride1(proxy.lhs())), static_cast<unsigned int>(viennacl::traits::stride2(proxy.lhs())), - static_cast<unsigned int>(viennacl::traits::internal_size1(proxy.lhs())), static_cast<unsigned int>(viennacl::traits::internal_size2(proxy.lhs())) - ); - VIENNACL_CUDA_LAST_ERROR_CHECK("matrix_col_element_fabs_kernel"); - } -} - - -// floor -template<typename NumericT> -void element_op(matrix_base<NumericT> & A, - matrix_expression<const matrix_base<NumericT>, const matrix_base<NumericT>, op_element_unary<op_floor> > const & proxy) -{ - assert(A.row_major() == proxy.lhs().row_major() && A.row_major() == proxy.rhs().row_major() && bool("Element-wise operations on mixed matrix layouts not supported yet!")); - - typedef NumericT value_type; - - if (A.row_major()) - { - matrix_row_element_floor_kernel<<<128, 128>>>(viennacl::cuda_arg(A), - static_cast<unsigned int>(viennacl::traits::start1(A)), static_cast<unsigned int>(viennacl::traits::start2(A)), - static_cast<unsigned int>(viennacl::traits::stride1(A)), static_cast<unsigned int>(viennacl::traits::stride2(A)), - static_cast<unsigned int>(viennacl::traits::size1(A)), static_cast<unsigned int>(viennacl::traits::size2(A)), - static_cast<unsigned int>(viennacl::traits::internal_size1(A)), static_cast<unsigned int>(viennacl::traits::internal_size2(A)), - - viennacl::cuda_arg(proxy.lhs()), - static_cast<unsigned int>(viennacl::traits::start1(proxy.lhs())), static_cast<unsigned int>(viennacl::traits::start2(proxy.lhs())), - static_cast<unsigned int>(viennacl::traits::stride1(proxy.lhs())), static_cast<unsigned int>(viennacl::traits::stride2(proxy.lhs())), - static_cast<unsigned int>(viennacl::traits::internal_size1(proxy.lhs())), static_cast<unsigned int>(viennacl::traits::internal_size2(proxy.lhs())) - ); - VIENNACL_CUDA_LAST_ERROR_CHECK("matrix_row_element_floor_kernel"); - } - else - { - matrix_col_element_floor_kernel<<<128, 128>>>(viennacl::cuda_arg(A), - static_cast<unsigned int>(viennacl::traits::start1(A)), static_cast<unsigned int>(viennacl::traits::start2(A)), - static_cast<unsigned int>(viennacl::traits::stride1(A)), static_cast<unsigned int>(viennacl::traits::stride2(A)), - static_cast<unsigned int>(viennacl::traits::size1(A)), static_cast<unsigned int>(viennacl::traits::size2(A)), - static_cast<unsigned int>(viennacl::traits::internal_size1(A)), static_cast<unsigned int>(viennacl::traits::internal_size2(A)), - - viennacl::cuda_arg(proxy.lhs()), - static_cast<unsigned int>(viennacl::traits::start1(proxy.lhs())), static_cast<unsigned int>(viennacl::traits::start2(proxy.lhs())), - static_cast<unsigned int>(viennacl::traits::stride1(proxy.lhs())), static_cast<unsigned int>(viennacl::traits::stride2(proxy.lhs())), - static_cast<unsigned int>(viennacl::traits::internal_size1(proxy.lhs())), static_cast<unsigned int>(viennacl::traits::internal_size2(proxy.lhs())) - ); - VIENNACL_CUDA_LAST_ERROR_CHECK("matrix_col_element_floor_kernel"); - } -} - - -// log -template<typename NumericT> -void element_op(matrix_base<NumericT> & A, - matrix_expression<const matrix_base<NumericT>, const matrix_base<NumericT>, op_element_unary<op_log> > const & proxy) -{ - assert(A.row_major() == proxy.lhs().row_major() && A.row_major() == proxy.rhs().row_major() && bool("Element-wise operations on mixed matrix layouts not supported yet!")); - - typedef NumericT value_type; - - if (A.row_major()) - { - matrix_row_element_log_kernel<<<128, 128>>>(viennacl::cuda_arg(A), - static_cast<unsigned int>(viennacl::traits::start1(A)), static_cast<unsigned int>(viennacl::traits::start2(A)), - static_cast<unsigned int>(viennacl::traits::stride1(A)), static_cast<unsigned int>(viennacl::traits::stride2(A)), - static_cast<unsigned int>(viennacl::traits::size1(A)), static_cast<unsigned int>(viennacl::traits::size2(A)), - static_cast<unsigned int>(viennacl::traits::internal_size1(A)), static_cast<unsigned int>(viennacl::traits::internal_size2(A)), - - viennacl::cuda_arg(proxy.lhs()), - static_cast<unsigned int>(viennacl::traits::start1(proxy.lhs())), static_cast<unsigned int>(viennacl::traits::start2(proxy.lhs())), - static_cast<unsigned int>(viennacl::traits::stride1(proxy.lhs())), static_cast<unsigned int>(viennacl::traits::stride2(proxy.lhs())), - static_cast<unsigned int>(viennacl::traits::internal_size1(proxy.lhs())), static_cast<unsigned int>(viennacl::traits::internal_size2(proxy.lhs())) - ); - VIENNACL_CUDA_LAST_ERROR_CHECK("matrix_row_element_log_kernel"); - } - else - { - matrix_col_element_log_kernel<<<128, 128>>>(viennacl::cuda_arg(A), - static_cast<unsigned int>(viennacl::traits::start1(A)), static_cast<unsigned int>(viennacl::traits::start2(A)), - static_cast<unsigned int>(viennacl::traits::stride1(A)), static_cast<unsigned int>(viennacl::traits::stride2(A)), - static_cast<unsigned int>(viennacl::traits::size1(A)), static_cast<unsigned int>(viennacl::traits::size2(A)), - static_cast<unsigned int>(viennacl::traits::internal_size1(A)), static_cast<unsigned int>(viennacl::traits::internal_size2(A)), - - viennacl::cuda_arg(proxy.lhs()), - static_cast<unsigned int>(viennacl::traits::start1(proxy.lhs())), static_cast<unsigned int>(viennacl::traits::start2(proxy.lhs())), - static_cast<unsigned int>(viennacl::traits::stride1(proxy.lhs())), static_cast<unsigned int>(viennacl::traits::stride2(proxy.lhs())), - static_cast<unsigned int>(viennacl::traits::internal_size1(proxy.lhs())), static_cast<unsigned int>(viennacl::traits::internal_size2(proxy.lhs())) - ); - VIENNACL_CUDA_LAST_ERROR_CHECK("matrix_col_element_log_kernel"); - } -} - - -// log10 -template<typename NumericT> -void element_op(matrix_base<NumericT> & A, - matrix_expression<const matrix_base<NumericT>, const matrix_base<NumericT>, op_element_unary<op_log10> > const & proxy) -{ - assert(A.row_major() == proxy.lhs().row_major() && A.row_major() == proxy.rhs().row_major() && bool("Element-wise operations on mixed matrix layouts not supported yet!")); - - typedef NumericT value_type; - - if (A.row_major()) - { - matrix_row_element_log10_kernel<<<128, 128>>>(viennacl::cuda_arg(A), - static_cast<unsigned int>(viennacl::traits::start1(A)), static_cast<unsigned int>(viennacl::traits::start2(A)), - static_cast<unsigned int>(viennacl::traits::stride1(A)), static_cast<unsigned int>(viennacl::traits::stride2(A)), - static_cast<unsigned int>(viennacl::traits::size1(A)), static_cast<unsigned int>(viennacl::traits::size2(A)), - static_cast<unsigned int>(viennacl::traits::internal_size1(A)), static_cast<unsigned int>(viennacl::traits::internal_size2(A)), - - viennacl::cuda_arg(proxy.lhs()), - static_cast<unsigned int>(viennacl::traits::start1(proxy.lhs())), static_cast<unsigned int>(viennacl::traits::start2(proxy.lhs())), - static_cast<unsigned int>(viennacl::traits::stride1(proxy.lhs())), static_cast<unsigned int>(viennacl::traits::stride2(proxy.lhs())), - static_cast<unsigned int>(viennacl::traits::internal_size1(proxy.lhs())), static_cast<unsigned int>(viennacl::traits::internal_size2(proxy.lhs())) - ); - VIENNACL_CUDA_LAST_ERROR_CHECK("matrix_row_element_log10_kernel"); - } - else - { - matrix_col_element_log10_kernel<<<128, 128>>>(viennacl::cuda_arg(A), - static_cast<unsigned int>(viennacl::traits::start1(A)), static_cast<unsigned int>(viennacl::traits::start2(A)), - static_cast<unsigned int>(viennacl::traits::stride1(A)), static_cast<unsigned int>(viennacl::traits::stride2(A)), - static_cast<unsigned int>(viennacl::traits::size1(A)), static_cast<unsigned int>(viennacl::traits::size2(A)), - static_cast<unsigned int>(viennacl::traits::internal_size1(A)), static_cast<unsigned int>(viennacl::traits::internal_size2(A)), - - viennacl::cuda_arg(proxy.lhs()), - static_cast<unsigned int>(viennacl::traits::start1(proxy.lhs())), static_cast<unsigned int>(viennacl::traits::start2(proxy.lhs())), - static_cast<unsigned int>(viennacl::traits::stride1(proxy.lhs())), static_cast<unsigned int>(viennacl::traits::stride2(proxy.lhs())), - static_cast<unsigned int>(viennacl::traits::internal_size1(proxy.lhs())), static_cast<unsigned int>(viennacl::traits::internal_size2(proxy.lhs())) - ); - VIENNACL_CUDA_LAST_ERROR_CHECK("matrix_col_element_log10_kernel"); - } -} - - -// sin -template<typename NumericT> -void element_op(matrix_base<NumericT> & A, - matrix_expression<const matrix_base<NumericT>, const matrix_base<NumericT>, op_element_unary<op_sin> > const & proxy) -{ - assert(A.row_major() == proxy.lhs().row_major() && A.row_major() == proxy.rhs().row_major() && bool("Element-wise operations on mixed matrix layouts not supported yet!")); - - typedef NumericT value_type; - - if (A.row_major()) - { - matrix_row_element_sin_kernel<<<128, 128>>>(viennacl::cuda_arg(A), - static_cast<unsigned int>(viennacl::traits::start1(A)), static_cast<unsigned int>(viennacl::traits::start2(A)), - static_cast<unsigned int>(viennacl::traits::stride1(A)), static_cast<unsigned int>(viennacl::traits::stride2(A)), - static_cast<unsigned int>(viennacl::traits::size1(A)), static_cast<unsigned int>(viennacl::traits::size2(A)), - static_cast<unsigned int>(viennacl::traits::internal_size1(A)), static_cast<unsigned int>(viennacl::traits::internal_size2(A)), - - viennacl::cuda_arg(proxy.lhs()), - static_cast<unsigned int>(viennacl::traits::start1(proxy.lhs())), static_cast<unsigned int>(viennacl::traits::start2(proxy.lhs())), - static_cast<unsigned int>(viennacl::traits::stride1(proxy.lhs())), static_cast<unsigned int>(viennacl::traits::stride2(proxy.lhs())), - static_cast<unsigned int>(viennacl::traits::internal_size1(proxy.lhs())), static_cast<unsigned int>(viennacl::traits::internal_size2(proxy.lhs())) - ); - VIENNACL_CUDA_LAST_ERROR_CHECK("matrix_row_element_sin_kernel"); - } - else - { - matrix_col_element_sin_kernel<<<128, 128>>>(viennacl::cuda_arg(A), - static_cast<unsigned int>(viennacl::traits::start1(A)), static_cast<unsigned int>(viennacl::traits::start2(A)), - static_cast<unsigned int>(viennacl::traits::stride1(A)), static_cast<unsigned int>(viennacl::traits::stride2(A)), - static_cast<unsigned int>(viennacl::traits::size1(A)), static_cast<unsigned int>(viennacl::traits::size2(A)), - static_cast<unsigned int>(viennacl::traits::internal_size1(A)), static_cast<unsigned int>(viennacl::traits::internal_size2(A)), - - viennacl::cuda_arg(proxy.lhs()), - static_cast<unsigned int>(viennacl::traits::start1(proxy.lhs())), static_cast<unsigned int>(viennacl::traits::start2(proxy.lhs())), - static_cast<unsigned int>(viennacl::traits::stride1(proxy.lhs())), static_cast<unsigned int>(viennacl::traits::stride2(proxy.lhs())), - static_cast<unsigned int>(viennacl::traits::internal_size1(proxy.lhs())), static_cast<unsigned int>(viennacl::traits::internal_size2(proxy.lhs())) - ); - VIENNACL_CUDA_LAST_ERROR_CHECK("matrix_col_element_sin_kernel"); - } -} - - -// sinh -template<typename NumericT> -void element_op(matrix_base<NumericT> & A, - matrix_expression<const matrix_base<NumericT>, const matrix_base<NumericT>, op_element_unary<op_sinh> > const & proxy) -{ - assert(A.row_major() == proxy.lhs().row_major() && A.row_major() == proxy.rhs().row_major() && bool("Element-wise operations on mixed matrix layouts not supported yet!")); - - typedef NumericT value_type; - - if (A.row_major()) - { - matrix_row_element_sinh_kernel<<<128, 128>>>(viennacl::cuda_arg(A), - static_cast<unsigned int>(viennacl::traits::start1(A)), static_cast<unsigned int>(viennacl::traits::start2(A)), - static_cast<unsigned int>(viennacl::traits::stride1(A)), static_cast<unsigned int>(viennacl::traits::stride2(A)), - static_cast<unsigned int>(viennacl::traits::size1(A)), static_cast<unsigned int>(viennacl::traits::size2(A)), - static_cast<unsigned int>(viennacl::traits::internal_size1(A)), static_cast<unsigned int>(viennacl::traits::internal_size2(A)), - - viennacl::cuda_arg(proxy.lhs()), - static_cast<unsigned int>(viennacl::traits::start1(proxy.lhs())), static_cast<unsigned int>(viennacl::traits::start2(proxy.lhs())), - static_cast<unsigned int>(viennacl::traits::stride1(proxy.lhs())), static_cast<unsigned int>(viennacl::traits::stride2(proxy.lhs())), - static_cast<unsigned int>(viennacl::traits::internal_size1(proxy.lhs())), static_cast<unsigned int>(viennacl::traits::internal_size2(proxy.lhs())) - ); - VIENNACL_CUDA_LAST_ERROR_CHECK("matrix_row_element_sinh_kernel"); - } - else - { - matrix_col_element_sinh_kernel<<<128, 128>>>(viennacl::cuda_arg(A), - static_cast<unsigned int>(viennacl::traits::start1(A)), static_cast<unsigned int>(viennacl::traits::start2(A)), - static_cast<unsigned int>(viennacl::traits::stride1(A)), static_cast<unsigned int>(viennacl::traits::stride2(A)), - static_cast<unsigned int>(viennacl::traits::size1(A)), static_cast<unsigned int>(viennacl::traits::size2(A)), - static_cast<unsigned int>(viennacl::traits::internal_size1(A)), static_cast<unsigned int>(viennacl::traits::internal_size2(A)), - - viennacl::cuda_arg(proxy.lhs()), - static_cast<unsigned int>(viennacl::traits::start1(proxy.lhs())), static_cast<unsigned int>(viennacl::traits::start2(proxy.lhs())), - static_cast<unsigned int>(viennacl::traits::stride1(proxy.lhs())), static_cast<unsigned int>(viennacl::traits::stride2(proxy.lhs())), - static_cast<unsigned int>(viennacl::traits::internal_size1(proxy.lhs())), static_cast<unsigned int>(viennacl::traits::internal_size2(proxy.lhs())) - ); - VIENNACL_CUDA_LAST_ERROR_CHECK("matrix_col_element_sinh_kernel"); - } -} - - -// sqrt -template<typename NumericT> -void element_op(matrix_base<NumericT> & A, - matrix_expression<const matrix_base<NumericT>, const matrix_base<NumericT>, op_element_unary<op_sqrt> > const & proxy) -{ - assert(A.row_major() == proxy.lhs().row_major() && A.row_major() == proxy.rhs().row_major() && bool("Element-wise operations on mixed matrix layouts not supported yet!")); - - typedef NumericT value_type; - - if (A.row_major()) - { - matrix_row_element_sqrt_kernel<<<128, 128>>>(viennacl::cuda_arg(A), - static_cast<unsigned int>(viennacl::traits::start1(A)), static_cast<unsigned int>(viennacl::traits::start2(A)), - static_cast<unsigned int>(viennacl::traits::stride1(A)), static_cast<unsigned int>(viennacl::traits::stride2(A)), - static_cast<unsigned int>(viennacl::traits::size1(A)), static_cast<unsigned int>(viennacl::traits::size2(A)), - static_cast<unsign
<TRUNCATED>
